summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--Documentation/netlink/genetlink-c.yaml9
-rw-r--r--Documentation/netlink/genetlink-legacy.yaml9
-rw-r--r--Documentation/netlink/genetlink.yaml9
-rw-r--r--Documentation/netlink/netlink-raw.yaml9
-rw-r--r--Documentation/netlink/specs/net_shaper.yaml7
-rw-r--r--Documentation/process/index.rst1
-rw-r--r--Documentation/process/security-bugs.rst106
-rw-r--r--Documentation/process/threat-model.rst235
-rw-r--r--Documentation/virt/kvm/x86/amd-memory-encryption.rst8
-rw-r--r--MAINTAINERS17
-rw-r--r--arch/arm64/include/asm/kvm_nested.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h2
-rw-r--r--arch/arm64/kvm/arm.c21
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/clock.c3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c47
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c16
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c3
-rw-r--r--arch/arm64/kvm/mmu.c29
-rw-r--r--arch/s390/kvm/interrupt.c3
-rw-r--r--arch/s390/kvm/pci.c10
-rw-r--r--arch/x86/include/asm/msr-index.h3
-rw-r--r--arch/x86/kernel/acpi/cppc.c6
-rw-r--r--arch/x86/kernel/cpu/amd.c3
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c23
-rw-r--r--arch/x86/kvm/svm/nested.c43
-rw-r--r--arch/x86/kvm/svm/svm.c15
-rw-r--r--arch/x86/kvm/trace.h2
-rw-r--r--arch/x86/kvm/vmx/capabilities.h1
-rw-r--r--arch/x86/kvm/vmx/vmx.c17
-rw-r--r--arch/x86/xen/mmu_pv.c8
-rw-r--r--arch/x86/xen/setup.c2
-rw-r--r--block/bio-integrity.c19
-rw-r--r--block/bio.c27
-rw-r--r--block/blk-mq.c19
-rw-r--r--block/blk-zoned.c32
-rw-r--r--drivers/acpi/ac.c6
-rw-r--r--drivers/acpi/acpi_pad.c6
-rw-r--r--drivers/acpi/acpi_tad.c6
-rw-r--r--drivers/acpi/battery.c6
-rw-r--r--drivers/acpi/button.c9
-rw-r--r--drivers/acpi/ec.c6
-rw-r--r--drivers/acpi/hed.c6
-rw-r--r--drivers/acpi/nfit/core.c6
-rw-r--r--drivers/acpi/pfr_telemetry.c6
-rw-r--r--drivers/acpi/pfr_update.c6
-rw-r--r--drivers/acpi/sbs.c6
-rw-r--r--drivers/acpi/sbshc.c6
-rw-r--r--drivers/acpi/thermal.c2
-rw-r--r--drivers/acpi/tiny-power-button.c6
-rw-r--r--drivers/block/ublk_drv.c3
-rw-r--r--drivers/hid/bpf/hid_bpf_dispatch.c6
-rw-r--r--drivers/hid/hid-appletb-kbd.c56
-rw-r--r--drivers/hid/hid-core.c67
-rw-r--r--drivers/hid/hid-elan.c1
-rw-r--r--drivers/hid/hid-ft260.c16
-rw-r--r--drivers/hid/hid-gfrm.c4
-rw-r--r--drivers/hid/hid-google-hammer.c2
-rw-r--r--drivers/hid/hid-ids.h4
-rw-r--r--drivers/hid/hid-lenovo-go-s.c44
-rw-r--r--drivers/hid/hid-logitech-hidpp.c40
-rw-r--r--drivers/hid/hid-magicmouse.c16
-rw-r--r--drivers/hid/hid-mcp2221.c7
-rw-r--r--drivers/hid/hid-multitouch.c2
-rw-r--r--drivers/hid/hid-playstation.c6
-rw-r--r--drivers/hid/hid-primax.c2
-rw-r--r--drivers/hid/hid-quirks.c2
-rw-r--r--drivers/hid/hid-sjoy.c12
-rw-r--r--drivers/hid/hid-sony.c15
-rw-r--r--drivers/hid/hid-uclogic-core.c4
-rw-r--r--drivers/hid/hid-vivaldi-common.c2
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-core.c9
-rw-r--r--drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c4
-rw-r--r--drivers/hid/usbhid/hid-core.c11
-rw-r--r--drivers/hid/usbhid/hid-pidff.c7
-rw-r--r--drivers/hid/wacom_sys.c6
-rw-r--r--drivers/iommu/amd/debugfs.c9
-rw-r--r--drivers/iommu/amd/iommu.c8
-rw-r--r--drivers/iommu/generic_pt/iommu_pt.h24
-rw-r--r--drivers/iommu/intel/iommu.c16
-rw-r--r--drivers/iommu/iommu.c305
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c7
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_phc.c5
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_hw.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c2
-rw-r--r--drivers/net/ethernet/cirrus/cs89x0.c2
-rw-r--r--drivers/net/ethernet/cortina/gemini.c18
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ptp.c3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dpll.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dpll.h32
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c2
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_idc.c6
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_ptp.c4
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c8
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c7
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c9
-rw-r--r--drivers/net/ethernet/ti/icssm/icssm_prueth.c1
-rw-r--r--drivers/net/fddi/defza.c11
-rw-r--r--drivers/net/macsec.c39
-rw-r--r--drivers/net/net_failover.c12
-rw-r--r--drivers/net/phy/dp83tc811.c1
-rw-r--r--drivers/net/wan/fsl_ucc_hdlc.c7
-rw-r--r--drivers/nvme/host/apple.c1
-rw-r--r--drivers/nvme/host/core.c6
-rw-r--r--drivers/nvme/host/ioctl.c18
-rw-r--r--drivers/nvme/host/pci.c8
-rw-r--r--drivers/nvme/target/Kconfig9
-rw-r--r--drivers/nvme/target/auth.c13
-rw-r--r--drivers/nvme/target/tcp.c4
-rw-r--r--drivers/platform/x86/asus-nb-wmi.c9
-rw-r--r--drivers/platform/x86/hp/hp-wmi.c4
-rw-r--r--drivers/platform/x86/intel/plr_tpmi.c45
-rw-r--r--drivers/platform/x86/intel/vsec_tpmi.c29
-rw-r--r--drivers/platform/x86/lenovo/Kconfig1
-rw-r--r--drivers/platform/x86/lenovo/wmi-capdata.c8
-rw-r--r--drivers/platform/x86/lenovo/wmi-capdata.h20
-rw-r--r--drivers/platform/x86/lenovo/wmi-events.c2
-rw-r--r--drivers/platform/x86/lenovo/wmi-gamezone.c7
-rw-r--r--drivers/platform/x86/lenovo/wmi-gamezone.h20
-rw-r--r--drivers/platform/x86/lenovo/wmi-helpers.c105
-rw-r--r--drivers/platform/x86/lenovo/wmi-helpers.h21
-rw-r--r--drivers/platform/x86/lenovo/wmi-other.c294
-rw-r--r--drivers/platform/x86/lenovo/wmi-other.h16
-rw-r--r--drivers/platform/x86/samsung-galaxybook.c69
-rw-r--r--drivers/resctrl/mpam_devices.c81
-rw-r--r--drivers/resctrl/mpam_internal.h2
-rw-r--r--drivers/staging/greybus/hid.c2
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c37
-rw-r--r--drivers/vfio/pci/vfio_pci_dmabuf.c42
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c26
-rw-r--r--drivers/xen/xen-acpi-pad.c6
-rw-r--r--fs/btrfs/block-group.c23
-rw-r--r--fs/btrfs/compression.c26
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/inode.c28
-rw-r--r--fs/btrfs/transaction.c9
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/quota.c37
-rw-r--r--fs/ceph/xattr.c17
-rw-r--r--fs/iomap/direct-io.c2
-rw-r--r--fs/nfsd/nfs4proc.c18
-rw-r--r--fs/nfsd/nfs4state.c64
-rw-r--r--fs/nfsd/state.h1
-rw-r--r--fs/nfsd/xdr4.h1
-rw-r--r--fs/smb/client/fs_context.c161
-rw-r--r--fs/smb/client/smb2file.c3
-rw-r--r--fs/smb/client/smb2ops.c4
-rw-r--r--fs/smb/client/transport.c15
-rw-r--r--fs/smb/common/fscc.h4
-rw-r--r--fs/smb/common/smb2pdu.h4
-rw-r--r--fs/smb/smbdirect/connection.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c18
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c3
-rw-r--r--fs/xfs/scrub/common.c11
-rw-r--r--fs/xfs/scrub/common.h2
-rw-r--r--fs/xfs/scrub/dabtree.c7
-rw-r--r--fs/xfs/scrub/dir.c7
-rw-r--r--fs/xfs/xfs_inode.c6
-rw-r--r--fs/xfs/xfs_notify_failure.c2
-rw-r--r--fs/xfs/xfs_trans.c11
-rw-r--r--fs/xfs/xfs_zone_alloc.c4
-rw-r--r--fs/xfs/xfs_zone_gc.c2
-rw-r--r--include/asm-generic/kprobes.h2
-rw-r--r--include/linux/arm-smccc.h6
-rw-r--r--include/linux/bio.h3
-rw-r--r--include/linux/fprobe.h5
-rw-r--r--include/linux/hid.h6
-rw-r--r--include/linux/hid_bpf.h14
-rw-r--r--include/linux/intel_tpmi.h6
-rw-r--r--include/linux/netfilter/x_tables.h4
-rw-r--r--include/linux/netfilter_arp/arp_tables.h1
-rw-r--r--include/linux/netfilter_ipv4/ip_tables.h1
-rw-r--r--include/linux/netfilter_ipv6/ip6_tables.h1
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/sched/deadline.h9
-rw-r--r--include/linux/sched/ext.h17
-rw-r--r--include/linux/slab.h3
-rw-r--r--include/linux/vfio_pci_core.h2
-rw-r--r--include/net/genetlink.h4
-rw-r--r--include/net/macsec.h7
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h5
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--include/net/nsh.h4
-rw-r--r--include/trace/events/btrfs.h4
-rw-r--r--include/xen/arm/interface.h2
-rw-r--r--io_uring/cancel.c2
-rw-r--r--io_uring/fdinfo.c3
-rw-r--r--io_uring/io-wq.c3
-rw-r--r--io_uring/io_uring.c29
-rw-r--r--io_uring/rw.c4
-rw-r--r--io_uring/timeout.c16
-rw-r--r--kernel/audit.c4
-rw-r--r--kernel/auditsc.c2
-rw-r--r--kernel/cgroup/cpuset-internal.h1
-rw-r--r--kernel/cgroup/cpuset.c56
-rw-r--r--kernel/cgroup/dmem.c1
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/liveupdate/kexec_handover.c2
-rw-r--r--kernel/ptrace.c22
-rw-r--r--kernel/sched/deadline.c13
-rw-r--r--kernel/sched/ext.c277
-rw-r--r--kernel/trace/bpf_trace.c3
-rw-r--r--kernel/trace/fprobe.c23
-rw-r--r--kernel/workqueue.c16
-rw-r--r--lib/kunit/Kconfig5
-rw-r--r--lib/rhashtable.c33
-rw-r--r--lib/tests/test_kprobes.c29
-rw-r--r--mm/memfd_luo.c25
-rw-r--r--mm/slub.c16
-rw-r--r--net/atm/signaling.c1
-rw-r--r--net/batman-adv/bat_iv_ogm.c85
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c11
-rw-r--r--net/batman-adv/main.c1
-rw-r--r--net/batman-adv/tp_meter.c116
-rw-r--r--net/batman-adv/tp_meter.h1
-rw-r--r--net/batman-adv/types.h4
-rw-r--r--net/bridge/netfilter/ebtable_broute.c14
-rw-r--r--net/bridge/netfilter/ebtable_filter.c14
-rw-r--r--net/bridge/netfilter/ebtable_nat.c12
-rw-r--r--net/bridge/netfilter/ebtables.c71
-rw-r--r--net/ceph/auth_x.c5
-rw-r--r--net/ceph/crush/crush.c6
-rw-r--r--net/ceph/osdmap.c17
-rw-r--r--net/core/dev.c21
-rw-r--r--net/core/failover.c6
-rw-r--r--net/ethtool/bitset.c8
-rw-r--r--net/ethtool/phy.c36
-rw-r--r--net/hsr/hsr_framereg.c5
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c53
-rw-r--r--net/ipv4/netfilter/arptable_filter.c27
-rw-r--r--net/ipv4/netfilter/ip_tables.c59
-rw-r--r--net/ipv4/netfilter/iptable_filter.c27
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c29
-rw-r--r--net/ipv4/netfilter/iptable_nat.c6
-rw-r--r--net/ipv4/netfilter/iptable_raw.c26
-rw-r--r--net/ipv4/netfilter/iptable_security.c27
-rw-r--r--net/ipv4/tcp_ao.c3
-rw-r--r--net/ipv6/ip6_flowlabel.c46
-rw-r--r--net/ipv6/netfilter/ip6_tables.c56
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c26
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c27
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c6
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c24
-rw-r--r--net/ipv6/netfilter/ip6table_security.c27
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c1
-rw-r--r--net/netfilter/nf_conntrack_core.c7
-rw-r--r--net/netfilter/nf_conntrack_expect.c1
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c12
-rw-r--r--net/netfilter/nf_conntrack_helper.c5
-rw-r--r--net/netfilter/nf_conntrack_netlink.c21
-rw-r--r--net/netfilter/nf_conntrack_sip.c10
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/x_tables.c177
-rw-r--r--net/netlink/genetlink.c8
-rw-r--r--net/rds/message.c1
-rw-r--r--net/sched/sch_cbs.c16
-rw-r--r--net/sched/sch_dualpi2.c4
-rw-r--r--net/sctp/socket.c9
-rw-r--r--net/shaper/shaper.c166
-rw-r--r--net/shaper/shaper_nl_gen.c7
-rw-r--r--net/shaper/shaper_nl_gen.h2
-rw-r--r--net/smc/af_smc.c20
-rw-r--r--net/smc/smc_tracepoint.h2
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/tls/tls_sw.c26
-rw-r--r--net/vmw_vsock/virtio_transport_common.c47
-rw-r--r--scripts/gcc-plugins/gcc-common.h4
-rw-r--r--tools/arch/x86/include/asm/msr-index.h3
-rw-r--r--tools/net/ynl/Makefile.deps4
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py31
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c5
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_v1_base.sh2
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c10
-rwxr-xr-xtools/testing/selftests/drivers/net/shaper.py24
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c9
-rw-r--r--tools/testing/selftests/kvm/steal_time.c2
-rw-r--r--tools/testing/selftests/sched_ext/dequeue.c1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json41
-rw-r--r--tools/testing/selftests/ublk/kublk.c11
-rw-r--r--virt/kvm/dirty_ring.c3
286 files changed, 3845 insertions, 1610 deletions
diff --git a/.mailmap b/.mailmap
index eec4a740f7ca..bd6a72f29d9c 100644
--- a/.mailmap
+++ b/.mailmap
@@ -857,6 +857,7 @@ Tobias Klauser <tklauser@distanz.ch> <klto@zhaw.ch>
Tobias Klauser <tklauser@distanz.ch> <tklauser@nuerscht.ch>
Tobias Klauser <tklauser@distanz.ch> <tklauser@xenon.tklauser.home>
Todor Tomov <todor.too@gmail.com> <todor.tomov@linaro.org>
+Tomasz Jeznach <tomasz.jeznach@linux.dev> <tjeznach@rivosinc.com>
Tony Luck <tony.luck@intel.com>
Trilok Soni <quic_tsoni@quicinc.com> <tsoni@codeaurora.org>
TripleX Chung <xxx.phy@gmail.com> <triplex@zh-kernel.org>
diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index 57f59fe23e3f..4ea31e8fc4d1 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -69,6 +69,15 @@ properties:
header:
description: For C-compatible languages, header which already defines this value.
type: string
+ scope:
+ description: |
+ Visibility of this definition. "uapi" (default) renders into
+ the uAPI header, "kernel" renders into the kernel-side
+ generated header, "user" renders into the user-side
+ generated header. When combined with `header:`, the
+ definition is not rendered, and the named header is
+ included only by code matching the scope.
+ enum: [ uapi, kernel, user ]
type:
enum: [ const, enum, flags ]
doc:
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 66fb8653a344..f9c44747729a 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -83,6 +83,15 @@ properties:
header:
description: For C-compatible languages, header which already defines this value.
type: string
+ scope:
+ description: |
+ Visibility of this definition. "uapi" (default) renders into
+ the uAPI header, "kernel" renders into the kernel-side
+ generated header, "user" renders into the user-side
+ generated header. When combined with `header:`, the
+ definition is not rendered, and the named header is
+ included only by code matching the scope.
+ enum: [ uapi, kernel, user ]
type:
enum: [ const, enum, flags, struct ] # Trim
doc:
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index a1194d5d93fc..d3f3f3399ddf 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -55,6 +55,15 @@ properties:
header:
description: For C-compatible languages, header which already defines this value.
type: string
+ scope:
+ description: |
+ Visibility of this definition. "uapi" (default) renders into
+ the uAPI header, "kernel" renders into the kernel-side
+ generated header, "user" renders into the user-side
+ generated header. When combined with `header:`, the
+ definition is not rendered, and the named header is
+ included only by code matching the scope.
+ enum: [ uapi, kernel, user ]
type:
enum: [ const, enum, flags ]
doc:
diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml
index dd98dda55bd0..4c436b59a34b 100644
--- a/Documentation/netlink/netlink-raw.yaml
+++ b/Documentation/netlink/netlink-raw.yaml
@@ -87,6 +87,15 @@ properties:
header:
description: For C-compatible languages, header which already defines this value.
type: string
+ scope:
+ description: |
+ Visibility of this definition. "uapi" (default) renders into
+ the uAPI header, "kernel" renders into the kernel-side
+ generated header, "user" renders into the user-side
+ generated header. When combined with `header:`, the
+ definition is not rendered, and the named header is
+ included only by code matching the scope.
+ enum: [ uapi, kernel, user ]
type:
enum: [ const, enum, flags, struct ] # Trim
doc:
diff --git a/Documentation/netlink/specs/net_shaper.yaml b/Documentation/netlink/specs/net_shaper.yaml
index 3f2ad772b64b..de01f922040a 100644
--- a/Documentation/netlink/specs/net_shaper.yaml
+++ b/Documentation/netlink/specs/net_shaper.yaml
@@ -34,6 +34,11 @@ doc: |
definitions:
-
+ type: const
+ name: max-handle-id
+ value: 0x3fffffe
+ scope: kernel
+ -
type: enum
name: scope
doc: Defines the shaper @id interpretation.
@@ -140,6 +145,8 @@ attribute-sets:
-
name: id
type: u32
+ checks:
+ max: max-handle-id
doc: |
Numeric identifier of a shaper. The id semantic depends on
the scope. For @queue scope it's the queue id and for @node
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index dbd6ea16aca7..aa7c959a52b8 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -86,6 +86,7 @@ regressions and security problems.
debugging/index
handling-regressions
security-bugs
+ threat-model
cve
embargoed-hardware-issues
diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst
index 27b028e85861..3c51ddde31dd 100644
--- a/Documentation/process/security-bugs.rst
+++ b/Documentation/process/security-bugs.rst
@@ -66,6 +66,42 @@ In addition, the following information are highly desirable:
the issue appear. It is useful to share them, as they can be helpful to
keep end users protected during the time it takes them to apply the fix.
+What qualifies as a security bug
+--------------------------------
+
+It is important that most bugs are handled publicly so as to involve the widest
+possible audience and find the best solution. By nature, bugs that are handled
+in closed discussions between a small set of participants are less likely to
+produce the best possible fix (e.g., risk of missing valid use cases, limited
+testing abilities).
+
+It turns out that the majority of the bugs reported via the security team are
+just regular bugs that have been improperly qualified as security bugs due to
+a lack of awareness of the Linux kernel's threat model, as described in
+Documentation/process/threat-model.rst, and ought to have been sent through
+the normal channels described in Documentation/admin-guide/reporting-issues.rst
+instead.
+
+The security list exists for urgent bugs that grant an attacker a capability
+they are not supposed to have on a correctly configured production system, and
+can be easily exploited, representing an imminent threat to many users. Before
+reporting, consider whether the issue actually crosses a trust boundary on such
+a system.
+
+**If you resorted to AI assistance to identify a bug, you must treat it as
+public**. While you may have valid reasons to believe it is not, the security
+team's experience shows that bugs discovered this way systematically surface
+simultaneously across multiple researchers, often on the same day. In this
+case, do not publicly share a reproducer, as this could cause unintended harm;
+just mention that one is available and maintainers might ask for it privately
+if they need it.
+
+If you are unsure whether an issue qualifies, err on the side of reporting
+privately: the security team would rather triage a borderline report than miss
+a real vulnerability. Reporting ordinary bugs to the security list, however,
+does not make them move faster and consumes triage capacity that other reports
+need.
+
Identifying contacts
--------------------
@@ -74,7 +110,7 @@ affected subsystem's maintainers and Cc: the Linux kernel security team. Do
not send it to a public list at this stage, unless you have good reasons to
consider the issue as being public or trivial to discover (e.g. result of a
widely available automated vulnerability scanning tool that can be repeated by
-anyone).
+anyone, or use of AI-based tools).
If you're sending a report for issues affecting multiple parts in the kernel,
even if they're fairly similar issues, please send individual messages (think
@@ -131,6 +167,64 @@ the Linux kernel security team only. Your message will be triaged, and you
will receive instructions about whom to contact, if needed. Your message may
equally be forwarded as-is to the relevant maintainers.
+Responsible use of AI to find bugs
+----------------------------------
+
+A significant fraction of bug reports submitted to the security team are
+actually the result of code reviews assisted by AI tools. While this can be an
+efficient means to find bugs in rarely explored areas, it causes an overload on
+maintainers, who are sometimes forced to ignore such reports due to their poor
+quality or accuracy. As such, reporters must be particularly cautious about a
+number of points which tend to make these reports needlessly difficult to
+handle:
+
+ * **Length**: AI-generated reports tend to be excessively long, containing
+ multiple sections and excessive detail. This makes it difficult to spot
+ important information such as affected files, versions, and impact. Please
+ ensure that a clear summary of the problem and all critical details are
+ presented first. Do not require triage engineers to scan multiple pages of
+ text. Configure your tools to produce concise, human-style reports.
+
+ * **Formatting**: Most AI-generated reports are littered with Markdown tags.
+ These decorations complicate the search for important information and do
+ not survive the quoting processes involved in forwarding or replying.
+ Please **always convert your report to plain text** without any formatting
+ decorations before sending it.
+
+ * **Impact Evaluation**: Many AI-generated reports lack an understanding
+ of the kernel's threat model (see Documentation/process/threat-model.rst)
+ and go to great lengths inventing theoretical consequences. This adds
+ noise and complicates triage. Please stick to verifiable facts (e.g.,
+ "this bug permits any user to gain CAP_NET_ADMIN") without enumerating
+ speculative implications. Have your tool read this documentation as
+ part of the evaluation process.
+
+ * **Reproducer**: AI-based tools are often capable of generating reproducers.
+ Please always ensure your tool provides one and **test it thoroughly**. If
+ the reproducer does not work, or if the tool cannot produce one, the
+ validity of the report should be seriously questioned. Note that since the
+ report will be posted to a public list, the reproducer should only be
+ shared upon maintainers' request.
+
+ * **Propose a Fix**: Many AI tools are actually better at writing code than
+ evaluating it. Please ask your tool to propose a fix and **test it** before
+ reporting the problem. If the fix cannot be tested because it relies on
+ rare hardware or almost extinct network protocols, the issue is likely not
+ a security bug. In any case, if a fix is proposed, it must adhere to
+ Documentation/process/submitting-patches.rst and include a 'Fixes:' tag
+ designating the commit that introduced the bug.
+
+Failure to consider these points exposes your report to the risk of being
+ignored.
+
+Use common sense when evaluating the report. If the affected file has not been
+touched for more than one year and is maintained by a single individual, it is
+likely that usage has declined and exposed users are virtually non-existent
+(e.g., drivers for very old hardware, obsolete filesystems). In such cases,
+there is no need to consume a maintainer's time with an unimportant report. If
+the issue is clearly trivial and publicly discoverable, you should report it
+directly to the public mailing lists.
+
Sending the report
------------------
@@ -148,7 +242,15 @@ run additional tests. Reports where the reporter does not respond promptly
or cannot effectively discuss their findings may be abandoned if the
communication does not quickly improve.
-The report must be sent to maintainers, with the security team in ``Cc:``.
+The report must be sent to maintainers. If there are two or fewer
+recipients in your message, you must also always Cc: the Linux kernel
+security team who will ensure the message is delivered to the proper
+people, and will be able to assist small maintainer teams with processes
+they may not be familiar with. For larger teams, Cc: the Linux kernel
+security team for your first few reports or when seeking specific help,
+such as when resending a message which got no response within a week.
+Once you have become comfortable with the process for a few reports, it is
+no longer necessary to Cc: the security list when sending to large teams.
The Linux kernel security team can be contacted by email at
<security@kernel.org>. This is a private list of security officers
who will help verify the bug report and assist developers working on a fix.
diff --git a/Documentation/process/threat-model.rst b/Documentation/process/threat-model.rst
new file mode 100644
index 000000000000..f177b8d3c1ca
--- /dev/null
+++ b/Documentation/process/threat-model.rst
@@ -0,0 +1,235 @@
+The Linux Kernel threat model
+=============================
+
+There are a lot of assumptions regarding what the kernel does and does not
+protect against. These assumptions tend to cause confusion for bug reports
+(:doc:`security-related ones <security-bugs>` vs :doc:`non-security ones
+<../admin-guide/reporting-issues>`), and can complicate security enforcement
+when the responsibilities for some boundaries is not clear between the kernel,
+distros, administrators and users.
+
+This document tries to clarify the responsibilities of the kernel in this
+domain.
+
+The kernel's responsibilities
+-----------------------------
+
+The kernel abstracts access to local hardware resources and to remote systems
+in a way that allows multiple local users to get a fair share of the available
+resources granted to them, and, when the underlying hardware permits, to assign
+a level of confidentiality to their communications and to the data they are
+processing or storing.
+
+The kernel assumes that the underlying hardware behaves according to its
+specifications. This includes the integrity of the CPU's instruction set, the
+transparency of the branch prediction unit and the cache units, the consistency
+of the Memory Management Unit (MMU), the isolation of DMA-capable peripherals
+(e.g., via IOMMU), state transitions in controllers, ranges of values read from
+registers, the respect of documented hardware limitations, etc.
+
+When hardware fails to maintain its specified isolation (e.g., CPU bugs,
+side-channels, hardware response to unexpected inputs), the kernel will usually
+attempt to implement reasonable mitigations. These are best-effort measures
+intended to reduce the attack surface or elevate the cost of an attack within
+the limits of the hardware's facilities; they do not constitute a
+kernel-provided safety guarantee.
+
+Users always perform their activities under the authority of an administrator
+who is able to grant or deny various types of permissions that may affect how
+users benefit from available resources, or the level of confidentiality of
+their activities. Administrators may also delegate all or part of their own
+permissions to some users, particularly via capabilities but not only. All this
+is performed via configuration (sysctl, file-system permissions etc).
+
+The Linux Kernel applies a certain collection of default settings that match
+its threat model. Distros have their own threat model and will come with their
+own configuration presets, that the administrator may have to adjust to better
+suit their expectations (relax or restrict).
+
+By default, the Linux Kernel guarantees the following protections when running
+on common processors featuring privilege levels and memory management units:
+
+* **User-based isolation**: an unprivileged user may restrict access to their
+ own data from other unprivileged users running on the same system. This
+ includes:
+
+ * stored data, via file system permissions
+ * in-memory data (pages are not accessible by default to other users)
+ * process activity (ptrace is not permitted to other users)
+ * inter-process communication (other users may not observe data exchanged via
+ UNIX domain sockets or other IPC mechanisms).
+ * network communications within the same or with other systems
+
+* **Capability-based protection**:
+
+ * users not having elevated capabilities (including but not limited to
+ CAP_SYS_ADMIN) may not alter the
+ kernel's configuration, memory nor state, change other users' view of the
+ file system layout, grant any user capabilities they do not have, nor
+ affect the system's availability (shutdown, reboot, panic, hang, or making
+ the system unresponsive via unbounded resource exhaustion).
+ * users not having the ``CAP_NET_ADMIN`` capability may not alter the network
+ configuration, intercept nor spoof network communications from other users
+ nor systems.
+ * users not having ``CAP_SYS_PTRACE`` may not observe other users' processes
+ activities.
+
+When ``CONFIG_USER_NS`` is set, the kernel also permits unprivileged users to
+create their own user namespace in which they have all capabilities, but with a
+number of restrictions (they may not perform actions that have impacts on the
+initial user namespace, such as changing time, loading modules or mounting
+block devices). Please refer to ``user_namespaces(7)`` for more details, the
+possibilities of user namespaces are not covered in this document.
+
+The kernel also offers a lot of troubleshooting and debugging facilities, which
+can constitute attack vectors when placed in wrong hands. While some of them
+are designed to be accessible to regular local users with a low risk (e.g.
+kernel logs via ``/proc/kmsg``), some would expose enough information to
+represent a risk in most places and the decision to expose them is under the
+administrator's responsibility (perf events, traces), and others are not
+designed to be accessed by non-privileged users (e.g. debugfs). Access to these
+facilities by a user who has been explicitly granted permission by an
+administrator does not constitute a security breach.
+
+Bugs that permit to violate the principles above constitute security breaches.
+However, bugs that permit one violation only once another one was already
+achieved are only weaknesses. The kernel applies a number of self-protection
+measures whose purpose is to avoid crossing a security boundary when certain
+classes of bugs are found, but a failure of these extra protections do not
+constitute a vulnerability alone.
+
+What does not constitute a security bug
+---------------------------------------
+
+In the Linux kernel's threat model, the following classes of problems are
+**NOT** considered as Linux Kernel security bugs. However, when it is believed
+that the kernel could do better, they should be reported, so that they can be
+reviewed and fixed where reasonably possible, but they will be handled as any
+regular bug:
+
+* **Configuration**:
+
+ * outdated kernels and particularly end-of-life branches are out of the scope
+ of the kernel's threat model: administrators are responsible for keeping
+ their system up to date. For a bug to qualify as a security bug, it must be
+ demonstrated that it affects actively maintained versions.
+
+ * build-level: changes to the kernel configuration that are explicitly
+ documented as lowering the security level (e.g. ``CONFIG_NOMMU``), or
+ targeted at developers only.
+
+ * OS-level: changes to command line parameters, sysctls, filesystem
+ permissions, user capabilities, exposure of privileged interfaces, that
+ explicitly increase exposure by either offering non-default access to
+ unprivileged users, or reduce the kernel's ability to enforce some
+ protections or mitigations. Example: write access to procfs or debugfs.
+
+ * issues triggered only when using features intended for development or
+ debugging (e.g., LOCKDEP, KASAN, FAULT_INJECTION): these features are known
+ to introduce overhead and potential instability and are not intended for
+ production use.
+
+ * issues affecting drivers exposed under CONFIG_STAGING, as well as features
+ marked EXPERIMENTAL in the configuration.
+
+ * loading of explicitly insecure/broken/staging modules, and generally any
+ using any subsystem marked as experimental or not intended for production
+ use.
+
+ * running out-of-tree modules or unofficial kernel forks; these should be
+ reported to the relevant vendor.
+
+* **Excess of initial privileges**:
+
+ * actions performed by a user already possessing the privileges required to
+ perform that action or modify that state (e.g. ``CAP_SYS_ADMIN``,
+ ``CAP_NET_ADMIN``, ``CAP_SYS_RAWIO``, ``CAP_SYS_MODULE`` with no further
+ boundary being crossed).
+
+ * actions performed in user namespace that do not bypass the restrictions
+ imposed to the initial user (e.g. ptrace usage, signal delivery, resource
+ usage, access to FS/device/sysctl/memory, network binding, system/network
+ configuration etc).
+
+ * anything performed by the root user in the initial namespace (e.g. kernel
+ oops when writing to a privileged device).
+
+* **Out of production use**:
+
+ This covers theoretical/probabilistic attacks that rely on laboratory
+ conditions with zero system noise, or those requiring an unrealistic number
+ of attempts (e.g., billions of trials) that would be detected by standard
+ system monitoring long before success, such as:
+
+ * prediction of random numbers that only works in a totally silent
+ environment (such as IP ID, TCP ports or sequence numbers that can only be
+ guessed in a lab).
+
+ * activity observation and information leaks based on probabilistic
+ approaches that are prone to measurement noise and not realistically
+ reproducible on a production system.
+
+ * issues that can only be triggered by heavy attacks (e.g. brute force) whose
+ impact on the system makes it unlikely or impossible to remain undetected
+ before they succeed (e.g. consuming all memory before succeeding).
+
+ * problems seen only under development simulators, emulators, or combinations
+ that do not exist on real systems at the time of reporting (issues
+ involving tens of millions of threads, tens of thousands of CPUs,
+ unrealistic CPU frequencies, RAM sizes or disk capacities, network speeds.
+
+ * issues whose reproduction requires hardware modification or emulation,
+ including fake USB devices that pretend to be another one.
+
+ * as well as issues that can be triggered at a cost that is orders of
+ magnitude higher than the expected benefits (e.g. fully functional keyboard
+ emulator only to retrieve 7 uninitialized bytes in a structure, or
+ brute-force method involving millions of connection attempts to guess a
+ port number).
+
+* **Hardening failures**:
+
+ * ability to bypass some of the kernel's hardening measures with no
+ demonstrable exploit path (e.g. ASLR bypass, events timing or probing with
+ no demonstrable consequence). These are just weaknesses, not
+ vulnerabilities.
+
+ * missing argument checks and failure to report certain errors with no
+ immediate consequence.
+
+* **Random information leaks**:
+
+ This concerns information leaks of small data parts that happen to be there
+ and that cannot be chosen by the attacker, or face access restrictions:
+
+ * structure padding reported by syscalls or other interfaces.
+
+ * identifiers, partial data, non-terminated strings reported in error
+ messages.
+
+ * Leaks of kernel memory addresses/pointers do not constitute an immediately
+ exploitable vector and are not security bugs, though they must be reported
+ and fixed.
+
+* **Crafted file system images**:
+
+ * bugs triggered by mounting a corrupted or maliciously crafted file system
+ image are generally not security bugs, as the kernel assumes the underlying
+ storage media is under the administrator's control, unless the filesystem
+ driver is specifically documented as being hardened against untrusted media.
+
+ * issues that are resolved, mitigated, or detected by running a filesystem
+ consistency check (fsck) on the image prior to mounting.
+
+* **Physical access**:
+
+ Issues that require physical access to the machine, hardware modification, or
+ the use of specialized hardware (e.g., logic analyzers, DMA-attack tools over
+ PCI-E/Thunderbolt) are out of scope unless the system is explicitly
+ configured with technologies meant to defend against such attacks
+ (e.g. IOMMU).
+
+* **Functional and performance regressions**:
+
+ Any issue that can be mitigated by setting proper permissions and limits
+ doesn't qualify as a security bug.
diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
index b2395dd4769d..bd04a908a8db 100644
--- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
@@ -656,8 +656,8 @@ References
See [white-paper]_, [api-spec]_, [amd-apm]_, [kvm-forum]_, and [snp-fw-abi]_
for more info.
-.. [white-paper] https://developer.amd.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
-.. [api-spec] https://support.amd.com/TechDocs/55766_SEV-KM_API_Specification.pdf
-.. [amd-apm] https://support.amd.com/TechDocs/24593.pdf (section 15.34)
+.. [white-paper] https://docs.amd.com/v/u/en-US/memory-encryption-white-paper
+.. [api-spec] https://docs.amd.com/v/u/en-US/55766_PUB_3.24_SEV_API
+.. [amd-apm] https://docs.amd.com/v/u/en-US/24593_3.44_APM_Vol2 (section 15.34)
.. [kvm-forum] https://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf
-.. [snp-fw-abi] https://www.amd.com/system/files/TechDocs/56860.pdf
+.. [snp-fw-abi] https://www.amd.com/content/dam/amd/en/documents/developer/56860.pdf
diff --git a/MAINTAINERS b/MAINTAINERS
index b2040011a386..c2c6d79275c6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -68,6 +68,12 @@ Maintainers List
first. When adding to this list, please keep the entries in
alphabetical order.
+3C509 NETWORK DRIVER
+M: "Maciej W. Rozycki" <macro@orcam.me.uk>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/ethernet/3com/3c509.c
+
3C59X NETWORK DRIVER
M: Steffen Klassert <klassert@kernel.org>
L: netdev@vger.kernel.org
@@ -2015,7 +2021,7 @@ F: Documentation/hwmon/aquacomputer_d5next.rst
F: drivers/hwmon/aquacomputer_d5next.c
AQUANTIA ETHERNET DRIVER (atlantic)
-M: Igor Russkikh <irusskikh@marvell.com>
+M: Sukhdeep Singh <sukhdeeps@marvell.com>
L: netdev@vger.kernel.org
S: Maintained
W: https://www.marvell.com/
@@ -2024,7 +2030,7 @@ F: Documentation/networking/device_drivers/ethernet/aquantia/atlantic.rst
F: drivers/net/ethernet/aquantia/atlantic/
AQUANTIA ETHERNET DRIVER PTP SUBSYSTEM
-M: Egor Pomozov <epomozov@marvell.com>
+M: Sukhdeep Singh <sukhdeeps@marvell.com>
L: netdev@vger.kernel.org
S: Maintained
W: http://www.aquantia.com
@@ -4181,8 +4187,8 @@ F: include/uapi/linux/sonet.h
F: net/atm/
ATMEL MACB ETHERNET DRIVER
-M: Nicolas Ferre <nicolas.ferre@microchip.com>
-M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
+M: Théo Lebrun <theo.lebrun@bootlin.com>
+R: Conor Dooley <conor.dooley@microchip.com>
S: Maintained
F: drivers/net/ethernet/cadence/
@@ -14055,6 +14061,7 @@ KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
M: Marc Zyngier <maz@kernel.org>
M: Oliver Upton <oupton@kernel.org>
R: Joey Gouly <joey.gouly@arm.com>
+R: Steffen Eiden <seiden@linux.ibm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Zenghui Yu <yuzenghui@huawei.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -22944,7 +22951,7 @@ N: riscv
K: riscv
RISC-V IOMMU
-M: Tomasz Jeznach <tjeznach@rivosinc.com>
+M: Tomasz Jeznach <tomasz.jeznach@linux.dev>
L: iommu@lists.linux.dev
L: linux-riscv@lists.infradead.org
S: Maintained
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 091544e6af44..dc2957662ff2 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -23,6 +23,7 @@ static inline u64 tcr_el2_ps_to_tcr_el1_ips(u64 tcr_el2)
static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
{
return TCR_EPD1_MASK | /* disable TTBR1_EL1 */
+ ((tcr & TCR_EL2_DS) ? TCR_DS : 0) |
((tcr & TCR_EL2_TBI) ? TCR_TBI0 : 0) |
tcr_el2_ps_to_tcr_el1_ips(tcr) |
(tcr & TCR_EL2_TG0_MASK) |
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 736561480f36..7aa08d59d494 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -844,7 +844,7 @@
#define INIT_SCTLR_EL2_MMU_ON \
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
- SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
+ SCTLR_ELx_ITFSB | SCTLR_ELx_EIS | SCTLR_ELx_EOS | SCTLR_EL2_RES1)
#define INIT_SCTLR_EL2_MMU_OFF \
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 8bb2c7422cc8..34c9950884d5 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -4,6 +4,7 @@
* Author: Christoffer Dall <c.dall@virtualopensystems.com>
*/
+#include <linux/arm-smccc.h>
#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
@@ -2638,6 +2639,22 @@ static int init_pkvm_host_sve_state(void)
return 0;
}
+static int pkvm_check_sme_dvmsync_fw_call(void)
+{
+ struct arm_smccc_res res;
+
+ if (!cpus_have_final_cap(ARM64_WORKAROUND_4193714))
+ return 0;
+
+ arm_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+ if (res.a0) {
+ kvm_err("pKVM requires firmware support for C1-Pro erratum 4193714\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
/*
* Finalizes the initialization of hyp mode, once everything else is initialized
* and the initialziation process cannot fail.
@@ -2838,6 +2855,10 @@ static int __init init_hyp_mode(void)
if (err)
goto out_err;
+ err = pkvm_check_sme_dvmsync_fw_call();
+ if (err)
+ goto out_err;
+
err = kvm_hyp_init_protection(hyp_va_bits);
if (err) {
kvm_err("Failed to init hyp memory protection\n");
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 98b2976837b1..bf0eb5e43427 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -245,7 +245,7 @@ static inline void __activate_traps_ich_hfgxtr(struct kvm_vcpu *vcpu)
__activate_fgt(hctxt, vcpu, ICH_HFGITR_EL2);
}
-#define __deactivate_fgt(htcxt, vcpu, reg) \
+#define __deactivate_fgt(hctxt, vcpu, reg) \
do { \
write_sysreg_s(ctxt_sys_reg(hctxt, reg), \
SYS_ ## reg); \
diff --git a/arch/arm64/kvm/hyp/nvhe/clock.c b/arch/arm64/kvm/hyp/nvhe/clock.c
index 32fc4313fe43..a7fc61976fd0 100644
--- a/arch/arm64/kvm/hyp/nvhe/clock.c
+++ b/arch/arm64/kvm/hyp/nvhe/clock.c
@@ -35,6 +35,9 @@ void trace_clock_update(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc)
struct clock_data *clock = &trace_clock_data;
u64 bank = clock->cur ^ 1;
+ if (!mult || shift >= 64)
+ return;
+
clock->data[bank].mult = mult;
clock->data[bank].shift = shift;
clock->data[bank].epoch_ns = epoch_ns;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 28a471d1927c..25f04629014e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -5,6 +5,7 @@
*/
#include <linux/kvm_host.h>
+
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
@@ -14,6 +15,7 @@
#include <hyp/fault.h>
+#include <nvhe/arm-smccc.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
@@ -29,6 +31,19 @@ static struct hyp_pool host_s2_pool;
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))
+static void pkvm_sme_dvmsync_fw_call(void)
+{
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714)) {
+ struct arm_smccc_res res;
+
+ /*
+ * Ignore the return value. Probing for the workaround
+ * availability took place in init_hyp_mode().
+ */
+ hyp_smccc_1_1_smc(ARM_SMCCC_CPU_WORKAROUND_4193714, &res);
+ }
+}
+
static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
hyp_spin_lock(&vm->lock);
@@ -574,8 +589,14 @@ static int host_stage2_set_owner_metadata_locked(phys_addr_t addr, u64 size,
ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_mmu.pgt,
addr, size, &host_s2_pool,
KVM_HOST_INVALID_PTE_TYPE_DONATION, annotation);
- if (!ret)
+ if (!ret) {
+ /*
+ * After stage2 maintenance has happened, but before the page
+ * owner has changed.
+ */
+ pkvm_sme_dvmsync_fw_call();
__host_update_page_state(addr, size, PKVM_NOPAGE);
+ }
return ret;
}
@@ -1369,6 +1390,22 @@ unlock:
return ret && ret != -EHWPOISON ? ret : 0;
}
+/*
+ * share/donate install at most one stage-2 leaf (PAGE_SIZE, or one
+ * KVM_PGTABLE_LAST_LEVEL - 1 block for share). kvm_mmu_cache_min_pages()
+ * bounds the worst-case allocation: exact for the PAGE_SIZE leaf,
+ * conservative by one for the block.
+ */
+static int __guest_check_pgtable_memcache(struct pkvm_hyp_vcpu *vcpu)
+{
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
+
+ if (vcpu->vcpu.arch.pkvm_memcache.nr_pages < kvm_mmu_cache_min_pages(vm->pgt.mmu))
+ return -ENOMEM;
+
+ return 0;
+}
+
int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
@@ -1388,6 +1425,10 @@ int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu)
if (ret)
goto unlock;
+ ret = __guest_check_pgtable_memcache(vcpu);
+ if (ret)
+ goto unlock;
+
meta = host_stage2_encode_gfn_meta(vm, gfn);
WARN_ON(host_stage2_set_owner_metadata_locked(phys, PAGE_SIZE,
PKVM_ID_GUEST, meta));
@@ -1453,6 +1494,10 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu
}
}
+ ret = __guest_check_pgtable_memcache(vcpu);
+ if (ret)
+ goto unlock;
+
for_each_hyp_page(page, phys, size) {
set_host_state(page, PKVM_PAGE_SHARED_OWNED);
page->host_share_guest_count++;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index e7496eb85628..eb1c10120f9f 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -752,16 +752,30 @@ static struct pkvm_hyp_vcpu selftest_vcpu = {
struct pkvm_hyp_vcpu *init_selftest_vm(void *virt)
{
struct hyp_page *p = hyp_virt_to_page(virt);
+ unsigned long min_pages, seeded = 0;
int i;
selftest_vm.kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
WARN_ON(kvm_guest_prepare_stage2(&selftest_vm, virt));
+ /*
+ * Mirror pkvm_refill_memcache() for the share/donate pre-checks;
+ * the selftest invokes those functions directly and would
+ * otherwise see an empty memcache.
+ */
+ min_pages = kvm_mmu_cache_min_pages(&selftest_vm.kvm.arch.mmu);
+
for (i = 0; i < pkvm_selftest_pages(); i++) {
if (p[i].refcount)
continue;
p[i].refcount = 1;
- hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+ if (seeded < min_pages) {
+ push_hyp_memcache(&selftest_vcpu.vcpu.arch.pkvm_memcache,
+ hyp_page_to_virt(&p[i]), hyp_virt_to_phys);
+ seeded++;
+ } else {
+ hyp_put_page(&selftest_vm.pool, hyp_page_to_virt(&p[i]));
+ }
}
selftest_vm.kvm.arch.pkvm.handle = __pkvm_reserve_vm();
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 9db3f11a4754..1e8995add14f 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -663,7 +663,8 @@ static void __noreturn __hyp_call_panic(u64 spsr, u64 elr, u64 par)
host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
- __deactivate_traps(vcpu);
+ if (vcpu)
+ __deactivate_traps(vcpu);
sysreg_restore_host_state_vhe(host_ctxt);
panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n",
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d089c107d9b7..4da9281312eb 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1576,21 +1576,24 @@ struct kvm_s2_fault_desc {
static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
{
bool write_fault, exec_fault;
+ bool perm_fault = kvm_vcpu_trap_is_permission_fault(s2fd->vcpu);
enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt;
unsigned long mmu_seq;
struct page *page;
struct kvm *kvm = s2fd->vcpu->kvm;
- void *memcache;
+ void *memcache = NULL;
kvm_pfn_t pfn;
gfn_t gfn;
int ret;
- memcache = get_mmu_memcache(s2fd->vcpu);
- ret = topup_mmu_memcache(s2fd->vcpu, memcache);
- if (ret)
- return ret;
+ if (!perm_fault) {
+ memcache = get_mmu_memcache(s2fd->vcpu);
+ ret = topup_mmu_memcache(s2fd->vcpu, memcache);
+ if (ret)
+ return ret;
+ }
if (s2fd->nested)
gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT;
@@ -1631,9 +1634,19 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
goto out_unlock;
}
- ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
- __pfn_to_phys(pfn), prot,
- memcache, flags);
+ if (perm_fault) {
+ /*
+ * Drop the SW bits in favour of those stored in the
+ * PTE, which will be preserved.
+ */
+ prot &= ~KVM_NV_GUEST_MAP_SZ;
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, s2fd->fault_ipa,
+ prot, flags);
+ } else {
+ ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
+ __pfn_to_phys(pfn), prot,
+ memcache, flags);
+ }
out_unlock:
kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 07f59c3b9a7b..3bcdbbbb6891 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3310,8 +3310,7 @@ static void aen_host_forward(unsigned long si)
struct zpci_gaite *gaite;
struct kvm *kvm;
- gaite = (struct zpci_gaite *)aift->gait +
- (si * sizeof(struct zpci_gaite));
+ gaite = aift->gait + si;
if (gaite->count == 0)
return;
if (gaite->aisb != 0)
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 86d93e8dddae..5b075c38998e 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -166,7 +166,7 @@ static int kvm_zpci_set_airq(struct zpci_dev *zdev)
fib.fmt0.noi = airq_iv_end(zdev->aibv);
fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
fib.fmt0.aibvo = 0;
- fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib.fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8;
fib.fmt0.aisbo = zdev->aisb & 63;
fib.gd = zdev->gisa;
@@ -290,8 +290,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
phys_to_virt(fib->fmt0.aibv));
spin_lock_irq(&aift->gait_lock);
- gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
- sizeof(struct zpci_gaite));
+ gaite = aift->gait + zdev->aisb;
/* If assist not requested, host will get all alerts */
if (assist)
@@ -309,7 +308,7 @@ static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
/* Update guest FIB for re-issue */
fib->fmt0.aisbo = zdev->aisb & 63;
- fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
+ fib->fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8;
fib->fmt0.isc = gisc;
/* Save some guest fib values in the host for later use */
@@ -357,8 +356,7 @@ static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
if (zdev->kzdev->fib.fmt0.aibv == 0)
goto out;
spin_lock_irq(&aift->gait_lock);
- gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
- sizeof(struct zpci_gaite));
+ gaite = aift->gait + zdev->aisb;
isc = gaite->gisc;
gaite->count--;
if (gaite->count == 0) {
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a14a0f43e04a..86554de9a3f5 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -803,9 +803,10 @@
#define MSR_AMD64_LBR_SELECT 0xc000010e
/* Zen4 */
-#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG 0xc001102e
#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4
#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+#define MSR_ZEN2_BP_CFG_BUG_FIX_BIT 33
/* Fam 19h MSRs */
#define MSR_F19H_UMC_PERF_CTL 0xc0010800
diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c
index d7c8ef1e354d..be4c5e9e5ff6 100644
--- a/arch/x86/kernel/acpi/cppc.c
+++ b/arch/x86/kernel/acpi/cppc.c
@@ -88,19 +88,19 @@ static void amd_set_max_freq_ratio(void)
rc = cppc_get_perf_caps(0, &perf_caps);
if (rc) {
- pr_warn("Could not retrieve perf counters (%d)\n", rc);
+ pr_debug("Could not retrieve perf counters (%d)\n", rc);
return;
}
rc = amd_get_boost_ratio_numerator(0, &numerator);
if (rc) {
- pr_warn("Could not retrieve highest performance (%d)\n", rc);
+ pr_debug("Could not retrieve highest performance (%d)\n", rc);
return;
}
nominal_perf = perf_caps.nominal_perf;
if (!nominal_perf) {
- pr_warn("Could not retrieve nominal performance\n");
+ pr_debug("Could not retrieve nominal performance\n");
return;
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2d9ae6ab1701..2f8e8ff2d000 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -989,6 +989,9 @@ static void init_amd_zen2(struct cpuinfo_x86 *c)
/* Correct misconfigured CPUID on some clients. */
clear_cpu_cap(c, X86_FEATURE_INVLPGB);
+
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR))
+ msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN2_BP_CFG_BUG_FIX_BIT);
}
static void init_amd_zen3(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c8c6cc0406d6..8013dccb3110 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4481,7 +4481,7 @@ static const struct opcode opcode_map_0f_38[256] = {
X16(N), X16(N),
/* 0x20 - 0x2f */
X8(N),
- X2(N), GP(SrcReg | DstMem | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
+ X2(N), GP(SrcMem | DstReg | ModRM | Mov | Aligned, &pfx_0f_e7_0f_38_2a), N, N, N, N, N,
/* 0x30 - 0x7f */
X16(N), X16(N), X16(N), X16(N), X16(N),
/* 0x80 - 0xef */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 892246204435..f0144ae8d891 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2526,6 +2526,23 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
__shadow_walk_next(iterator, *iterator->sptep);
}
+/*
+ * Note: while normally KVM uses a "bool flush" return value to let
+ * the caller batch flushes, __link_shadow_page() flushes immediately
+ * before populating the parent PTE with the new shadow page. The
+ * typical callers, direct_map() and FNAME(fetch)(), are not going
+ * to zap more than one huge SPTE anyway.
+ *
+ * The only exception, where @flush can be false, is when a huge SPTE
+ * is replaced with a shadow page SPTE with a fully populated page table,
+ * which can happen from shadow_mmu_split_huge_page(). In this case,
+ * no memory is unmapped across the change to the page tables and no
+ * immediate flush is needed for correctness.
+ *
+ * Even in that case, calls to kvm_mmu_commit_zap_page() are not
+ * batched. Doing so would require adding an invalid_list argument
+ * all the way down to __walk_slot_rmaps().
+ */
static void __link_shadow_page(struct kvm *kvm,
struct kvm_mmu_memory_cache *cache, u64 *sptep,
struct kvm_mmu_page *sp, bool flush)
@@ -2541,8 +2558,10 @@ static void __link_shadow_page(struct kvm *kvm,
parent_sp = sptep_to_sp(sptep);
WARN_ON_ONCE(parent_sp->role.level == PG_LEVEL_4K);
- mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list);
- kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, true);
+ if (mmu_page_zap_pte(kvm, parent_sp, sptep, &invalid_list))
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ else if (flush)
+ kvm_flush_remote_tlbs_sptep(kvm, sptep);
}
spte = make_nonleaf_spte(sp->spt, sp_ad_disabled(sp));
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 961804df5f45..b340dc9991ad 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -160,6 +160,16 @@ void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm)
if (!intercept_smi)
vmcb_clr_intercept(&vmcb02->control, INTERCEPT_SMI);
+ /*
+ * Intercept PAUSE if and only if L1 wants to. KVM intercepts PAUSE so
+ * that a vCPU that may be spinning waiting for a lock can be scheduled
+ * out in favor of the vCPU that holds said lock. KVM doesn't support
+ * yielding across L2 vCPUs, as KVM has limited visilibity into which
+ * L2 vCPUs are in the same L2 VM, i.e. may be contending for locks.
+ */
+ if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE))
+ vmcb_clr_intercept(&vmcb02->control, INTERCEPT_PAUSE);
+
if (nested_vmcb_needs_vls_intercept(svm)) {
/*
* If the virtual VMLOAD/VMSAVE is not enabled for the L2,
@@ -819,7 +829,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
struct kvm_vcpu *vcpu = &svm->vcpu;
- u32 pause_count12, pause_thresh12;
nested_svm_transition_tlb_flush(vcpu);
@@ -947,31 +956,13 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
vmcb02->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER))
- pause_count12 = vmcb12_ctrl->pause_filter_count;
+ vmcb02->control.pause_filter_count = vmcb12_ctrl->pause_filter_count;
else
- pause_count12 = 0;
+ vmcb02->control.pause_filter_count = 0;
if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD))
- pause_thresh12 = vmcb12_ctrl->pause_filter_thresh;
+ vmcb02->control.pause_filter_thresh = vmcb12_ctrl->pause_filter_thresh;
else
- pause_thresh12 = 0;
- if (kvm_pause_in_guest(svm->vcpu.kvm)) {
- /* use guest values since host doesn't intercept PAUSE */
- vmcb02->control.pause_filter_count = pause_count12;
- vmcb02->control.pause_filter_thresh = pause_thresh12;
-
- } else {
- /* start from host values otherwise */
- vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
- vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
-
- /* ... but ensure filtering is disabled if so requested. */
- if (vmcb12_is_intercept(vmcb12_ctrl, INTERCEPT_PAUSE)) {
- if (!pause_count12)
- vmcb02->control.pause_filter_count = 0;
- if (!pause_thresh12)
- vmcb02->control.pause_filter_thresh = 0;
- }
- }
+ vmcb02->control.pause_filter_thresh = 0;
/*
* Take ALLOW_LARGER_RAP from vmcb12 even though it should be safe to
@@ -1298,12 +1289,6 @@ void nested_svm_vmexit(struct vcpu_svm *svm)
/* in case we halted in L2 */
kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
- if (!kvm_pause_in_guest(vcpu->kvm)) {
- vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
- vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
-
- }
-
/*
* Invalidate last_bus_lock_rip unless KVM is still waiting for the
* guest to make forward progress before re-enabling bus lock detection.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e7fdd7a9c280..e02a38da5296 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -913,7 +913,15 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm))
+ /* Adjusting pause_filter_count makes no sense if PLE is disabled. */
+ WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm));
+
+ /*
+ * While running L2, KVM should intercept PAUSE if and only if L1 wants
+ * to intercept PAUSE, and L1's intercept should take priority, i.e.
+ * KVM should never handle a PAUSE intercept from L2.
+ */
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return;
control->pause_filter_count = __grow_ple_window(old,
@@ -934,7 +942,10 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm))
+ /* Adjusting pause_filter_count makes no sense if PLE is disabled. */
+ WARN_ON_ONCE(kvm_pause_in_guest(vcpu->kvm));
+
+ if (is_guest_mode(vcpu))
return;
control->pause_filter_count =
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index e7fdbe9efc90..0db25bba17f6 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -154,7 +154,7 @@ TRACE_EVENT(kvm_xen_hypercall,
__entry->a2 = a2;
__entry->a3 = a3;
__entry->a4 = a4;
- __entry->a4 = a5;
+ __entry->a5 = a5;
),
TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx",
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 56cacc06225e..31568274d8bb 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -14,6 +14,7 @@ extern bool __read_mostly flexpriority_enabled;
extern bool __read_mostly enable_ept;
extern bool __read_mostly enable_unrestricted_guest;
extern bool __read_mostly enable_ept_ad_bits;
+extern bool __read_mostly enable_cet;
extern bool __read_mostly enable_pml;
extern int __read_mostly pt_mode;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5c2c33a5f7dc..49feecb286b2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -108,6 +108,9 @@ module_param_named(unrestricted_guest,
bool __read_mostly enable_ept_ad_bits = 1;
module_param_named(eptad, enable_ept_ad_bits, bool, 0444);
+bool __read_mostly enable_cet = 1;
+module_param_named(cet, enable_cet, bool, 0444);
+
static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, 0444);
@@ -4476,7 +4479,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
* SSP is reloaded from IA32_PL3_SSP. Check SDM Vol.2A/B Chapter
* 3 and 4 for details.
*/
- if (cpu_has_load_cet_ctrl()) {
+ if (enable_cet) {
vmcs_writel(HOST_S_CET, kvm_host.s_cet);
vmcs_writel(HOST_SSP, 0);
vmcs_writel(HOST_INTR_SSP_TABLE, 0);
@@ -4532,6 +4535,10 @@ static u32 vmx_get_initial_vmentry_ctrl(void)
if (vmx_pt_mode_is_system())
vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
VM_ENTRY_LOAD_IA32_RTIT_CTL);
+
+ if (!enable_cet)
+ vmentry_ctrl &= ~VM_ENTRY_LOAD_CET_STATE;
+
/*
* IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically.
*/
@@ -4546,6 +4553,9 @@ static u32 vmx_get_initial_vmexit_ctrl(void)
{
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
+ if (!enable_cet)
+ vmexit_ctrl &= ~VM_EXIT_LOAD_CET_STATE;
+
/*
* Not used by KVM and never set in vmcs01 or vmcs02, but emulated for
* nested virtualization and thus allowed to be set in vmcs12.
@@ -8155,7 +8165,7 @@ static __init void vmx_set_cpu_caps(void)
* VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code
* fails, so disable CET in this case too.
*/
- if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
+ if (!enable_cet || !enable_unrestricted_guest ||
!cpu_has_vmx_basic_no_hw_errcode_cc()) {
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
@@ -8630,6 +8640,9 @@ __init int vmx_hardware_setup(void)
!cpu_has_vmx_invept_global())
enable_ept = 0;
+ if (!cpu_has_load_cet_ctrl())
+ enable_cet = 0;
+
/* NX support is required for shadow paging. */
if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
pr_err_ratelimited("NX (Execute Disable) not supported\n");
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index c80d0058efd1..3eee5f84f8a7 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2145,7 +2145,10 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
static void xen_enter_lazy_mmu(void)
{
- enter_lazy(XEN_LAZY_MMU);
+ preempt_disable();
+ if (xen_get_lazy_mode() != XEN_LAZY_MMU)
+ enter_lazy(XEN_LAZY_MMU);
+ preempt_enable();
}
static void xen_flush_lazy_mmu(void)
@@ -2182,7 +2185,8 @@ static void xen_leave_lazy_mmu(void)
{
preempt_disable();
xen_mc_flush();
- leave_lazy(XEN_LAZY_MMU);
+ if (xen_get_lazy_mode() != XEN_LAZY_NONE)
+ leave_lazy(XEN_LAZY_MMU);
preempt_enable();
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index bb95a05259b8..41251d4cf953 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -655,7 +655,7 @@ static void __init xen_e820_swap_entry_with_ram(struct e820_entry *swap_entry)
/* Fill new entry (keep size and page offset). */
entry->type = swap_entry->type;
entry->addr = entry_end - swap_size +
- swap_addr - swap_entry->addr;
+ swap_entry->addr - swap_addr;
entry->size = swap_entry->size;
/* Convert old entry to RAM, align to pages. */
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index e54c6e06e1cb..e796de1a749e 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -308,7 +308,6 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
}
bip->bip_flags |= BIP_COPY_USER;
- bip->bip_vcnt = nr_vecs;
return 0;
free_bip:
bio_integrity_free(bio);
@@ -403,6 +402,24 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
if (unlikely(ret < 0))
goto free_bvec;
+ /*
+ * Handle partial pinning. This can happen when pin_user_pages_fast()
+ * returns fewer pages than requested.
+ */
+ if (user_backed_iter(iter) && unlikely(ret != bytes)) {
+ if (ret > 0) {
+ int npinned = DIV_ROUND_UP(offset + ret, PAGE_SIZE);
+ int i;
+
+ for (i = 0; i < npinned; i++)
+ unpin_user_page(pages[i]);
+ }
+ if (pages != stack_pages)
+ kvfree(pages);
+ ret = -EFAULT;
+ goto free_bvec;
+ }
+
nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset,
&is_p2p);
if (pages != stack_pages)
diff --git a/block/bio.c b/block/bio.c
index b8972dba68a0..5f10900b3f42 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1279,11 +1279,12 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter,
return bio_iov_iter_align_down(bio, iter, len_align_mask);
}
-static struct folio *folio_alloc_greedy(gfp_t gfp, size_t *size)
+static struct folio *folio_alloc_greedy(gfp_t gfp, size_t *size,
+ size_t minsize)
{
struct folio *folio;
- while (*size > PAGE_SIZE) {
+ while (*size > minsize) {
folio = folio_alloc(gfp | __GFP_NORETRY, get_order(*size));
if (folio)
return folio;
@@ -1307,7 +1308,7 @@ static void bio_free_folios(struct bio *bio)
}
static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter,
- size_t maxlen)
+ size_t maxlen, size_t minsize)
{
size_t total_len = min(maxlen, iov_iter_count(iter));
@@ -1322,13 +1323,13 @@ static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter,
size_t this_len = min(total_len, SZ_1M);
struct folio *folio;
- if (this_len > PAGE_SIZE * 2)
+ if (this_len > minsize * 2)
this_len = rounddown_pow_of_two(this_len);
if (bio->bi_iter.bi_size > BIO_MAX_SIZE - this_len)
break;
- folio = folio_alloc_greedy(GFP_KERNEL, &this_len);
+ folio = folio_alloc_greedy(GFP_KERNEL, &this_len, minsize);
if (!folio)
break;
bio_add_folio_nofail(bio, folio, this_len, 0);
@@ -1344,16 +1345,16 @@ static int bio_iov_iter_bounce_write(struct bio *bio, struct iov_iter *iter,
if (!bio->bi_iter.bi_size)
return -ENOMEM;
- return 0;
+ return bio_iov_iter_align_down(bio, iter, minsize - 1);
}
static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
- size_t maxlen)
+ size_t maxlen, size_t minsize)
{
size_t len = min3(iov_iter_count(iter), maxlen, SZ_1M);
struct folio *folio;
- folio = folio_alloc_greedy(GFP_KERNEL, &len);
+ folio = folio_alloc_greedy(GFP_KERNEL, &len, minsize);
if (!folio)
return -ENOMEM;
@@ -1382,7 +1383,7 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
bvec_set_folio(&bio->bi_io_vec[0], folio, bio->bi_iter.bi_size, 0);
if (iov_iter_extract_will_pin(iter))
bio_set_flag(bio, BIO_PAGE_PINNED);
- return 0;
+ return bio_iov_iter_align_down(bio, iter, minsize - 1);
}
/**
@@ -1390,6 +1391,7 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
* @bio: bio to send
* @iter: iter to read from / write into
* @maxlen: maximum size to bounce
+ * @minsize: minimum folio allocation size
*
* Helper for direct I/O implementations that need to bounce buffer because
* we need to checksum the data or perform other operations that require
@@ -1397,11 +1399,12 @@ static int bio_iov_iter_bounce_read(struct bio *bio, struct iov_iter *iter,
* copies the data into it. Needs to be paired with bio_iov_iter_unbounce()
* called on completion.
*/
-int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen)
+int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen,
+ size_t minsize)
{
if (op_is_write(bio_op(bio)))
- return bio_iov_iter_bounce_write(bio, iter, maxlen);
- return bio_iov_iter_bounce_read(bio, iter, maxlen);
+ return bio_iov_iter_bounce_write(bio, iter, maxlen, minsize);
+ return bio_iov_iter_bounce_read(bio, iter, maxlen, minsize);
}
static void bvec_unpin(struct bio_vec *bv, bool mark_dirty)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4c5c16cce4f8..d0c37daf568f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3307,6 +3307,25 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
return BLK_STS_IOERR;
}
+ /*
+ * Integrity segment counting depends on the same queue limits
+ * (virt_boundary_mask, seg_boundary_mask, max_segment_size) that
+ * vary across stacked queues, so recompute against the bottom
+ * queue just like nr_phys_segments above.
+ */
+ if (blk_integrity_rq(rq) && rq->bio) {
+ unsigned short max_int_segs = queue_max_integrity_segments(q);
+
+ rq->nr_integrity_segments =
+ blk_rq_count_integrity_sg(rq->q, rq->bio);
+ if (rq->nr_integrity_segments > max_int_segs) {
+ printk(KERN_ERR "%s: over max integrity segments limit. (%u > %u)\n",
+ __func__, rq->nr_integrity_segments,
+ max_int_segs);
+ return BLK_STS_IOERR;
+ }
+ }
+
if (q->disk && should_fail_request(q->disk->part0, blk_rq_bytes(rq)))
return BLK_STS_IOERR;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 30cad2bb9291..42ef830054dc 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -623,6 +623,28 @@ static void disk_mark_zone_wplug_dead(struct blk_zone_wplug *zwplug)
}
}
+static inline bool disk_check_zone_wplug_dead(struct blk_zone_wplug *zwplug)
+{
+ if (!(zwplug->flags & BLK_ZONE_WPLUG_DEAD))
+ return false;
+
+ /*
+ * If a new write is received right after a zone reset completes and
+ * while the disk_zone_wplugs_worker() thread has not yet released the
+ * reference on the zone write plug after processing the last write to
+ * the zone, then the new write BIO will see the zone write plug marked
+ * as dead. This case is however a false positive and a perfectly valid
+ * pattern. In such case, restore the zone write plug to a live one.
+ */
+ if (!zwplug->wp_offset && bio_list_empty(&zwplug->bio_list)) {
+ zwplug->flags &= ~BLK_ZONE_WPLUG_DEAD;
+ refcount_inc(&zwplug->ref);
+ return false;
+ }
+
+ return true;
+}
+
static bool disk_zone_wplug_submit_bio(struct gendisk *disk,
struct blk_zone_wplug *zwplug);
@@ -1444,12 +1466,12 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
spin_lock_irqsave(&zwplug->lock, flags);
/*
- * If we got a zone write plug marked as dead, then the user is issuing
- * writes to a full zone, or without synchronizing with zone reset or
- * zone finish operations. In such case, fail the BIO to signal this
- * invalid usage.
+ * Check if we got a zone write plug marked as dead. If yes, then the
+ * user is likely issuing writes to a full zone, or without
+ * synchronizing with zone reset or zone finish operations. In such
+ * case, fail the BIO to signal this invalid usage.
*/
- if (zwplug->flags & BLK_ZONE_WPLUG_DEAD) {
+ if (disk_check_zone_wplug_dead(zwplug)) {
spin_unlock_irqrestore(&zwplug->lock, flags);
disk_put_zone_wplug(zwplug);
bio_io_error(bio);
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index e9e970fd8f33..27f31744f29e 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -192,11 +192,15 @@ static const struct dmi_system_id ac_dmi_table[] __initconst = {
static int acpi_ac_probe(struct platform_device *pdev)
{
- struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
struct power_supply_config psy_cfg = {};
+ struct acpi_device *adev;
struct acpi_ac *ac;
int result;
+ adev = ACPI_COMPANION(&pdev->dev);
+ if (!adev)
+ return -ENODEV;
+
ac = kzalloc_obj(struct acpi_ac);
if (!ac)
return -ENOMEM;
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index 0a8e02bc8c8b..ec94b09bb747 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -423,7 +423,11 @@ static void acpi_pad_notify(acpi_handle handle, u32 event, void *data)
static int acpi_pad_probe(struct platform_device *pdev)
{
- struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+ struct acpi_device *adev;
+
+ adev = ACPI_COMPANION(&pdev->dev);
+ if (!adev)
+ return -ENODEV;
return acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY,
acpi_pad_notify, adev);
diff --git a/drivers/acpi/acpi_tad.c b/drivers/acpi/acpi_tad.c
index cac07e997028..386fc1abcbdc 100644
--- a/drivers/acpi/acpi_tad.c
+++ b/drivers/acpi/acpi_tad.c
@@ -815,12 +815,16 @@ static void acpi_tad_remove(void *data)
static int acpi_tad_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- acpi_handle handle = ACPI_HANDLE(dev);
struct acpi_tad_driver_data *dd;
+ acpi_handle handle;
acpi_status status;
unsigned long long caps;
int ret;
+ handle = ACPI_HANDLE(dev);
+ if (!handle)
+ return -ENODEV;
+
/*
* Initialization failure messages are mostly about firmware issues, so
* print them at the "info" level.
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index b4c25474f42f..d4ae21a71007 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -1214,10 +1214,14 @@ static void sysfs_battery_cleanup(struct acpi_battery *battery)
static int acpi_battery_probe(struct platform_device *pdev)
{
- struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
struct acpi_battery *battery;
+ struct acpi_device *device;
int result;
+ device = ACPI_COMPANION(&pdev->dev);
+ if (!device)
+ return -ENODEV;
+
if (device->dep_unmet)
return -EPROBE_DEFER;
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index dc064a388c23..b47301ee4c8a 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -531,15 +531,20 @@ static int acpi_lid_input_open(struct input_dev *input)
static int acpi_button_probe(struct platform_device *pdev)
{
- struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
acpi_notify_handler handler;
+ struct acpi_device *device;
struct acpi_button *button;
struct input_dev *input;
- const char *hid = acpi_device_hid(device);
acpi_status status;
char *name, *class;
+ const char *hid;
int error = 0;
+ device = ACPI_COMPANION(&pdev->dev);
+ if (!device)
+ return -ENODEV;
+
+ hid = acpi_device_hid(device);
if (!strcmp(hid, ACPI_BUTTON_HID_LID) &&
lid_init_state == ACPI_BUTTON_LID_INIT_DISABLED)
return -ENODEV;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 45204538ed87..64ad4cfa6208 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1676,10 +1676,14 @@ static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool ca
static int acpi_ec_probe(struct platform_device *pdev)
{
- struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
+ struct acpi_device *device;
struct acpi_ec *ec;
int ret;
+ device = ACPI_COMPANION(&pdev->dev);
+ if (!device)
+ return -ENODEV;
+
if (boot_ec && (boot_ec->handle == device->handle ||
!strcmp(acpi_device_hid(device), ACPI_ECDT_HID))) {
/* Fast path: this device corresponds to the boot EC. */
diff --git a/drivers/acpi/hed.c b/drivers/acpi/hed.c
index 4d5e12ed6f3c..060e8d670f5d 100644
--- a/drivers/acpi/hed.c
+++ b/drivers/acpi/hed.c
@@ -50,9 +50,13 @@ static void acpi_hed_notify(acpi_handle handle, u32 event, void *data)
static int acpi_hed_probe(struct platform_device *pdev)
{
- struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
+ struct acpi_device *device;
int err;
+ device = ACPI_COMPANION(&pdev->dev);
+ if (!device)
+ return -ENODEV;
+
/* Only one hardware error device */
if (hed_handle)
return -EINVAL;
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index d13264fb9e02..9304ac996d41 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -3341,12 +3341,16 @@ static int acpi_nfit_probe(struct platform_device *pdev)
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
struct acpi_nfit_desc *acpi_desc;
struct device *dev = &pdev->dev;
- struct acpi_device *adev = ACPI_COMPANION(dev);
struct acpi_table_header *tbl;
+ struct acpi_device *adev;
acpi_status status = AE_OK;
acpi_size sz;
int rc = 0;
+ adev = ACPI_COMPANION(&pdev->dev);
+ if (!adev)
+ return -ENODEV;
+
rc = acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY,
acpi_nfit_notify, dev);
if (rc)
diff --git a/drivers/acpi/pfr_telemetry.c b/drivers/acpi/pfr_telemetry.c
index 32bdf8cbe8f2..2387376832a1 100644
--- a/drivers/acpi/pfr_telemetry.c
+++ b/drivers/acpi/pfr_telemetry.c
@@ -360,10 +360,14 @@ static void pfrt_log_put_idx(void *data)
static int acpi_pfrt_log_probe(struct platform_device *pdev)
{
- acpi_handle handle = ACPI_HANDLE(&pdev->dev);
struct pfrt_log_device *pfrt_log_dev;
+ acpi_handle handle;
int ret;
+ handle = ACPI_HANDLE(&pdev->dev);
+ if (!handle)
+ return -ENODEV;
+
if (!acpi_has_method(handle, "_DSM")) {
dev_dbg(&pdev->dev, "Missing _DSM\n");
return -ENODEV;
diff --git a/drivers/acpi/pfr_update.c b/drivers/acpi/pfr_update.c
index 11b1c2828005..6283105bb0e8 100644
--- a/drivers/acpi/pfr_update.c
+++ b/drivers/acpi/pfr_update.c
@@ -538,10 +538,14 @@ static void pfru_put_idx(void *data)
static int acpi_pfru_probe(struct platform_device *pdev)
{
- acpi_handle handle = ACPI_HANDLE(&pdev->dev);
struct pfru_device *pfru_dev;
+ acpi_handle handle;
int ret;
+ handle = ACPI_HANDLE(&pdev->dev);
+ if (!handle)
+ return -ENODEV;
+
if (!acpi_has_method(handle, "_DSM")) {
dev_dbg(&pdev->dev, "Missing _DSM\n");
return -ENODEV;
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 440f1d69aca8..86b7c7975852 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -629,11 +629,15 @@ static void acpi_sbs_callback(void *context)
static int acpi_sbs_probe(struct platform_device *pdev)
{
- struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
+ struct acpi_device *device;
struct acpi_sbs *sbs;
int result = 0;
int id;
+ device = ACPI_COMPANION(&pdev->dev);
+ if (!device)
+ return -ENODEV;
+
sbs = kzalloc_obj(struct acpi_sbs);
if (!sbs) {
result = -ENOMEM;
diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
index f413270415b6..c0ffa267f96c 100644
--- a/drivers/acpi/sbshc.c
+++ b/drivers/acpi/sbshc.c
@@ -237,11 +237,15 @@ static int smbus_alarm(void *context)
static int acpi_smbus_hc_probe(struct platform_device *pdev)
{
- struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
+ struct acpi_device *device;
int status;
unsigned long long val;
struct acpi_smb_hc *hc;
+ device = ACPI_COMPANION(&pdev->dev);
+ if (!device)
+ return -ENODEV;
+
status = acpi_evaluate_integer(device->handle, "_EC", NULL, &val);
if (ACPI_FAILURE(status)) {
pr_err("error obtaining _EC.\n");
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index b8b487d89d25..dfc7daa809b5 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -789,7 +789,7 @@ static int acpi_thermal_probe(struct platform_device *pdev)
int i;
if (!device)
- return -EINVAL;
+ return -ENODEV;
tz = kzalloc_obj(struct acpi_thermal);
if (!tz)
diff --git a/drivers/acpi/tiny-power-button.c b/drivers/acpi/tiny-power-button.c
index 531e65b01bcb..92516ef84b02 100644
--- a/drivers/acpi/tiny-power-button.c
+++ b/drivers/acpi/tiny-power-button.c
@@ -38,9 +38,13 @@ static u32 acpi_tiny_power_button_event(void *not_used)
static int acpi_tiny_power_button_probe(struct platform_device *pdev)
{
- struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
+ struct acpi_device *device;
acpi_status status;
+ device = ACPI_COMPANION(&pdev->dev);
+ if (!device)
+ return -ENODEV;
+
if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
status = acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
acpi_tiny_power_button_event,
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 6d13f1481de0..6c041eaebdb9 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -920,6 +920,9 @@ static int ublk_validate_params(const struct ublk_device *ub)
if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9))
return -EINVAL;
+ if (p->max_sectors < PAGE_SECTORS)
+ return -EINVAL;
+
if (ublk_dev_is_zoned(ub) && !p->chunk_sectors)
return -EINVAL;
} else
diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c
index 50c7b45c59e3..d0130658091b 100644
--- a/drivers/hid/bpf/hid_bpf_dispatch.c
+++ b/drivers/hid/bpf/hid_bpf_dispatch.c
@@ -24,7 +24,8 @@ EXPORT_SYMBOL(hid_ops);
u8 *
dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type, u8 *data,
- u32 *size, int interrupt, u64 source, bool from_bpf)
+ size_t *buf_size, u32 *size, int interrupt, u64 source,
+ bool from_bpf)
{
struct hid_bpf_ctx_kern ctx_kern = {
.ctx = {
@@ -74,6 +75,7 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type
*size = ret;
}
+ *buf_size = ctx_kern.ctx.allocated_size;
return ctx_kern.data;
}
EXPORT_SYMBOL_GPL(dispatch_hid_bpf_device_event);
@@ -505,7 +507,7 @@ __hid_bpf_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, u8 *b
if (ret)
return ret;
- return hid_ops->hid_input_report(ctx->hid, type, buf, size, 0, (u64)(long)ctx, true,
+ return hid_ops->hid_input_report(ctx->hid, type, buf, size, size, 0, (u64)(long)ctx, true,
lock_already_taken);
}
diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c
index 0fdc0968b9ef..462010a75899 100644
--- a/drivers/hid/hid-appletb-kbd.c
+++ b/drivers/hid/hid-appletb-kbd.c
@@ -17,7 +17,7 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/backlight.h>
-#include <linux/timer.h>
+#include <linux/workqueue.h>
#include <linux/input/sparse-keymap.h>
#include "hid-ids.h"
@@ -62,7 +62,8 @@ struct appletb_kbd {
struct input_handle kbd_handle;
struct input_handle tpd_handle;
struct backlight_device *backlight_dev;
- struct timer_list inactivity_timer;
+ struct delayed_work inactivity_work;
+ struct work_struct restore_brightness_work;
bool has_dimmed;
bool has_turned_off;
u8 saved_mode;
@@ -164,16 +165,18 @@ static int appletb_tb_key_to_slot(unsigned int code)
}
}
-static void appletb_inactivity_timer(struct timer_list *t)
+static void appletb_inactivity_work(struct work_struct *work)
{
- struct appletb_kbd *kbd = timer_container_of(kbd, t, inactivity_timer);
+ struct appletb_kbd *kbd = container_of(to_delayed_work(work),
+ struct appletb_kbd,
+ inactivity_work);
if (kbd->backlight_dev && appletb_tb_autodim) {
if (!kbd->has_dimmed) {
backlight_device_set_brightness(kbd->backlight_dev, 1);
kbd->has_dimmed = true;
- mod_timer(&kbd->inactivity_timer,
- jiffies + secs_to_jiffies(appletb_tb_idle_timeout));
+ mod_delayed_work(system_wq, &kbd->inactivity_work,
+ secs_to_jiffies(appletb_tb_idle_timeout));
} else if (!kbd->has_turned_off) {
backlight_device_set_brightness(kbd->backlight_dev, 0);
kbd->has_turned_off = true;
@@ -181,16 +184,25 @@ static void appletb_inactivity_timer(struct timer_list *t)
}
}
+static void appletb_restore_brightness_work(struct work_struct *work)
+{
+ struct appletb_kbd *kbd = container_of(work, struct appletb_kbd,
+ restore_brightness_work);
+
+ if (kbd->backlight_dev)
+ backlight_device_set_brightness(kbd->backlight_dev, 2);
+}
+
static void reset_inactivity_timer(struct appletb_kbd *kbd)
{
if (kbd->backlight_dev && appletb_tb_autodim) {
if (kbd->has_dimmed || kbd->has_turned_off) {
- backlight_device_set_brightness(kbd->backlight_dev, 2);
kbd->has_dimmed = false;
kbd->has_turned_off = false;
+ schedule_work(&kbd->restore_brightness_work);
}
- mod_timer(&kbd->inactivity_timer,
- jiffies + secs_to_jiffies(appletb_tb_dim_timeout));
+ mod_delayed_work(system_wq, &kbd->inactivity_work,
+ secs_to_jiffies(appletb_tb_dim_timeout));
}
}
@@ -408,9 +420,11 @@ static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id
dev_err_probe(dev, -ENODEV, "Failed to get backlight device\n");
} else {
backlight_device_set_brightness(kbd->backlight_dev, 2);
- timer_setup(&kbd->inactivity_timer, appletb_inactivity_timer, 0);
- mod_timer(&kbd->inactivity_timer,
- jiffies + secs_to_jiffies(appletb_tb_dim_timeout));
+ INIT_DELAYED_WORK(&kbd->inactivity_work, appletb_inactivity_work);
+ INIT_WORK(&kbd->restore_brightness_work,
+ appletb_restore_brightness_work);
+ mod_delayed_work(system_wq, &kbd->inactivity_work,
+ secs_to_jiffies(appletb_tb_dim_timeout));
}
kbd->inp_handler.event = appletb_kbd_inp_event;
@@ -440,13 +454,14 @@ static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id
unregister_handler:
input_unregister_handler(&kbd->inp_handler);
close_hw:
- if (kbd->backlight_dev) {
- put_device(&kbd->backlight_dev->dev);
- timer_delete_sync(&kbd->inactivity_timer);
- }
hid_hw_close(hdev);
stop_hw:
hid_hw_stop(hdev);
+ if (kbd->backlight_dev) {
+ cancel_delayed_work_sync(&kbd->inactivity_work);
+ cancel_work_sync(&kbd->restore_brightness_work);
+ put_device(&kbd->backlight_dev->dev);
+ }
return ret;
}
@@ -457,13 +472,14 @@ static void appletb_kbd_remove(struct hid_device *hdev)
appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF);
input_unregister_handler(&kbd->inp_handler);
+ hid_hw_close(hdev);
+ hid_hw_stop(hdev);
+
if (kbd->backlight_dev) {
+ cancel_delayed_work_sync(&kbd->inactivity_work);
+ cancel_work_sync(&kbd->restore_brightness_work);
put_device(&kbd->backlight_dev->dev);
- timer_delete_sync(&kbd->inactivity_timer);
}
-
- hid_hw_close(hdev);
- hid_hw_stop(hdev);
}
static int appletb_kbd_suspend(struct hid_device *hdev, pm_message_t msg)
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 61afec5915ec..b3596851c719 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2033,24 +2033,32 @@ int __hid_request(struct hid_device *hid, struct hid_report *report,
}
EXPORT_SYMBOL_GPL(__hid_request);
-int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size,
- int interrupt)
+int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data,
+ size_t bufsize, u32 size, int interrupt)
{
struct hid_report_enum *report_enum = hid->report_enum + type;
struct hid_report *report;
struct hid_driver *hdrv;
int max_buffer_size = HID_MAX_BUFFER_SIZE;
u32 rsize, csize = size;
+ size_t bsize = bufsize;
u8 *cdata = data;
int ret = 0;
report = hid_get_report(report_enum, data);
if (!report)
- goto out;
+ return 0;
+
+ if (unlikely(bsize < csize)) {
+ hid_warn_ratelimited(hid, "Event data for report %d is incorrect (%d vs %ld)\n",
+ report->id, csize, bsize);
+ return -EINVAL;
+ }
if (report_enum->numbered) {
cdata++;
csize--;
+ bsize--;
}
rsize = hid_compute_report_size(report);
@@ -2063,11 +2071,16 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *
else if (rsize > max_buffer_size)
rsize = max_buffer_size;
+ if (bsize < rsize) {
+ hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %ld)\n",
+ report->id, rsize, bsize);
+ return -EINVAL;
+ }
+
if (csize < rsize) {
- hid_warn_ratelimited(hid, "Event data for report %d was too short (%d vs %d)\n",
- report->id, rsize, csize);
- ret = -EINVAL;
- goto out;
+ dbg_hid("report %d is too short, (%d < %d)\n", report->id,
+ csize, rsize);
+ memset(cdata + csize, 0, rsize - csize);
}
if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_report_event)
@@ -2075,7 +2088,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *
if (hid->claimed & HID_CLAIMED_HIDRAW) {
ret = hidraw_report_event(hid, data, size);
if (ret)
- goto out;
+ return ret;
}
if (hid->claimed != HID_CLAIMED_HIDRAW && report->maxfield) {
@@ -2087,15 +2100,15 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *
if (hid->claimed & HID_CLAIMED_INPUT)
hidinput_report_event(hid, report);
-out:
+
return ret;
}
EXPORT_SYMBOL_GPL(hid_report_raw_event);
static int __hid_input_report(struct hid_device *hid, enum hid_report_type type,
- u8 *data, u32 size, int interrupt, u64 source, bool from_bpf,
- bool lock_already_taken)
+ u8 *data, size_t bufsize, u32 size, int interrupt, u64 source,
+ bool from_bpf, bool lock_already_taken)
{
struct hid_report_enum *report_enum;
struct hid_driver *hdrv;
@@ -2120,7 +2133,8 @@ static int __hid_input_report(struct hid_device *hid, enum hid_report_type type,
report_enum = hid->report_enum + type;
hdrv = hid->driver;
- data = dispatch_hid_bpf_device_event(hid, type, data, &size, interrupt, source, from_bpf);
+ data = dispatch_hid_bpf_device_event(hid, type, data, &bufsize, &size, interrupt,
+ source, from_bpf);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto unlock;
@@ -2149,7 +2163,7 @@ static int __hid_input_report(struct hid_device *hid, enum hid_report_type type,
goto unlock;
}
- ret = hid_report_raw_event(hid, type, data, size, interrupt);
+ ret = hid_report_raw_event(hid, type, data, bufsize, size, interrupt);
unlock:
if (!lock_already_taken)
@@ -2167,16 +2181,41 @@ unlock:
* @interrupt: distinguish between interrupt and control transfers
*
* This is data entry for lower layers.
+ * Legacy, please use hid_safe_input_report() instead.
*/
int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size,
int interrupt)
{
- return __hid_input_report(hid, type, data, size, interrupt, 0,
+ return __hid_input_report(hid, type, data, size, size, interrupt, 0,
false, /* from_bpf */
false /* lock_already_taken */);
}
EXPORT_SYMBOL_GPL(hid_input_report);
+/**
+ * hid_safe_input_report - report data from lower layer (usb, bt...)
+ *
+ * @hid: hid device
+ * @type: HID report type (HID_*_REPORT)
+ * @data: report contents
+ * @bufsize: allocated size of the data buffer
+ * @size: useful size of data parameter
+ * @interrupt: distinguish between interrupt and control transfers
+ *
+ * This is data entry for lower layers.
+ * Please use this function instead of the non safe version because we provide
+ * here the size of the buffer, allowing hid-core to make smarter decisions
+ * regarding the incoming buffer.
+ */
+int hid_safe_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data,
+ size_t bufsize, u32 size, int interrupt)
+{
+ return __hid_input_report(hid, type, data, bufsize, size, interrupt, 0,
+ false, /* from_bpf */
+ false /* lock_already_taken */);
+}
+EXPORT_SYMBOL_GPL(hid_safe_input_report);
+
bool hid_match_one_id(const struct hid_device *hdev,
const struct hid_device_id *id)
{
diff --git a/drivers/hid/hid-elan.c b/drivers/hid/hid-elan.c
index 76d93fc48f6a..0190ad567ce4 100644
--- a/drivers/hid/hid-elan.c
+++ b/drivers/hid/hid-elan.c
@@ -513,6 +513,7 @@ static const struct hid_device_id elan_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_HP_X2_10_COVER),
.driver_data = ELAN_HAS_LED },
{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_TOSHIBA_CLICK_L9W) },
+ { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_SB974D) },
{ }
};
MODULE_DEVICE_TABLE(hid, elan_devices);
diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c
index 333341e80b0e..70e2eedb465a 100644
--- a/drivers/hid/hid-ft260.c
+++ b/drivers/hid/hid-ft260.c
@@ -1068,10 +1068,22 @@ static int ft260_raw_event(struct hid_device *hdev, struct hid_report *report,
struct ft260_device *dev = hid_get_drvdata(hdev);
struct ft260_i2c_input_report *xfer = (void *)data;
+ if (size < offsetof(struct ft260_i2c_input_report, data)) {
+ hid_err(hdev, "short report %d\n", size);
+ return -1;
+ }
+
if (xfer->report >= FT260_I2C_REPORT_MIN &&
xfer->report <= FT260_I2C_REPORT_MAX) {
- ft260_dbg("i2c resp: rep %#02x len %d\n", xfer->report,
- xfer->length);
+ ft260_dbg("i2c resp: rep %#02x len %d size %d\n",
+ xfer->report, xfer->length, size);
+
+ if (xfer->length > size -
+ offsetof(struct ft260_i2c_input_report, data)) {
+ hid_err(hdev, "report %#02x: length %d exceeds HID report size\n",
+ xfer->report, xfer->length);
+ return -1;
+ }
if ((dev->read_buf == NULL) ||
(xfer->length > dev->read_len - dev->read_idx)) {
diff --git a/drivers/hid/hid-gfrm.c b/drivers/hid/hid-gfrm.c
index 699186ff2349..d2a56bf92b41 100644
--- a/drivers/hid/hid-gfrm.c
+++ b/drivers/hid/hid-gfrm.c
@@ -66,7 +66,7 @@ static int gfrm_raw_event(struct hid_device *hdev, struct hid_report *report,
switch (data[1]) {
case GFRM100_SEARCH_KEY_DOWN:
ret = hid_report_raw_event(hdev, HID_INPUT_REPORT, search_key_dn,
- sizeof(search_key_dn), 1);
+ sizeof(search_key_dn), sizeof(search_key_dn), 1);
break;
case GFRM100_SEARCH_KEY_AUDIO_DATA:
@@ -74,7 +74,7 @@ static int gfrm_raw_event(struct hid_device *hdev, struct hid_report *report,
case GFRM100_SEARCH_KEY_UP:
ret = hid_report_raw_event(hdev, HID_INPUT_REPORT, search_key_up,
- sizeof(search_key_up), 1);
+ sizeof(search_key_up), sizeof(search_key_up), 1);
break;
default:
diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
index 1af477e58480..c99c3c0d442e 100644
--- a/drivers/hid/hid-google-hammer.c
+++ b/drivers/hid/hid-google-hammer.c
@@ -496,7 +496,7 @@ static int hammer_probe(struct hid_device *hdev,
if (error)
return error;
- error = devm_add_action(&hdev->dev, hammer_stop, hdev);
+ error = devm_add_action_or_reset(&hdev->dev, hammer_stop, hdev);
if (error)
return error;
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 0cf63742315b..4657d96fb083 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -277,6 +277,9 @@
#define USB_VENDOR_ID_BIGBEN 0x146b
#define USB_DEVICE_ID_BIGBEN_PS3OFMINIPAD 0x0902
+#define I2C_VENDOR_ID_BLTP 0x36b6
+#define I2C_PRODUCT_ID_BLTP7853 0xc001
+
#define USB_VENDOR_ID_BTC 0x046e
#define USB_DEVICE_ID_BTC_EMPREX_REMOTE 0x5578
#define USB_DEVICE_ID_BTC_EMPREX_REMOTE_2 0x5577
@@ -455,6 +458,7 @@
#define USB_DEVICE_ID_EDIFIER_QR30 0xa101 /* EDIFIER Hal0 2.0 SE */
#define USB_VENDOR_ID_ELAN 0x04f3
+#define USB_DEVICE_ID_SB974D 0x0400
#define USB_DEVICE_ID_TOSHIBA_CLICK_L9W 0x0401
#define USB_DEVICE_ID_HP_X2 0x074d
#define USB_DEVICE_ID_HP_X2_10_COVER 0x0755
diff --git a/drivers/hid/hid-lenovo-go-s.c b/drivers/hid/hid-lenovo-go-s.c
index 01c7bdd4fbe0..ff1782a75191 100644
--- a/drivers/hid/hid-lenovo-go-s.c
+++ b/drivers/hid/hid-lenovo-go-s.c
@@ -1369,6 +1369,14 @@ static void cfg_setup(struct work_struct *work)
"Failed to retrieve IMU Manufacturer: %i\n", ret);
return;
}
+
+ ret = mcu_property_out(drvdata.hdev, GET_GAMEPAD_CFG, FEATURE_OS_MODE,
+ NULL, 0);
+ if (ret) {
+ dev_err(&drvdata.hdev->dev,
+ "Failed to retrieve OS Mode: %i\n", ret);
+ return;
+ }
}
static int hid_gos_cfg_probe(struct hid_device *hdev,
@@ -1427,6 +1435,27 @@ static void hid_gos_cfg_remove(struct hid_device *hdev)
hid_set_drvdata(hdev, NULL);
}
+static int hid_gos_cfg_reset_resume(struct hid_device *hdev)
+{
+ u8 os_mode = drvdata.os_mode;
+ int ret;
+
+ ret = mcu_property_out(drvdata.hdev, SET_GAMEPAD_CFG,
+ FEATURE_OS_MODE, &os_mode, 1);
+ if (ret < 0)
+ return ret;
+
+ ret = mcu_property_out(drvdata.hdev, GET_GAMEPAD_CFG,
+ FEATURE_OS_MODE, NULL, 0);
+ if (ret < 0)
+ return ret;
+
+ if (drvdata.os_mode != os_mode)
+ return -ENODEV;
+
+ return 0;
+}
+
static int hid_gos_probe(struct hid_device *hdev,
const struct hid_device_id *id)
{
@@ -1481,6 +1510,20 @@ static void hid_gos_remove(struct hid_device *hdev)
}
}
+static int hid_gos_reset_resume(struct hid_device *hdev)
+{
+ int ep = get_endpoint_address(hdev);
+
+ switch (ep) {
+ case GO_S_CFG_INTF_IN:
+ return hid_gos_cfg_reset_resume(hdev);
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static const struct hid_device_id hid_gos_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_QHE,
USB_DEVICE_ID_LENOVO_LEGION_GO_S_XINPUT) },
@@ -1496,6 +1539,7 @@ static struct hid_driver hid_lenovo_go_s = {
.probe = hid_gos_probe,
.remove = hid_gos_remove,
.raw_event = hid_gos_raw_event,
+ .reset_resume = hid_gos_reset_resume,
};
module_hid_driver(hid_lenovo_go_s);
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index b1330d23bd2d..ccbf28869a96 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -3673,7 +3673,7 @@ static int hidpp10_consumer_keys_raw_event(struct hidpp_device *hidpp,
memcpy(&consumer_report[1], &data[3], 4);
/* We are called from atomic context */
hid_report_raw_event(hidpp->hid_dev, HID_INPUT_REPORT,
- consumer_report, 5, 1);
+ consumer_report, sizeof(consumer_report), 5, 1);
return 1;
}
@@ -4685,6 +4685,44 @@ static const struct hid_device_id hidpp_devices[] = {
HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb391) },
{ /* MX Master 4 mouse over Bluetooth */
HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb042) },
+ { /* Logitech Signature K650 over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb36f) },
+ { /* Logitech Signature K650 B2B over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb370) },
+ { /* Logitech Pebble Keys 2 K380S over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb377) },
+ { /* Logitech Casa Pop-Up Desk over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb371) },
+ { /* Logitech Casa Pop-Up Desk B2B over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb374) },
+ { /* Logitech Wave Keys over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb383) },
+ { /* Logitech Wave Keys B2B over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb384) },
+ { /* Logitech Signature Slim K950 over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb386) },
+ { /* Logitech Signature Slim K950 B2B over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb388) },
+ { /* Logitech MX Keys S over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb378) },
+ { /* Logitech MX Keys S B2B over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb380) },
+ { /* Logitech Keys-To-Go 2 over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb38c) },
+ { /* Logitech Pop Icon Keys over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb38f) },
+ { /* Logitech MX Keys Mini over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb369) },
+ { /* Logitech MX Keys Mini B2B over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb36e) },
+ { /* Logitech Signature Slim Solar+ K980 B2B over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb394) },
+ { /* Logitech Bluetooth Keyboard K250/K251 over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb397) },
+ { /* Logitech Signature Comfort K880 over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb39c) },
+ { /* Logitech Signature Comfort K880 B2B over Bluetooth */
+ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb39d) },
{}
};
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index e70bd3dc07ab..802a3479e24b 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -390,6 +390,10 @@ static int magicmouse_raw_event(struct hid_device *hdev,
struct input_dev *input = msc->input;
int x = 0, y = 0, ii, clicks = 0, npoints;
+ /* Protect against zero sized recursive calls from DOUBLE_REPORT_ID */
+ if (size < 1)
+ return 0;
+
switch (data[0]) {
case TRACKPAD_REPORT_ID:
case TRACKPAD2_BT_REPORT_ID:
@@ -490,6 +494,18 @@ static int magicmouse_raw_event(struct hid_device *hdev,
/* Sometimes the trackpad sends two touch reports in one
* packet.
*/
+
+ /* Ensure that we have at least 2 elements (report type and size) */
+ if (size < 2)
+ return 0;
+
+ if (size < data[1] + 2) {
+ hid_warn(hdev,
+ "received report length (%d) was smaller than specified (%d)",
+ size, data[1] + 2);
+ return 0;
+ }
+
magicmouse_raw_event(hdev, report, data + 2, data[1]);
magicmouse_raw_event(hdev, report, data + 2 + data[1],
size - 2 - data[1]);
diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c
index be80970ab48e..e4ddd8e9293b 100644
--- a/drivers/hid/hid-mcp2221.c
+++ b/drivers/hid/hid-mcp2221.c
@@ -128,6 +128,7 @@ struct mcp2221 {
u8 *rxbuf;
u8 txbuf[64];
int rxbuf_idx;
+ int rxbuf_size;
int status;
u8 cur_i2c_clk_div;
struct gpio_chip *gc;
@@ -330,12 +331,14 @@ static int mcp_i2c_smbus_read(struct mcp2221 *mcp,
mcp->txbuf[3] = (u8)(msg->addr << 1);
total_len = msg->len;
mcp->rxbuf = msg->buf;
+ mcp->rxbuf_size = msg->len;
} else {
mcp->txbuf[1] = smbus_len;
mcp->txbuf[2] = 0;
mcp->txbuf[3] = (u8)(smbus_addr << 1);
total_len = smbus_len;
mcp->rxbuf = smbus_buf;
+ mcp->rxbuf_size = smbus_len;
}
ret = mcp_send_data_req_status(mcp, mcp->txbuf, 4);
@@ -919,6 +922,10 @@ static int mcp2221_raw_event(struct hid_device *hdev,
mcp->status = -EINVAL;
break;
}
+ if (mcp->rxbuf_idx + data[3] > mcp->rxbuf_size) {
+ mcp->status = -EINVAL;
+ break;
+ }
buf = mcp->rxbuf;
memcpy(&buf[mcp->rxbuf_idx], &data[4], data[3]);
mcp->rxbuf_idx = mcp->rxbuf_idx + data[3];
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index e82a3c4e5b44..eeab0b6e32cc 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -533,7 +533,7 @@ static void mt_get_feature(struct hid_device *hdev, struct hid_report *report)
}
ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf,
- size, 0);
+ size, size, 0);
if (ret)
dev_warn(&hdev->dev, "failed to report feature\n");
}
diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c
index c43caac20b61..e48537331675 100644
--- a/drivers/hid/hid-playstation.c
+++ b/drivers/hid/hid-playstation.c
@@ -2384,7 +2384,8 @@ static int dualshock4_parse_report(struct ps_device *ps_dev, struct hid_report *
}
ds4_report = &usb->common;
- num_touch_reports = usb->num_touch_reports;
+ num_touch_reports = min_t(u8, usb->num_touch_reports,
+ ARRAY_SIZE(usb->touch_reports));
touch_reports = usb->touch_reports;
} else if (hdev->bus == BUS_BLUETOOTH && report->id == DS4_INPUT_REPORT_BT &&
size == DS4_INPUT_REPORT_BT_SIZE) {
@@ -2404,7 +2405,8 @@ static int dualshock4_parse_report(struct ps_device *ps_dev, struct hid_report *
}
ds4_report = &bt->common;
- num_touch_reports = bt->num_touch_reports;
+ num_touch_reports = min_t(u8, bt->num_touch_reports,
+ ARRAY_SIZE(bt->touch_reports));
touch_reports = bt->touch_reports;
} else if (hdev->bus == BUS_BLUETOOTH &&
report->id == DS4_INPUT_REPORT_BT_MINIMAL &&
diff --git a/drivers/hid/hid-primax.c b/drivers/hid/hid-primax.c
index e44d79dff8de..8db054280afb 100644
--- a/drivers/hid/hid-primax.c
+++ b/drivers/hid/hid-primax.c
@@ -44,7 +44,7 @@ static int px_raw_event(struct hid_device *hid, struct hid_report *report,
data[0] |= (1 << (data[idx] - 0xE0));
data[idx] = 0;
}
- hid_report_raw_event(hid, HID_INPUT_REPORT, data, size, 0);
+ hid_report_raw_event(hid, HID_INPUT_REPORT, data, size, size, 0);
return 1;
default: /* unknown report */
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 9e88c9d6c6dc..512049963978 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -235,7 +235,7 @@ static const struct hid_device_id hid_quirks[] = {
* used as a driver. See hid_scan_report().
*/
static const struct hid_device_id hid_have_special_driver[] = {
-#if IS_ENABLED(CONFIG_APPLEDISPLAY)
+#if IS_ENABLED(CONFIG_USB_APPLEDISPLAY)
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 0x9218) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 0x9219) },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 0x921c) },
diff --git a/drivers/hid/hid-sjoy.c b/drivers/hid/hid-sjoy.c
index bab93d71b760..963c45113204 100644
--- a/drivers/hid/hid-sjoy.c
+++ b/drivers/hid/hid-sjoy.c
@@ -91,17 +91,17 @@ static int sjoyff_init(struct hid_device *hid)
set_bit(FF_RUMBLE, dev->ffbit);
- error = input_ff_create_memless(dev, sjoyff, hid_sjoyff_play);
- if (error) {
- kfree(sjoyff);
- return error;
- }
-
sjoyff->report = report;
sjoyff->report->field[0]->value[0] = 0x01;
sjoyff->report->field[0]->value[1] = 0x00;
sjoyff->report->field[0]->value[2] = 0x00;
hid_hw_request(hid, sjoyff->report, HID_REQ_SET_REPORT);
+
+ error = input_ff_create_memless(dev, sjoyff, hid_sjoyff_play);
+ if (error) {
+ kfree(sjoyff);
+ return error;
+ }
}
hid_info(hid, "Force feedback for SmartJoy PLUS PS2/USB adapter\n");
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index b5e724676c1d..315343415e8f 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -1169,10 +1169,9 @@ static int sony_raw_event(struct hid_device *hdev, struct hid_report *report,
sixaxis_parse_report(sc, rd, size);
} else if ((sc->quirks & MOTION_CONTROLLER_BT) && rd[0] == 0x01 && size == 49) {
sixaxis_parse_report(sc, rd, size);
- } else if ((sc->quirks & NAVIGATION_CONTROLLER) && rd[0] == 0x01 &&
- size == 49) {
+ } else if ((sc->quirks & NAVIGATION_CONTROLLER) && rd[0] == 0x01 && size == 49) {
sixaxis_parse_report(sc, rd, size);
- } else if ((sc->quirks & NSG_MRXU_REMOTE) && rd[0] == 0x02) {
+ } else if ((sc->quirks & NSG_MRXU_REMOTE) && rd[0] == 0x02 && size >= 12) {
nsg_mrxu_parse_report(sc, rd, size);
return 1;
} else if ((sc->quirks & RB4_GUITAR_PS4_USB) && rd[0] == 0x01 && size == 64) {
@@ -1189,7 +1188,7 @@ static int sony_raw_event(struct hid_device *hdev, struct hid_report *report,
/* Rock Band 3 PS3 Pro instruments set rd[24] to 0xE0 when they're
* sending full reports, and 0x02 when only sending navigation.
*/
- if ((sc->quirks & RB3_PRO_INSTRUMENT) && rd[24] == 0x02) {
+ if ((sc->quirks & RB3_PRO_INSTRUMENT) && size >= 25 && rd[24] == 0x02) {
/* Only attempt to enable full report every 8 seconds */
if (time_after(jiffies, sc->rb3_pro_poke_jiffies)) {
sc->rb3_pro_poke_jiffies = jiffies + secs_to_jiffies(8);
@@ -1640,9 +1639,6 @@ static int sony_leds_init(struct sony_sc *sc)
u8 max_brightness[MAX_LEDS] = { [0 ... (MAX_LEDS - 1)] = 1 };
u8 use_hw_blink[MAX_LEDS] = { 0 };
- if (WARN_ON(!(sc->quirks & SONY_LED_SUPPORT)))
- return -EINVAL;
-
if (sc->quirks & BUZZ_CONTROLLER) {
sc->led_count = 4;
use_color_names = 0;
@@ -2456,11 +2452,10 @@ static void sony_remove(struct hid_device *hdev)
static int sony_suspend(struct hid_device *hdev, pm_message_t message)
{
#ifdef CONFIG_SONY_FF
+ struct sony_sc *sc = hid_get_drvdata(hdev);
/* On suspend stop any running force-feedback events */
- if (SONY_FF_SUPPORT) {
- struct sony_sc *sc = hid_get_drvdata(hdev);
-
+ if (sc->quirks & SONY_FF_SUPPORT) {
sc->left = sc->right = 0;
sony_send_output_report(sc);
}
diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c
index bd7f93e96e4e..b73f09d26688 100644
--- a/drivers/hid/hid-uclogic-core.c
+++ b/drivers/hid/hid-uclogic-core.c
@@ -184,7 +184,9 @@ static int uclogic_input_configured(struct hid_device *hdev,
suffix = "System Control";
break;
}
- } else {
+ }
+
+ if (suffix) {
hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL,
"%s %s", hdev->name, suffix);
if (!hi->input->name)
diff --git a/drivers/hid/hid-vivaldi-common.c b/drivers/hid/hid-vivaldi-common.c
index bf734055d4b6..b12bb5cc091a 100644
--- a/drivers/hid/hid-vivaldi-common.c
+++ b/drivers/hid/hid-vivaldi-common.c
@@ -85,7 +85,7 @@ void vivaldi_feature_mapping(struct hid_device *hdev,
}
ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, report_data,
- report_len, 0);
+ report_len, report_len, 0);
if (ret) {
dev_warn(&hdev->dev, "failed to report feature %d\n",
field->report->id);
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index 5a183af3d5c6..3adb16366e93 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -149,6 +149,8 @@ static const struct i2c_hid_quirks {
I2C_HID_QUIRK_BOGUS_IRQ },
{ I2C_VENDOR_ID_GOODIX, I2C_DEVICE_ID_GOODIX_0D42,
I2C_HID_QUIRK_DELAY_WAKEUP_AFTER_RESUME },
+ { I2C_VENDOR_ID_BLTP, I2C_PRODUCT_ID_BLTP7853,
+ I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
{ 0, 0 }
};
@@ -574,9 +576,10 @@ static void i2c_hid_get_input(struct i2c_hid *ihid)
if (ihid->hid->group != HID_GROUP_RMI)
pm_wakeup_event(&ihid->client->dev, 0);
- hid_input_report(ihid->hid, HID_INPUT_REPORT,
- ihid->inbuf + sizeof(__le16),
- ret_size - sizeof(__le16), 1);
+ hid_safe_input_report(ihid->hid, HID_INPUT_REPORT,
+ ihid->inbuf + sizeof(__le16),
+ ihid->bufsize - sizeof(__le16),
+ ret_size - sizeof(__le16), 1);
}
return;
diff --git a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c
index 16f780bc879b..cb19057f1191 100644
--- a/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c
+++ b/drivers/hid/intel-thc-hid/intel-quickspi/quickspi-protocol.c
@@ -94,7 +94,7 @@ static int quickspi_get_device_descriptor(struct quickspi_device *qsdev)
dev_err_once(qsdev->dev, "Read DEVICE_DESCRIPTOR failed, ret = %d\n", ret);
dev_err_once(qsdev->dev, "DEVICE_DESCRIPTOR expected len = %u, actual read = %u\n",
input_len, read_len);
- return ret;
+ return ret ?: -EINVAL;
}
input_rep_type = ((struct input_report_body_header *)read_buf)->input_report_type;
@@ -318,7 +318,7 @@ int reset_tic(struct quickspi_device *qsdev)
dev_err_once(qsdev->dev, "Read RESET_RESPONSE body failed, ret = %d\n", ret);
dev_err_once(qsdev->dev, "RESET_RESPONSE body expected len = %u, actual = %u\n",
read_len, actual_read_len);
- return ret;
+ return ret ?: -EINVAL;
}
input_rep_type = FIELD_GET(HIDSPI_IN_REP_BDY_HDR_REP_TYPE, reset_response);
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index fbbfc0f60829..5af93b9b1fb5 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -283,9 +283,9 @@ static void hid_irq_in(struct urb *urb)
break;
usbhid_mark_busy(usbhid);
if (!test_bit(HID_RESUME_RUNNING, &usbhid->iofl)) {
- hid_input_report(urb->context, HID_INPUT_REPORT,
- urb->transfer_buffer,
- urb->actual_length, 1);
+ hid_safe_input_report(urb->context, HID_INPUT_REPORT,
+ urb->transfer_buffer, urb->transfer_buffer_length,
+ urb->actual_length, 1);
/*
* autosuspend refused while keys are pressed
* because most keyboards don't wake up when
@@ -482,9 +482,10 @@ static void hid_ctrl(struct urb *urb)
switch (status) {
case 0: /* success */
if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_IN)
- hid_input_report(urb->context,
+ hid_safe_input_report(urb->context,
usbhid->ctrl[usbhid->ctrltail].report->type,
- urb->transfer_buffer, urb->actual_length, 0);
+ urb->transfer_buffer, urb->transfer_buffer_length,
+ urb->actual_length, 0);
break;
case -ESHUTDOWN: /* unplug */
unplug = 1;
diff --git a/drivers/hid/usbhid/hid-pidff.c b/drivers/hid/usbhid/hid-pidff.c
index aee8a4443305..c45f182d0448 100644
--- a/drivers/hid/usbhid/hid-pidff.c
+++ b/drivers/hid/usbhid/hid-pidff.c
@@ -11,6 +11,7 @@
#include "hid-pidff.h"
#include <linux/hid.h>
#include <linux/input.h>
+#include <linux/math64.h>
#include <linux/minmax.h>
#include <linux/slab.h>
#include <linux/stringify.h>
@@ -326,8 +327,10 @@ static s32 pidff_clamp(s32 i, struct hid_field *field)
*/
static int pidff_rescale(int i, int max, struct hid_field *field)
{
- return i * (field->logical_maximum - field->logical_minimum) / max +
- field->logical_minimum;
+ /* 64 bits needed for big values during rescale */
+ s64 result = field->logical_maximum - field->logical_minimum;
+
+ return div_s64(result * i, max) + field->logical_minimum;
}
/*
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 0d1c6d90fe21..a32320b351e3 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -90,7 +90,7 @@ static void wacom_wac_queue_flush(struct hid_device *hdev,
kfree(buf);
continue;
}
- err = hid_report_raw_event(hdev, HID_INPUT_REPORT, buf, size, false);
+ err = hid_report_raw_event(hdev, HID_INPUT_REPORT, buf, size, size, false);
if (err) {
hid_warn(hdev, "%s: unable to flush event due to error %d\n",
__func__, err);
@@ -334,7 +334,7 @@ static void wacom_feature_mapping(struct hid_device *hdev,
data, n, WAC_CMD_RETRIES);
if (ret == n && features->type == HID_GENERIC) {
ret = hid_report_raw_event(hdev,
- HID_FEATURE_REPORT, data, n, 0);
+ HID_FEATURE_REPORT, data, n, n, 0);
} else if (ret == 2 && features->type != HID_GENERIC) {
features->touch_max = data[1];
} else {
@@ -395,7 +395,7 @@ static void wacom_feature_mapping(struct hid_device *hdev,
data, n, WAC_CMD_RETRIES);
if (ret == n) {
ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT,
- data, n, 0);
+ data, n, n, 0);
} else {
hid_warn(hdev, "%s: could not retrieve sensor offsets\n",
__func__);
diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c
index 4e66473d7cea..4c53b6361314 100644
--- a/drivers/iommu/amd/debugfs.c
+++ b/drivers/iommu/amd/debugfs.c
@@ -31,11 +31,12 @@ static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf,
if (cnt > OFS_IN_SZ)
return -EINVAL;
- ret = kstrtou32_from_user(ubuf, cnt, 0, &dbg_mmio_offset);
+ ret = kstrtos32_from_user(ubuf, cnt, 0, &dbg_mmio_offset);
if (ret)
return ret;
- if (dbg_mmio_offset > iommu->mmio_phys_end - sizeof(u64))
+ if (dbg_mmio_offset < 0 || dbg_mmio_offset >
+ iommu->mmio_phys_end - sizeof(u64))
return -EINVAL;
iommu->dbg_mmio_offset = dbg_mmio_offset;
@@ -71,12 +72,12 @@ static ssize_t iommu_capability_write(struct file *filp, const char __user *ubuf
if (cnt > OFS_IN_SZ)
return -EINVAL;
- ret = kstrtou32_from_user(ubuf, cnt, 0, &dbg_cap_offset);
+ ret = kstrtos32_from_user(ubuf, cnt, 0, &dbg_cap_offset);
if (ret)
return ret;
/* Capability register at offset 0x14 is the last IOMMU capability register. */
- if (dbg_cap_offset > 0x14)
+ if (dbg_cap_offset < 0 || dbg_cap_offset > 0x14)
return -EINVAL;
iommu->dbg_cap_offset = dbg_cap_offset;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f78e23f03938..57dc8fabc7d9 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -351,8 +351,12 @@ static struct amd_iommu *__rlookup_amd_iommu(u16 seg, u16 devid)
struct amd_iommu_pci_seg *pci_seg;
for_each_pci_segment(pci_seg) {
- if (pci_seg->id == seg)
- return pci_seg->rlookup_table[devid];
+ if (pci_seg->id != seg)
+ continue;
+ /* IVRS may not describe every device on the bus */
+ if (devid > pci_seg->last_bdf)
+ return NULL;
+ return pci_seg->rlookup_table[devid];
}
return NULL;
}
diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h
index 19b6daf88f2a..dc91fb4e2f61 100644
--- a/drivers/iommu/generic_pt/iommu_pt.h
+++ b/drivers/iommu/generic_pt/iommu_pt.h
@@ -534,10 +534,12 @@ static int __map_range_leaf(struct pt_range *range, void *arg,
struct pt_state pts = pt_init(range, level, table);
struct pt_iommu_map_args *map = arg;
unsigned int leaf_pgsize_lg2 = map->leaf_pgsize_lg2;
+ unsigned int leaves_avail;
unsigned int start_index;
pt_oaddr_t oa = map->oa;
- unsigned int num_leaves;
+ pt_vaddr_t num_leaves;
unsigned int orig_end;
+ unsigned int step_lg2;
pt_vaddr_t last_va;
unsigned int step;
bool need_contig;
@@ -546,21 +548,25 @@ static int __map_range_leaf(struct pt_range *range, void *arg,
PT_WARN_ON(map->leaf_level != level);
PT_WARN_ON(!pt_can_have_leaf(&pts));
- step = log2_to_int_t(unsigned int,
- leaf_pgsize_lg2 - pt_table_item_lg2sz(&pts));
- need_contig = leaf_pgsize_lg2 != pt_table_item_lg2sz(&pts);
+ step_lg2 = leaf_pgsize_lg2 - pt_table_item_lg2sz(&pts);
+ step = log2_to_int_t(unsigned int, step_lg2);
+ need_contig = step_lg2 != 0;
_pt_iter_first(&pts);
start_index = pts.index;
orig_end = pts.end_index;
- if (pts.index + map->num_leaves < pts.end_index) {
+ leaves_avail =
+ log2_div_t(unsigned int, pts.end_index - pts.index, step_lg2);
+ if (map->num_leaves <= leaves_avail) {
/* Need to stop in the middle of the table to change sizes */
- pts.end_index = pts.index + map->num_leaves;
+ pts.end_index = pts.index + log2_mul(map->num_leaves, step_lg2);
num_leaves = 0;
} else {
- num_leaves = map->num_leaves - (pts.end_index - pts.index);
+ num_leaves = map->num_leaves - leaves_avail;
}
+ PT_WARN_ON(
+ log2_mod_t(unsigned int, pts.end_index - pts.index, step_lg2));
do {
pts.type = pt_load_entry_raw(&pts);
if (pts.type != PT_ENTRY_EMPTY || need_contig) {
@@ -920,8 +926,8 @@ static int NS(map_range)(struct pt_iommu *iommu_table, dma_addr_t iova,
return ret;
/* Calculate target page size and level for the leaves */
- if (pt_has_system_page_size(common) && len == PAGE_SIZE) {
- PT_WARN_ON(!(pgsize_bitmap & PAGE_SIZE));
+ if (pt_has_system_page_size(common) && len == PAGE_SIZE &&
+ likely(pgsize_bitmap & PAGE_SIZE)) {
if (log2_mod(iova | paddr, PAGE_SHIFT))
return -ENXIO;
map.leaf_pgsize_lg2 = PAGE_SHIFT;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index c3d18cd77d2f..4d0e65bc131d 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3530,8 +3530,8 @@ void domain_remove_dev_pasid(struct iommu_domain *domain,
if (!domain)
return;
- /* Identity domain has no meta data for pasid. */
- if (domain->type == IOMMU_DOMAIN_IDENTITY)
+ /* Identity domain and blocked domain have no meta data for pasid. */
+ if (domain->type == IOMMU_DOMAIN_IDENTITY || domain->type == IOMMU_DOMAIN_BLOCKED)
return;
dmar_domain = to_dmar_domain(domain);
@@ -3545,12 +3545,13 @@ void domain_remove_dev_pasid(struct iommu_domain *domain,
}
spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ if (WARN_ON_ONCE(!dev_pasid))
+ return;
+
cache_tag_unassign_domain(dmar_domain, dev, pasid);
domain_detach_iommu(dmar_domain, iommu);
- if (!WARN_ON_ONCE(!dev_pasid)) {
- intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
- kfree(dev_pasid);
- }
+ intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
+ kfree(dev_pasid);
}
static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
@@ -3937,6 +3938,9 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
disable_igfx_iommu = 1;
}
+/* Q35 integrated gfx dmar support is totally busted. */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x29b2, quirk_iommu_igfx);
+
/* G4x/GM45 integrated gfx dmar support is totally busted. */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 61c12ba78206..d1a9e713d3a0 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -62,14 +62,14 @@ struct iommu_group {
int id;
struct iommu_domain *default_domain;
struct iommu_domain *blocking_domain;
- /*
- * During a group device reset, @resetting_domain points to the physical
- * domain, while @domain points to the attached domain before the reset.
- */
- struct iommu_domain *resetting_domain;
struct iommu_domain *domain;
struct list_head entry;
unsigned int owner_cnt;
+ /*
+ * Number of devices in the group undergoing or awaiting recovery.
+ * If non-zero, concurrent domain attachments are rejected.
+ */
+ unsigned int recovery_cnt;
void *owner;
};
@@ -77,12 +77,33 @@ struct group_device {
struct list_head list;
struct device *dev;
char *name;
+ /*
+ * Device is blocked for a pending recovery while its group->domain is
+ * retained. This can happen when:
+ * - Device is undergoing a reset
+ */
+ bool blocked;
+ unsigned int reset_depth;
};
/* Iterate over each struct group_device in a struct iommu_group */
#define for_each_group_device(group, pos) \
list_for_each_entry(pos, &(group)->devices, list)
+static struct group_device *__dev_to_gdev(struct device *dev)
+{
+ struct iommu_group *group = dev->iommu_group;
+ struct group_device *gdev;
+
+ lockdep_assert_held(&group->mutex);
+
+ for_each_group_device(group, gdev) {
+ if (gdev->dev == dev)
+ return gdev;
+ }
+ return NULL;
+}
+
struct iommu_group_attribute {
struct attribute attr;
ssize_t (*show)(struct iommu_group *group, char *buf);
@@ -2196,6 +2217,8 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
{
+ struct group_device *gdev;
+
/*
* This is called on the dma mapping fast path so avoid locking. This is
* racy, but we have an expectation that the driver will setup its DMAs
@@ -2206,14 +2229,18 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
guard(mutex)(&dev->iommu_group->mutex);
+ gdev = __dev_to_gdev(dev);
+ if (WARN_ON(!gdev))
+ return -ENODEV;
+
/*
- * This is a concurrent attach during a device reset. Reject it until
+ * This is a concurrent attach during device recovery. Reject it until
* pci_dev_reset_iommu_done() attaches the device to group->domain.
*
* Note that this might fail the iommu_dma_map(). But there's nothing
* more we can do here.
*/
- if (dev->iommu_group->resetting_domain)
+ if (gdev->blocked)
return -EBUSY;
return __iommu_attach_device(domain, dev, NULL);
}
@@ -2270,19 +2297,24 @@ EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
struct iommu_domain *iommu_driver_get_domain_for_dev(struct device *dev)
{
struct iommu_group *group = dev->iommu_group;
+ struct group_device *gdev;
lockdep_assert_held(&group->mutex);
+ gdev = __dev_to_gdev(dev);
+ if (WARN_ON(!gdev))
+ return NULL;
+
/*
* Driver handles the low-level __iommu_attach_device(), including the
* one invoked by pci_dev_reset_iommu_done() re-attaching the device to
* the cached group->domain. In this case, the driver must get the old
- * domain from group->resetting_domain rather than group->domain. This
+ * domain from group->blocking_domain rather than group->domain. This
* prevents it from re-attaching the device from group->domain (old) to
* group->domain (new).
*/
- if (group->resetting_domain)
- return group->resetting_domain;
+ if (gdev->blocked)
+ return group->blocking_domain;
return group->domain;
}
@@ -2441,10 +2473,11 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group,
return -EINVAL;
/*
- * This is a concurrent attach during a device reset. Reject it until
- * pci_dev_reset_iommu_done() attaches the device to group->domain.
+ * This is a concurrent attach during device recovery. Reject it until
+ * pci_dev_reset_iommu_done() attaches the device to group->domain, if
+ * IOMMU_SET_DOMAIN_MUST_SUCCEED is not set.
*/
- if (group->resetting_domain)
+ if (group->recovery_cnt && !(flags & IOMMU_SET_DOMAIN_MUST_SUCCEED))
return -EBUSY;
/*
@@ -2455,6 +2488,13 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group,
*/
result = 0;
for_each_group_device(group, gdev) {
+ /*
+ * Device under recovery is attached to group->blocking_domain.
+ * Don't change that. pci_dev_reset_iommu_done() will re-attach
+ * its domain to the updated group->domain, after the recovery.
+ */
+ if (gdev->blocked)
+ continue;
ret = __iommu_device_set_domain(group, gdev->dev, new_domain,
group->domain, flags);
if (ret) {
@@ -2575,27 +2615,16 @@ out_set_count:
static int __iommu_map_domain_pgtbl(struct iommu_domain *domain,
unsigned long iova, phys_addr_t paddr,
- size_t size, int prot, gfp_t gfp)
+ size_t size, int prot, gfp_t gfp,
+ size_t *mapped)
{
const struct iommu_domain_ops *ops = domain->ops;
- unsigned long orig_iova = iova;
unsigned int min_pagesz;
- size_t orig_size = size;
int ret = 0;
- might_sleep_if(gfpflags_allow_blocking(gfp));
-
- if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
- return -EINVAL;
-
- if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL))
+ if (WARN_ON(!ops->map_pages))
return -ENODEV;
- /* Discourage passing strange GFP flags */
- if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
- __GFP_HIGHMEM)))
- return -EINVAL;
-
/* find out the minimum page size supported */
min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
@@ -2613,31 +2642,25 @@ static int __iommu_map_domain_pgtbl(struct iommu_domain *domain,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
while (size) {
- size_t pgsize, count, mapped = 0;
+ size_t pgsize, count, op_mapped = 0;
pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
iova, &paddr, pgsize, count);
ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
- gfp, &mapped);
+ gfp, &op_mapped);
/*
* Some pages may have been mapped, even if an error occurred,
* so we should account for those so they can be unmapped.
*/
- size -= mapped;
-
+ *mapped += op_mapped;
if (ret)
- break;
-
- iova += mapped;
- paddr += mapped;
- }
+ return ret;
- /* unroll mapping in case something went wrong */
- if (ret) {
- iommu_unmap(domain, orig_iova, orig_size - size);
- return ret;
+ size -= op_mapped;
+ iova += op_mapped;
+ paddr += op_mapped;
}
return 0;
}
@@ -2655,25 +2678,31 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
struct pt_iommu *pt = iommupt_from_domain(domain);
+ size_t mapped = 0;
int ret;
- if (pt) {
- size_t mapped = 0;
+ might_sleep_if(gfpflags_allow_blocking(gfp));
+ /* Discourage passing strange GFP flags or illegal domains */
+ if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING) ||
+ !domain->pgsize_bitmap ||
+ (gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 |
+ __GFP_HIGHMEM))))
+ return -EINVAL;
+
+ if (pt)
ret = pt->ops->map_range(pt, iova, paddr, size, prot, gfp,
&mapped);
- if (ret) {
- iommu_unmap(domain, iova, mapped);
- return ret;
- }
- return 0;
- }
- ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot, gfp);
- if (!ret)
- return ret;
+ else
+ ret = __iommu_map_domain_pgtbl(domain, iova, paddr, size, prot,
+ gfp, &mapped);
- trace_map(iova, paddr, size);
- iommu_debug_map(domain, paddr, size);
+ trace_map(iova, paddr, mapped);
+ iommu_debug_map(domain, paddr, mapped);
+ if (ret) {
+ iommu_unmap(domain, iova, mapped);
+ return ret;
+ }
return 0;
}
@@ -2702,10 +2731,7 @@ __iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova,
size_t unmapped_page, unmapped = 0;
unsigned int min_pagesz;
- if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
- return 0;
-
- if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL))
+ if (WARN_ON(!ops->unmap_pages))
return 0;
/* find out the minimum page size supported */
@@ -2724,8 +2750,6 @@ __iommu_unmap_domain_pgtbl(struct iommu_domain *domain, unsigned long iova,
pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
- iommu_debug_unmap_begin(domain, iova, size);
-
/*
* Keep iterating until we either unmap 'size' bytes (or more)
* or we hit an area that isn't mapped.
@@ -2761,6 +2785,12 @@ static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova,
struct pt_iommu *pt = iommupt_from_domain(domain);
size_t unmapped;
+ if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING) ||
+ !domain->pgsize_bitmap))
+ return 0;
+
+ iommu_debug_unmap_begin(domain, iova, size);
+
if (pt)
unmapped = pt->ops->unmap_range(pt, iova, size, iotlb_gather);
else
@@ -3570,7 +3600,12 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
struct group_device *device;
for_each_group_device(group, device) {
- if (device->dev->iommu->max_pasids > 0)
+ /*
+ * A group-level detach cannot fail, even if there is a blocked
+ * device. In fact, blocked devices must be already detached for
+ * a pending device recovery.
+ */
+ if (!device->blocked && device->dev->iommu->max_pasids > 0)
iommu_remove_dev_pasid(device->dev, pasid, domain);
}
}
@@ -3615,10 +3650,10 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
mutex_lock(&group->mutex);
/*
- * This is a concurrent attach during a device reset. Reject it until
+ * This is a concurrent attach during device recovery. Reject it until
* pci_dev_reset_iommu_done() attaches the device to group->domain.
*/
- if (group->resetting_domain) {
+ if (group->recovery_cnt) {
ret = -EBUSY;
goto out_unlock;
}
@@ -3708,10 +3743,10 @@ int iommu_replace_device_pasid(struct iommu_domain *domain,
mutex_lock(&group->mutex);
/*
- * This is a concurrent attach during a device reset. Reject it until
+ * This is a concurrent attach during device recovery. Reject it until
* pci_dev_reset_iommu_done() attaches the device to group->domain.
*/
- if (group->resetting_domain) {
+ if (group->recovery_cnt) {
ret = -EBUSY;
goto out_unlock;
}
@@ -3982,12 +4017,12 @@ EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
* routine wants to block any IOMMU activity: translation and ATS invalidation.
*
* This function attaches the device's RID/PASID(s) the group->blocking_domain,
- * setting the group->resetting_domain. This allows the IOMMU driver pausing any
+ * incrementing the group->recovery_cnt, to allow the IOMMU driver pausing any
* IOMMU activity while leaving the group->domain pointer intact. Later when the
* reset is finished, pci_dev_reset_iommu_done() can restore everything.
*
* Caller must use pci_dev_reset_iommu_prepare() with pci_dev_reset_iommu_done()
- * before/after the core-level reset routine, to unset the resetting_domain.
+ * before/after the core-level reset routine, to decrement the recovery_cnt.
*
* Return: 0 on success or negative error code if the preparation failed.
*
@@ -4000,6 +4035,7 @@ EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
int pci_dev_reset_iommu_prepare(struct pci_dev *pdev)
{
struct iommu_group *group = pdev->dev.iommu_group;
+ struct group_device *gdev;
unsigned long pasid;
void *entry;
int ret;
@@ -4009,45 +4045,99 @@ int pci_dev_reset_iommu_prepare(struct pci_dev *pdev)
guard(mutex)(&group->mutex);
- /* Re-entry is not allowed */
- if (WARN_ON(group->resetting_domain))
- return -EBUSY;
+ gdev = __dev_to_gdev(&pdev->dev);
+ if (WARN_ON(!gdev))
+ return -ENODEV;
+
+ if (gdev->reset_depth++)
+ return 0;
ret = __iommu_group_alloc_blocking_domain(group);
- if (ret)
+ if (ret) {
+ gdev->reset_depth--;
return ret;
+ }
/* Stage RID domain at blocking_domain while retaining group->domain */
if (group->domain != group->blocking_domain) {
ret = __iommu_attach_device(group->blocking_domain, &pdev->dev,
group->domain);
- if (ret)
+ if (ret) {
+ gdev->reset_depth--;
return ret;
+ }
}
/*
+ * Update gdev->blocked upon the domain change, as it is used to return
+ * the correct domain in iommu_driver_get_domain_for_dev() that might be
+ * called in a set_dev_pasid callback function.
+ */
+ gdev->blocked = true;
+
+ /*
* Stage PASID domains at blocking_domain while retaining pasid_array.
*
* The pasid_array is mostly fenced by group->mutex, except one reader
* in iommu_attach_handle_get(), so it's safe to read without xa_lock.
*/
- xa_for_each_start(&group->pasid_array, pasid, entry, 1)
- iommu_remove_dev_pasid(&pdev->dev, pasid,
- pasid_array_entry_to_domain(entry));
+ if (pdev->dev.iommu->max_pasids > 0) {
+ xa_for_each_start(&group->pasid_array, pasid, entry, 1) {
+ struct iommu_domain *pasid_dom =
+ pasid_array_entry_to_domain(entry);
+
+ iommu_remove_dev_pasid(&pdev->dev, pasid, pasid_dom);
+ }
+ }
- group->resetting_domain = group->blocking_domain;
+ group->recovery_cnt++;
return ret;
}
EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare);
+static int __group_device_cmp_dma_alias(struct pci_dev *dev, u16 alias,
+ void *data)
+{
+ return alias == *(u16 *)data;
+}
+
+static int group_device_cmp_dma_alias(struct pci_dev *dev, u16 alias,
+ void *data)
+{
+ return pci_for_each_dma_alias(data, __group_device_cmp_dma_alias,
+ &alias);
+}
+
+static bool group_device_dma_alias_is_blocked(struct iommu_group *group,
+ struct group_device *gdev)
+{
+ struct group_device *sibling;
+
+ lockdep_assert_held(&group->mutex);
+
+ if (!dev_is_pci(gdev->dev))
+ return false;
+
+ for_each_group_device(group, sibling) {
+ if (sibling == gdev || !sibling->blocked ||
+ !dev_is_pci(sibling->dev))
+ continue;
+ if (pci_for_each_dma_alias(to_pci_dev(gdev->dev),
+ group_device_cmp_dma_alias,
+ to_pci_dev(sibling->dev)))
+ return true;
+ }
+ return false;
+}
+
/**
* pci_dev_reset_iommu_done() - Restore IOMMU after a PCI device reset is done
* @pdev: PCI device that has finished a reset routine
*
* After a PCIe device finishes a reset routine, it wants to restore its IOMMU
- * IOMMU activity, including new translation as well as cache invalidation, by
- * re-attaching all RID/PASID of the device's back to the domains retained in
- * the core-level structure.
+ * activity, including new translation and cache invalidation, by re-attaching
+ * all RID/PASID of the device back to the domains retained in the core-level
+ * structure.
*
* Caller must pair it with a successful pci_dev_reset_iommu_prepare().
*
@@ -4057,6 +4147,7 @@ EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_prepare);
void pci_dev_reset_iommu_done(struct pci_dev *pdev)
{
struct iommu_group *group = pdev->dev.iommu_group;
+ struct group_device *gdev;
unsigned long pasid;
void *entry;
@@ -4065,32 +4156,70 @@ void pci_dev_reset_iommu_done(struct pci_dev *pdev)
guard(mutex)(&group->mutex);
- /* pci_dev_reset_iommu_prepare() was bypassed for the device */
- if (!group->resetting_domain)
+ gdev = __dev_to_gdev(&pdev->dev);
+ if (WARN_ON(!gdev))
+ return;
+
+ /* Unbalanced done() calls would underflow the counter */
+ if (WARN_ON(gdev->reset_depth == 0))
+ return;
+ if (--gdev->reset_depth)
return;
- /* pci_dev_reset_iommu_prepare() was not successfully called */
if (WARN_ON(!group->blocking_domain))
return;
- /* Re-attach RID domain back to group->domain */
- if (group->domain != group->blocking_domain) {
+ if (group_device_dma_alias_is_blocked(group, gdev)) {
+ /*
+ * FIXME: DMA aliased devices share the same RID, which would be
+ * convoluted to handle, as "gdev->blocked" is not sufficient:
+ * - "blocked" state is effectively shared across these devices
+ * - if the core skipped the blocking on the second device, the
+ * IOMMU driver's attachment state would diverge from the HW
+ * state
+ * For now, just warn and see whether real ATS use cases hit it.
+ */
+ pci_warn(pdev,
+ "DMA-aliased sibling may be prematurely unblocked\n");
+ }
+
+ /*
+ * Re-attach RID domain back to group->domain
+ *
+ * Leave the device parked in the blocking_domain if group->domain isn't
+ * initialized yet
+ */
+ if (group->domain && group->domain != group->blocking_domain) {
WARN_ON(__iommu_attach_device(group->domain, &pdev->dev,
group->blocking_domain));
}
/*
+ * Update gdev->blocked upon the domain change, as it is used to return
+ * the correct domain in iommu_driver_get_domain_for_dev() that might be
+ * called in a set_dev_pasid callback function.
+ */
+ gdev->blocked = false;
+
+ /*
* Re-attach PASID domains back to the domains retained in pasid_array.
*
* The pasid_array is mostly fenced by group->mutex, except one reader
* in iommu_attach_handle_get(), so it's safe to read without xa_lock.
*/
- xa_for_each_start(&group->pasid_array, pasid, entry, 1)
- WARN_ON(__iommu_set_group_pasid(
- pasid_array_entry_to_domain(entry), group, pasid,
- group->blocking_domain));
+ if (pdev->dev.iommu->max_pasids > 0) {
+ xa_for_each_start(&group->pasid_array, pasid, entry, 1) {
+ struct iommu_domain *pasid_dom =
+ pasid_array_entry_to_domain(entry);
+
+ WARN_ON(pasid_dom->ops->set_dev_pasid(
+ pasid_dom, &pdev->dev, pasid,
+ group->blocking_domain));
+ }
+ }
- group->resetting_domain = NULL;
+ if (!WARN_ON(group->recovery_cnt == 0))
+ group->recovery_cnt--;
}
EXPORT_SYMBOL_GPL(pci_dev_reset_iommu_done);
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index e67b592e5697..8c86789d867a 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -1782,20 +1782,23 @@ void ena_com_phc_destroy(struct ena_com_dev *ena_dev)
int ena_com_phc_get_timestamp(struct ena_com_dev *ena_dev, u64 *timestamp)
{
- volatile struct ena_admin_phc_resp *resp = ena_dev->phc.virt_addr;
const ktime_t zero_system_time = ktime_set(0, 0);
struct ena_com_phc_info *phc = &ena_dev->phc;
+ volatile struct ena_admin_phc_resp *resp;
ktime_t expire_time;
ktime_t block_time;
unsigned long flags = 0;
int ret = 0;
+ spin_lock_irqsave(&phc->lock, flags);
+
if (!phc->active) {
+ spin_unlock_irqrestore(&phc->lock, flags);
netdev_err(ena_dev->net_device, "PHC feature is not active in the device\n");
return -EOPNOTSUPP;
}
- spin_lock_irqsave(&phc->lock, flags);
+ resp = ena_dev->phc.virt_addr;
/* Check if PHC is in blocked state */
if (unlikely(ktime_compare(phc->system_time, zero_system_time))) {
diff --git a/drivers/net/ethernet/amazon/ena/ena_phc.c b/drivers/net/ethernet/amazon/ena/ena_phc.c
index 7867e893fd15..c2a3ff1ef645 100644
--- a/drivers/net/ethernet/amazon/ena/ena_phc.c
+++ b/drivers/net/ethernet/amazon/ena/ena_phc.c
@@ -46,9 +46,12 @@ static int ena_phc_gettimex64(struct ptp_clock_info *clock_info,
spin_unlock_irqrestore(&phc_info->lock, flags);
+ if (rc)
+ return rc;
+
*ts = ns_to_timespec64(timestamp_nsec);
- return rc;
+ return 0;
}
static int ena_phc_settime64(struct ptp_clock_info *clock_info,
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index b854b6b42d77..2926e1e59941 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -910,7 +910,9 @@ static int xgene_mdiobus_register(struct xgene_enet_pdata *pdata,
return -ENXIO;
}
- return of_mdiobus_register(mdio, mdio_np);
+ ret = of_mdiobus_register(mdio, mdio_np);
+ of_node_put(mdio_np);
+ return ret;
}
/* Mask out all PHYs from auto probing. */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index e9e38af680c3..39e1b606a75a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -371,7 +371,7 @@ static void aq_pci_shutdown(struct pci_dev *pdev)
pci_disable_device(pdev);
if (system_state == SYSTEM_POWER_OFF) {
- pci_wake_from_d3(pdev, false);
+ pci_wake_from_d3(pdev, self->aq_hw->aq_nic_cfg->wol);
pci_set_power_state(pdev, PCI_D3hot);
}
}
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index fa5857923db4..b4bfd6c174e7 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1271,7 +1271,6 @@ static const struct net_device_ops net_ops = {
static void __init reset_chip(struct net_device *dev)
{
-#if !defined(CONFIG_MACH_MX31ADS)
struct net_local *lp = netdev_priv(dev);
unsigned long reset_start_time;
@@ -1298,7 +1297,6 @@ static void __init reset_chip(struct net_device *dev)
while ((readreg(dev, PP_SelfST) & INIT_DONE) == 0 &&
time_before(jiffies, reset_start_time + 2))
;
-#endif /* !CONFIG_MACH_MX31ADS */
}
/* This is the real probe routine.
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index 065cbbf52686..4c762229ce42 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -122,6 +122,9 @@ struct gemini_ethernet_port {
struct napi_struct napi;
struct hrtimer rx_coalesce_timer;
unsigned int rx_coalesce_nsecs;
+ struct sk_buff *rx_skb;
+ unsigned int rx_frag_nr;
+
unsigned int freeq_refill;
struct gmac_txq txq[TX_QUEUE_NUM];
unsigned int txq_order;
@@ -1442,10 +1445,11 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget)
unsigned short m = (1 << port->rxq_order) - 1;
struct gemini_ethernet *geth = port->geth;
void __iomem *ptr_reg = port->rxq_rwptr;
+ unsigned int frag_nr = port->rx_frag_nr;
+ struct sk_buff *skb = port->rx_skb;
unsigned int frame_len, frag_len;
struct gmac_rxdesc *rx = NULL;
struct gmac_queue_page *gpage;
- static struct sk_buff *skb;
union gmac_rxdesc_0 word0;
union gmac_rxdesc_1 word1;
union gmac_rxdesc_3 word3;
@@ -1455,7 +1459,6 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget)
unsigned short r, w;
union dma_rwptr rw;
dma_addr_t mapping;
- int frag_nr = 0;
spin_lock_irqsave(&geth->irq_lock, flags);
rw.bits32 = readl(ptr_reg);
@@ -1491,10 +1494,11 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget)
gpage = gmac_get_queue_page(geth, port, mapping + PAGE_SIZE);
if (!gpage) {
dev_err(geth->dev, "could not find mapping\n");
+ port->stats.rx_dropped++;
if (skb) {
napi_free_frags(&port->napi);
- port->stats.rx_dropped++;
skb = NULL;
+ frag_nr = 0;
}
continue;
}
@@ -1504,6 +1508,8 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget)
if (skb) {
napi_free_frags(&port->napi);
port->stats.rx_dropped++;
+ skb = NULL;
+ frag_nr = 0;
}
skb = gmac_skb_if_good_frame(port, word0, frame_len);
@@ -1538,6 +1544,7 @@ static unsigned int gmac_rx(struct net_device *netdev, unsigned int budget)
if (word3.bits32 & EOF_BIT) {
napi_gro_frags(&port->napi);
skb = NULL;
+ frag_nr = 0;
--budget;
}
continue;
@@ -1546,6 +1553,7 @@ err_drop:
if (skb) {
napi_free_frags(&port->napi);
skb = NULL;
+ frag_nr = 0;
}
if (mapping)
@@ -1554,6 +1562,8 @@ err_drop:
port->stats.rx_dropped++;
}
+ port->rx_skb = skb;
+ port->rx_frag_nr = frag_nr;
writew(r, ptr_reg);
return budget;
}
@@ -1881,6 +1891,8 @@ static int gmac_stop(struct net_device *netdev)
gmac_disable_tx_rx(netdev);
gmac_stop_dma(port);
napi_disable(&port->napi);
+ port->rx_skb = NULL;
+ port->rx_frag_nr = 0;
gmac_enable_irq(netdev, 0);
gmac_cleanup_rxq(netdev);
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index dcb50c2e1aa2..83e780919ac9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1318,6 +1318,7 @@ void i40e_ptp_restore_hw_time(struct i40e_pf *pf);
void i40e_ptp_init(struct i40e_pf *pf);
void i40e_ptp_stop(struct i40e_pf *pf);
int i40e_ptp_alloc_pins(struct i40e_pf *pf);
+void i40e_ptp_free_pins(struct i40e_pf *pf);
int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset);
int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
int i40e_get_partition_bw_setting(struct i40e_pf *pf);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 028bd500603a..6d4f9218dc68 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -16108,9 +16108,11 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Unwind what we've done if something failed in the setup */
err_vsis:
set_bit(__I40E_DOWN, pf->state);
+ i40e_ptp_stop(pf);
i40e_clear_interrupt_scheme(pf);
kfree(pf->vsi);
err_switch_setup:
+ i40e_ptp_free_pins(pf);
i40e_reset_interrupt_capability(pf);
timer_shutdown_sync(&pf->service_timer);
err_mac_addr:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 404a716db8da..7d07c389bb23 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -940,12 +940,13 @@ int i40e_ptp_hwtstamp_get(struct net_device *netdev,
*
* Release memory allocated for PTP pins.
**/
-static void i40e_ptp_free_pins(struct i40e_pf *pf)
+void i40e_ptp_free_pins(struct i40e_pf *pf)
{
if (i40e_is_ptp_pin_dev(&pf->hw)) {
kfree(pf->ptp_pins);
kfree(pf->ptp_caps.pin_config);
pf->ptp_pins = NULL;
+ pf->ptp_caps.pin_config = NULL;
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 16aa25535152..0bc6dd375687 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -537,14 +537,14 @@ void ice_dcb_rebuild(struct ice_pf *pf)
struct ice_dcbx_cfg *err_cfg;
int ret;
+ mutex_lock(&pf->tc_mutex);
+
ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
if (ret) {
dev_err(dev, "Query Port ETS failed\n");
goto dcb_error;
}
- mutex_lock(&pf->tc_mutex);
-
if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
ice_cfg_etsrec_defaults(pf->hw.port_info);
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c
index 27b460926bac..892bc7c2e28b 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.c
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -2523,6 +2523,8 @@ ice_dpll_rclk_state_on_pin_set(const struct dpll_pin *pin, void *pin_priv,
if (hw_idx < 0)
goto unlock;
hw_idx -= pf->dplls.base_rclk_idx;
+ if (hw_idx >= ICE_DPLL_RCLK_NUM_MAX)
+ goto unlock;
if ((enable && p->state[hw_idx] == DPLL_PIN_STATE_CONNECTED) ||
(!enable && p->state[hw_idx] == DPLL_PIN_STATE_DISCONNECTED)) {
@@ -2586,6 +2588,9 @@ ice_dpll_rclk_state_on_pin_get(const struct dpll_pin *pin, void *pin_priv,
hw_idx = ice_dpll_pin_get_parent_idx(p, parent_pin);
if (hw_idx < 0)
goto unlock;
+ hw_idx -= pf->dplls.base_rclk_idx;
+ if (hw_idx >= ICE_DPLL_RCLK_NUM_MAX)
+ goto unlock;
ret = ice_dpll_pin_state_update(pf, p, ICE_DPLL_PIN_TYPE_RCLK_INPUT,
extack);
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.h b/drivers/net/ethernet/intel/ice/ice_dpll.h
index ae42cdea0ee1..8678575359b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.h
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.h
@@ -8,6 +8,22 @@
#define ICE_DPLL_RCLK_NUM_MAX 4
+#define ICE_CGU_R10 0x28
+#define ICE_CGU_R10_SYNCE_CLKO_SEL GENMASK(8, 5)
+#define ICE_CGU_R10_SYNCE_CLKODIV_M1 GENMASK(13, 9)
+#define ICE_CGU_R10_SYNCE_CLKODIV_LOAD BIT(14)
+#define ICE_CGU_R10_SYNCE_DCK_RST BIT(15)
+#define ICE_CGU_R10_SYNCE_ETHCLKO_SEL GENMASK(18, 16)
+#define ICE_CGU_R10_SYNCE_ETHDIV_M1 GENMASK(23, 19)
+#define ICE_CGU_R10_SYNCE_ETHDIV_LOAD BIT(24)
+#define ICE_CGU_R10_SYNCE_DCK2_RST BIT(25)
+#define ICE_CGU_R10_SYNCE_S_REF_CLK GENMASK(31, 27)
+
+#define ICE_CGU_R11 0x2C
+#define ICE_CGU_R11_SYNCE_S_BYP_CLK GENMASK(6, 1)
+
+#define ICE_CGU_BYPASS_MUX_OFFSET_E825C 3
+
/**
* enum ice_dpll_pin_sw - enumerate ice software pin indices:
* @ICE_DPLL_PIN_SW_1_IDX: index of first SW pin
@@ -157,19 +173,3 @@ static inline void ice_dpll_deinit(struct ice_pf *pf) { }
#endif
#endif
-
-#define ICE_CGU_R10 0x28
-#define ICE_CGU_R10_SYNCE_CLKO_SEL GENMASK(8, 5)
-#define ICE_CGU_R10_SYNCE_CLKODIV_M1 GENMASK(13, 9)
-#define ICE_CGU_R10_SYNCE_CLKODIV_LOAD BIT(14)
-#define ICE_CGU_R10_SYNCE_DCK_RST BIT(15)
-#define ICE_CGU_R10_SYNCE_ETHCLKO_SEL GENMASK(18, 16)
-#define ICE_CGU_R10_SYNCE_ETHDIV_M1 GENMASK(23, 19)
-#define ICE_CGU_R10_SYNCE_ETHDIV_LOAD BIT(24)
-#define ICE_CGU_R10_SYNCE_DCK2_RST BIT(25)
-#define ICE_CGU_R10_SYNCE_S_REF_CLK GENMASK(31, 27)
-
-#define ICE_CGU_R11 0x2C
-#define ICE_CGU_R11_SYNCE_S_BYP_CLK GENMASK(6, 1)
-
-#define ICE_CGU_BYPASS_MUX_OFFSET_E825C 3
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 1d1947a7fe11..c52c465280f7 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -8046,7 +8046,7 @@ int ice_set_rss_hfunc(struct ice_vsi *vsi, u8 hfunc)
ctx->info.q_opt_rss |=
FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hfunc);
ctx->info.q_opt_tc = vsi->info.q_opt_tc;
- ctx->info.q_opt_flags = vsi->info.q_opt_rss;
+ ctx->info.q_opt_flags = vsi->info.q_opt_flags;
err = ice_update_vsi(hw, vsi->idx, ctx, NULL);
if (err) {
diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c
index 7e4f4ac92653..b7d6b08fc89e 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_idc.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c
@@ -90,7 +90,10 @@ static int idpf_plug_vport_aux_dev(struct iidc_rdma_core_dev_info *cdev_info,
return 0;
err_aux_dev_add:
+ ida_free(&idpf_idc_ida, adev->id);
+ vdev_info->adev = NULL;
auxiliary_device_uninit(adev);
+ return ret;
err_aux_dev_init:
ida_free(&idpf_idc_ida, adev->id);
err_ida_alloc:
@@ -228,7 +231,10 @@ static int idpf_plug_core_aux_dev(struct iidc_rdma_core_dev_info *cdev_info)
return 0;
err_aux_dev_add:
+ ida_free(&idpf_idc_ida, adev->id);
+ cdev_info->adev = NULL;
auxiliary_device_uninit(adev);
+ return ret;
err_aux_dev_init:
ida_free(&idpf_idc_ida, adev->id);
err_ida_alloc:
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_ptp.c
index eec91c4f0a75..4a51d2727547 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_ptp.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_ptp.c
@@ -952,6 +952,8 @@ int idpf_ptp_init(struct idpf_adapter *adapter)
goto free_ptp;
}
+ spin_lock_init(&adapter->ptp->read_dev_clk_lock);
+
err = idpf_ptp_create_clock(adapter);
if (err)
goto free_ptp;
@@ -977,8 +979,6 @@ int idpf_ptp_init(struct idpf_adapter *adapter)
goto remove_clock;
}
- spin_lock_init(&adapter->ptp->read_dev_clk_lock);
-
pci_dbg(adapter->pdev, "PTP init successful\n");
return 0;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 47752d3fde0b..1179a6e127c5 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -749,11 +749,10 @@ static void lan966x_cleanup_ports(struct lan966x *lan966x)
for (p = 0; p < lan966x->num_phys_ports; p++) {
port = lan966x->ports[p];
- if (!port)
+ if (!port || !port->dev)
continue;
- if (port->dev)
- unregister_netdev(port->dev);
+ unregister_netdev(port->dev);
lan966x_xdp_port_deinit(port);
if (lan966x->fdma && lan966x->fdma_ndev == port->dev)
@@ -873,6 +872,9 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p,
err = register_netdev(dev);
if (err) {
dev_err(lan966x->dev, "register_netdev failed\n");
+ phylink_destroy(phylink);
+ port->phylink = NULL;
+ port->dev = NULL;
return err;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 42c6dcfb1f0f..dd75c47758e1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -5103,6 +5103,13 @@ static int qed_init_wfq_param(struct qed_hwfn *p_hwfn,
return -EINVAL;
}
+ /* All vports are already or become configured, nothing to distribute */
+ if (non_requested_count == 0) {
+ p_hwfn->qm_info.wfq_data[vport_id].min_speed = req_rate;
+ p_hwfn->qm_info.wfq_data[vport_id].configured = true;
+ return 0;
+ }
+
total_left_rate = min_pf_rate - total_req_min_rate;
left_rate_per_vp = total_left_rate / non_requested_count;
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 1dbfadb2a881..5f88733094d0 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1108,9 +1108,12 @@ static int ravb_stop_dma(struct net_device *ndev)
/* Request for transmission suspension */
ravb_modify(ndev, CCC, CCC_DTSR, CCC_DTSR);
- error = ravb_wait(ndev, CSR, CSR_DTS, CSR_DTS);
- if (error)
- netdev_err(ndev, "failed to stop AXI BUS\n");
+ /* Access to URAM will not be suspended if WoL is enabled. */
+ if (!priv->wol_enabled) {
+ error = ravb_wait(ndev, CSR, CSR_DTS, CSR_DTS);
+ if (error)
+ netdev_err(ndev, "failed to stop AXI BUS\n");
+ }
/* Stop AVB-DMAC process */
return ravb_set_opmode(ndev, CCC_OPC_CONFIG);
diff --git a/drivers/net/ethernet/ti/icssm/icssm_prueth.c b/drivers/net/ethernet/ti/icssm/icssm_prueth.c
index 53bbd9290904..b7e94244355a 100644
--- a/drivers/net/ethernet/ti/icssm/icssm_prueth.c
+++ b/drivers/net/ethernet/ti/icssm/icssm_prueth.c
@@ -1825,6 +1825,7 @@ static int icssm_prueth_probe(struct platform_device *pdev)
dev_err(dev, "%pOF error reading port_id %d\n",
eth_node, ret);
of_node_put(eth_node);
+ of_node_put(eth_ports_node);
return ret;
}
diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c
index 064fa484f797..9bfecc87d6b2 100644
--- a/drivers/net/fddi/defza.c
+++ b/drivers/net/fddi/defza.c
@@ -984,7 +984,7 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id)
case FZA_STATE_UNINITIALIZED:
netif_carrier_off(dev);
- timer_delete_sync(&fp->reset_timer);
+ timer_delete_sync_try(&fp->reset_timer);
fp->ring_cmd_index = 0;
fp->ring_uns_index = 0;
fp->ring_rmc_tx_index = 0;
@@ -1018,7 +1018,9 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id)
fp->queue_active = 0;
netif_stop_queue(dev);
pr_debug("%s: queue stopped\n", fp->name);
- timer_delete_sync(&fp->reset_timer);
+
+ spin_lock(&fp->lock);
+ timer_delete(&fp->reset_timer);
pr_warn("%s: halted, reason: %x\n", fp->name,
FZA_STATUS_GET_HALT(status));
fza_regs_dump(fp);
@@ -1027,6 +1029,8 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id)
fp->timer_state = 0;
fp->reset_timer.expires = jiffies + 45 * HZ;
add_timer(&fp->reset_timer);
+ spin_unlock(&fp->lock);
+
break;
default:
@@ -1046,7 +1050,9 @@ static irqreturn_t fza_interrupt(int irq, void *dev_id)
static void fza_reset_timer(struct timer_list *t)
{
struct fza_private *fp = timer_container_of(fp, t, reset_timer);
+ unsigned long flags;
+ spin_lock_irqsave(&fp->lock, flags);
if (!fp->timer_state) {
pr_err("%s: RESET timed out!\n", fp->name);
pr_info("%s: trying harder...\n", fp->name);
@@ -1069,6 +1075,7 @@ static void fza_reset_timer(struct timer_list *t)
fp->reset_timer.expires = jiffies + 45 * HZ;
}
add_timer(&fp->reset_timer);
+ spin_unlock_irqrestore(&fp->lock, flags);
}
static int fza_set_mac_address(struct net_device *dev, void *addr)
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 6147ee8b1d78..f904f4d16b45 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -26,6 +26,8 @@
#include <uapi/linux/if_macsec.h>
+static struct workqueue_struct *macsec_wq;
+
/* SecTAG length = macsec_eth_header without the optional SCI */
#define MACSEC_TAG_LEN 6
@@ -174,9 +176,10 @@ static void macsec_rxsc_put(struct macsec_rx_sc *sc)
call_rcu(&sc->rcu_head, free_rx_sc_rcu);
}
-static void free_rxsa(struct rcu_head *head)
+static void free_rxsa_work(struct work_struct *work)
{
- struct macsec_rx_sa *sa = container_of(head, struct macsec_rx_sa, rcu);
+ struct macsec_rx_sa *sa =
+ container_of(to_rcu_work(work), struct macsec_rx_sa, destroy_work);
crypto_free_aead(sa->key.tfm);
free_percpu(sa->stats);
@@ -186,7 +189,7 @@ static void free_rxsa(struct rcu_head *head)
static void macsec_rxsa_put(struct macsec_rx_sa *sa)
{
if (refcount_dec_and_test(&sa->refcnt))
- call_rcu(&sa->rcu, free_rxsa);
+ queue_rcu_work(macsec_wq, &sa->destroy_work);
}
static struct macsec_tx_sa *macsec_txsa_get(struct macsec_tx_sa __rcu *ptr)
@@ -202,9 +205,10 @@ static struct macsec_tx_sa *macsec_txsa_get(struct macsec_tx_sa __rcu *ptr)
return sa;
}
-static void free_txsa(struct rcu_head *head)
+static void free_txsa_work(struct work_struct *work)
{
- struct macsec_tx_sa *sa = container_of(head, struct macsec_tx_sa, rcu);
+ struct macsec_tx_sa *sa =
+ container_of(to_rcu_work(work), struct macsec_tx_sa, destroy_work);
crypto_free_aead(sa->key.tfm);
free_percpu(sa->stats);
@@ -214,7 +218,7 @@ static void free_txsa(struct rcu_head *head)
static void macsec_txsa_put(struct macsec_tx_sa *sa)
{
if (refcount_dec_and_test(&sa->refcnt))
- call_rcu(&sa->rcu, free_txsa);
+ queue_rcu_work(macsec_wq, &sa->destroy_work);
}
static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
@@ -1407,6 +1411,7 @@ static int init_rx_sa(struct macsec_rx_sa *rx_sa, char *sak, int key_len,
rx_sa->next_pn = 1;
refcount_set(&rx_sa->refcnt, 1);
spin_lock_init(&rx_sa->lock);
+ INIT_RCU_WORK(&rx_sa->destroy_work, free_rxsa_work);
return 0;
}
@@ -1506,6 +1511,7 @@ static int init_tx_sa(struct macsec_tx_sa *tx_sa, char *sak, int key_len,
tx_sa->active = false;
refcount_set(&tx_sa->refcnt, 1);
spin_lock_init(&tx_sa->lock);
+ INIT_RCU_WORK(&tx_sa->destroy_work, free_txsa_work);
return 0;
}
@@ -4505,25 +4511,35 @@ static int __init macsec_init(void)
{
int err;
+ macsec_wq = alloc_workqueue("macsec", WQ_UNBOUND, 0);
+ if (!macsec_wq)
+ return -ENOMEM;
+
pr_info("MACsec IEEE 802.1AE\n");
err = register_netdevice_notifier(&macsec_notifier);
if (err)
- return err;
+ goto err_destroy_wq;
err = rtnl_link_register(&macsec_link_ops);
if (err)
- goto notifier;
+ goto err_notifier;
err = genl_register_family(&macsec_fam);
if (err)
- goto rtnl;
+ goto err_rtnl;
return 0;
-rtnl:
+err_rtnl:
rtnl_link_unregister(&macsec_link_ops);
-notifier:
+err_notifier:
unregister_netdevice_notifier(&macsec_notifier);
+err_destroy_wq:
+ /* Precautionary, mirrors macsec_exit() to stay safe if work
+ * ever becomes queueable before this point in the future.
+ */
+ rcu_barrier();
+ destroy_workqueue(macsec_wq);
return err;
}
@@ -4533,6 +4549,7 @@ static void __exit macsec_exit(void)
rtnl_link_unregister(&macsec_link_ops);
unregister_netdevice_notifier(&macsec_notifier);
rcu_barrier();
+ destroy_workqueue(macsec_wq);
}
module_init(macsec_init);
diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index d0361aaf25ef..3f7d31033bae 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c
@@ -502,7 +502,7 @@ static int net_failover_slave_register(struct net_device *slave_dev,
/* Align MTU of slave with failover dev */
orig_mtu = slave_dev->mtu;
- err = dev_set_mtu(slave_dev, failover_dev->mtu);
+ err = netif_set_mtu(slave_dev, failover_dev->mtu);
if (err) {
netdev_err(failover_dev, "unable to change mtu of %s to %u register failed\n",
slave_dev->name, failover_dev->mtu);
@@ -512,11 +512,11 @@ static int net_failover_slave_register(struct net_device *slave_dev,
dev_hold(slave_dev);
if (netif_running(failover_dev)) {
- err = dev_open(slave_dev, NULL);
+ err = netif_open(slave_dev, NULL);
if (err && (err != -EBUSY)) {
netdev_err(failover_dev, "Opening slave %s failed err:%d\n",
slave_dev->name, err);
- goto err_dev_open;
+ goto err_netif_open;
}
}
@@ -562,10 +562,10 @@ static int net_failover_slave_register(struct net_device *slave_dev,
err_vlan_add:
dev_uc_unsync(slave_dev, failover_dev);
dev_mc_unsync(slave_dev, failover_dev);
- dev_close(slave_dev);
-err_dev_open:
+ netif_close(slave_dev);
+err_netif_open:
dev_put(slave_dev);
- dev_set_mtu(slave_dev, orig_mtu);
+ netif_set_mtu(slave_dev, orig_mtu);
done:
return err;
}
diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c
index e480c2a07450..252fb12b3e68 100644
--- a/drivers/net/phy/dp83tc811.c
+++ b/drivers/net/phy/dp83tc811.c
@@ -393,6 +393,7 @@ static struct phy_driver dp83811_driver[] = {
.config_init = dp83811_config_init,
.config_aneg = dp83811_config_aneg,
.soft_reset = dp83811_phy_reset,
+ .get_features = genphy_c45_pma_read_ext_abilities,
.get_wol = dp83811_get_wol,
.set_wol = dp83811_set_wol,
.config_intr = dp83811_config_intr,
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 15bfb78381d4..809f21fb93f5 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -740,6 +740,8 @@ static int uhdlc_open(struct net_device *dev)
static void uhdlc_memclean(struct ucc_hdlc_private *priv)
{
+ int i;
+
qe_muram_free(ioread16be(&priv->ucc_pram->riptr));
qe_muram_free(ioread16be(&priv->ucc_pram->tiptr));
@@ -770,6 +772,11 @@ static void uhdlc_memclean(struct ucc_hdlc_private *priv)
kfree(priv->rx_skbuff);
priv->rx_skbuff = NULL;
+ for (i = 0; i < TX_BD_RING_LEN; i++) {
+ dev_kfree_skb(priv->tx_skbuff[i]);
+ priv->tx_skbuff[i] = NULL;
+ }
+
kfree(priv->tx_skbuff);
priv->tx_skbuff = NULL;
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 423c9c628e7b..c692fc73babf 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1009,6 +1009,7 @@ static void apple_nvme_init_queue(struct apple_nvme_queue *q)
unsigned int depth = apple_nvme_queue_depth(q);
struct apple_nvme *anv = queue_to_apple_nvme(q);
+ q->sq_tail = 0;
q->cq_head = 0;
q->cq_phase = 1;
if (anv->hw->has_lsq_nvmmu)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dc388e24caad..c3032d6ad6b1 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3749,6 +3749,10 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended)
ret = nvme_hwmon_init(ctrl);
if (ret == -EINTR)
return ret;
+
+ if (!nvme_ctrl_sgl_supported(ctrl))
+ dev_info(ctrl->device,
+ "passthrough uses implicit buffer lengths\n");
}
clear_bit(NVME_CTRL_DIRTY_CAPABILITY, &ctrl->flags);
@@ -5041,8 +5045,8 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
nvme_mpath_update(ctrl);
}
- nvme_change_uevent(ctrl, "NVME_EVENT=connected");
set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags);
+ nvme_change_uevent(ctrl, "NVME_EVENT=connected");
}
EXPORT_SYMBOL_GPL(nvme_start_ctrl);
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 9597a87cf05d..08889b20e5d8 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -120,21 +120,11 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
struct nvme_ns *ns = q->queuedata;
struct block_device *bdev = ns ? ns->disk->part0 : NULL;
bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk);
- struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
bool has_metadata = meta_buffer && meta_len;
- struct bio *bio = NULL;
int ret;
- if (!nvme_ctrl_sgl_supported(ctrl))
- dev_warn_once(ctrl->device, "using unchecked data buffer\n");
- if (has_metadata) {
- if (!supports_metadata)
- return -EINVAL;
-
- if (!nvme_ctrl_meta_sgl_supported(ctrl))
- dev_warn_once(ctrl->device,
- "using unchecked metadata buffer\n");
- }
+ if (has_metadata && !supports_metadata)
+ return -EINVAL;
if (iter)
ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
@@ -154,8 +144,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
return ret;
out_unmap:
- if (bio)
- blk_rq_unmap_user(bio);
+ if (req->bio)
+ blk_rq_unmap_user(req->bio);
return ret;
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9fd04cd7c5cb..139a10cd687f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2533,11 +2533,13 @@ static void nvme_free_host_mem_multi(struct nvme_dev *dev)
static void nvme_free_host_mem(struct nvme_dev *dev)
{
- if (dev->hmb_sgt)
+ if (dev->hmb_sgt) {
dma_free_noncontiguous(dev->dev, dev->host_mem_size,
dev->hmb_sgt, DMA_BIDIRECTIONAL);
- else
+ dev->hmb_sgt = NULL;
+ } else {
nvme_free_host_mem_multi(dev);
+ }
dma_free_coherent(dev->dev, dev->host_mem_descs_size,
dev->host_mem_descs, dev->host_mem_descs_dma);
@@ -4107,8 +4109,6 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c5f, 0x0555), /* Memblaze Pblaze5 adapter */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
- { PCI_DEVICE(0x144d, 0xa808), /* Samsung PM981/983 */
- .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 4904097dfd49..69bde270115e 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -117,6 +117,15 @@ config NVME_TARGET_AUTH
If unsure, say N.
+config NVME_TARGET_AUTH_DEBUG
+ bool "NVMe over Fabrics In-band Authentication debug messages"
+ depends on NVME_TARGET_AUTH
+ help
+ This enables additional debug messages including the generated
+ DH-HMAC-CHAP secrets to help debugging authentication failures.
+
+ If unsure, say N.
+
config NVME_TARGET_PCI_EPF
tristate "NVMe PCI Endpoint Function target support"
depends on NVME_TARGET && PCI_ENDPOINT
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 9a2eccdc8b13..edb9627d97b0 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -144,7 +144,6 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset)
goto out_unlock;
list_for_each_entry(p, &ctrl->subsys->hosts, entry) {
- pr_debug("check %s\n", nvmet_host_name(p->host));
if (strcmp(nvmet_host_name(p->host), ctrl->hostnqn))
continue;
host = p->host;
@@ -189,11 +188,12 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset)
ctrl->host_key = NULL;
goto out_free_hash;
}
+#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG
pr_debug("%s: using hash %s key %*ph\n", __func__,
ctrl->host_key->hash > 0 ?
nvme_auth_hmac_name(ctrl->host_key->hash) : "none",
(int)ctrl->host_key->len, ctrl->host_key->key);
-
+#endif
nvme_auth_free_key(ctrl->ctrl_key);
if (!host->dhchap_ctrl_secret) {
ctrl->ctrl_key = NULL;
@@ -207,11 +207,12 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, bool reset)
ctrl->ctrl_key = NULL;
goto out_free_hash;
}
+#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG
pr_debug("%s: using ctrl hash %s key %*ph\n", __func__,
ctrl->ctrl_key->hash > 0 ?
nvme_auth_hmac_name(ctrl->ctrl_key->hash) : "none",
(int)ctrl->ctrl_key->len, ctrl->ctrl_key->key);
-
+#endif
out_free_hash:
if (ret) {
if (ctrl->host_key) {
@@ -317,7 +318,6 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response,
if (ret)
goto out_free_challenge;
}
-
pr_debug("ctrl %d qid %d host response seq %u transaction %d\n",
ctrl->cntlid, req->sq->qid, req->sq->dhchap_s1,
req->sq->dhchap_tid);
@@ -434,8 +434,10 @@ int nvmet_auth_ctrl_exponential(struct nvmet_req *req,
ret = -EINVAL;
} else {
memcpy(buf, ctrl->dh_key, buf_size);
+#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG
pr_debug("%s: ctrl %d public key %*ph\n", __func__,
ctrl->cntlid, (int)buf_size, buf);
+#endif
}
return ret;
@@ -458,11 +460,12 @@ int nvmet_auth_ctrl_sesskey(struct nvmet_req *req,
ctrl->shash_id);
if (ret)
pr_debug("failed to compute session key, err %d\n", ret);
+#ifdef CONFIG_NVME_TARGET_AUTH_DEBUG
else
pr_debug("%s: session key %*ph\n", __func__,
(int)req->sq->dhchap_skey_len,
req->sq->dhchap_skey);
-
+#endif
return ret;
}
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 164a564ba3b4..20f150d17a96 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1321,8 +1321,10 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
queue->idx, cmd->req.cmd->common.command_id,
queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst),
le32_to_cpu(cmd->exp_ddgst));
- if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED))
+ if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED)) {
+ cmd->req.cqe->status = NVME_SC_CMD_SEQ_ERROR;
nvmet_req_uninit(&cmd->req);
+ }
nvmet_tcp_free_cmd_buffers(cmd);
ret = -EPROTO;
goto out;
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index b4677c5bba5b..8005c088e9ee 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -546,6 +546,15 @@ static const struct dmi_system_id asus_quirks[] = {
},
{
.callback = dmi_matched,
+ .ident = "ASUS Zenbook Duo UX8407AA",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Zenbook Duo UX8407AA"),
+ },
+ .driver_data = &quirk_asus_zenbook_duo_kbd,
+ },
+ {
+ .callback = dmi_matched,
.ident = "ASUS ROG Z13",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUS"),
diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c
index 24c151289dd3..6950bec2a9d8 100644
--- a/drivers/platform/x86/hp/hp-wmi.c
+++ b/drivers/platform/x86/hp/hp-wmi.c
@@ -206,6 +206,10 @@ static const struct dmi_system_id victus_s_thermal_profile_boards[] __initconst
.driver_data = (void *)&victus_s_thermal_params,
},
{
+ .matches = { DMI_MATCH(DMI_BOARD_NAME, "8BC2") },
+ .driver_data = (void *)&omen_v1_thermal_params,
+ },
+ {
.matches = { DMI_MATCH(DMI_BOARD_NAME, "8BCA") },
.driver_data = (void *)&omen_v1_thermal_params,
},
diff --git a/drivers/platform/x86/intel/plr_tpmi.c b/drivers/platform/x86/intel/plr_tpmi.c
index 05727169f49c..8faecc311038 100644
--- a/drivers/platform/x86/intel/plr_tpmi.c
+++ b/drivers/platform/x86/intel/plr_tpmi.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/mutex.h>
+#include <linux/notifier.h>
#include <linux/seq_file.h>
#include <linux/sprintf.h>
#include <linux/types.h>
@@ -60,6 +61,8 @@ struct tpmi_plr {
struct tpmi_plr_die *die_info;
int num_dies;
struct auxiliary_device *auxdev;
+ struct notifier_block nb;
+ struct mutex lock; /* Protect access to dbgfs_dir */
};
static const char * const plr_coarse_reasons[] = {
@@ -255,6 +258,30 @@ static ssize_t plr_status_write(struct file *filp, const char __user *ubuf,
}
DEFINE_SHOW_STORE_ATTRIBUTE(plr_status);
+static int intel_plr_notify(struct notifier_block *self, unsigned long action, void *data)
+{
+ struct tpmi_plr *plr = container_of(self, struct tpmi_plr, nb);
+
+ if (action == TPMI_CORE_EXIT) {
+ guard(mutex)(&plr->lock);
+ plr->dbgfs_dir = NULL;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int intel_plr_register_notifier(struct notifier_block *nb)
+{
+ nb->notifier_call = intel_plr_notify;
+ nb->priority = 0;
+ return tpmi_register_notifier(nb);
+}
+
+static void intel_plr_unregister_notifier(struct notifier_block *nb)
+{
+ tpmi_unregister_notifier(nb);
+}
+
static int intel_plr_probe(struct auxiliary_device *auxdev, const struct auxiliary_device_id *id)
{
struct oobmsm_plat_info *plat_info;
@@ -282,10 +309,18 @@ static int intel_plr_probe(struct auxiliary_device *auxdev, const struct auxilia
if (!plr)
return -ENOMEM;
+ err = devm_mutex_init(&auxdev->dev, &plr->lock);
+ if (err)
+ return err;
+
+ intel_plr_register_notifier(&plr->nb);
+
plr->die_info = devm_kcalloc(&auxdev->dev, num_resources, sizeof(*plr->die_info),
GFP_KERNEL);
- if (!plr->die_info)
- return -ENOMEM;
+ if (!plr->die_info) {
+ err = -ENOMEM;
+ goto err_notify;
+ }
plr->num_dies = num_resources;
plr->dbgfs_dir = debugfs_create_dir("plr", dentry);
@@ -326,6 +361,9 @@ static int intel_plr_probe(struct auxiliary_device *auxdev, const struct auxilia
err:
debugfs_remove_recursive(plr->dbgfs_dir);
+err_notify:
+ intel_plr_unregister_notifier(&plr->nb);
+
return err;
}
@@ -333,6 +371,9 @@ static void intel_plr_remove(struct auxiliary_device *auxdev)
{
struct tpmi_plr *plr = auxiliary_get_drvdata(auxdev);
+ intel_plr_unregister_notifier(&plr->nb);
+
+ guard(mutex)(&plr->lock);
debugfs_remove_recursive(plr->dbgfs_dir);
}
diff --git a/drivers/platform/x86/intel/vsec_tpmi.c b/drivers/platform/x86/intel/vsec_tpmi.c
index 7fc6ff8d1040..16fd7aa41f20 100644
--- a/drivers/platform/x86/intel/vsec_tpmi.c
+++ b/drivers/platform/x86/intel/vsec_tpmi.c
@@ -56,6 +56,7 @@
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/module.h>
+#include <linux/notifier.h>
#include <linux/pci.h>
#include <linux/security.h>
#include <linux/sizes.h>
@@ -188,6 +189,20 @@ struct tpmi_feature_state {
/* Used during auxbus device creation */
static DEFINE_IDA(intel_vsec_tpmi_ida);
+static BLOCKING_NOTIFIER_HEAD(tpmi_notify_list);
+
+int tpmi_register_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&tpmi_notify_list, nb);
+}
+EXPORT_SYMBOL_NS_GPL(tpmi_register_notifier, "INTEL_TPMI");
+
+int tpmi_unregister_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&tpmi_notify_list, nb);
+}
+EXPORT_SYMBOL_NS_GPL(tpmi_unregister_notifier, "INTEL_TPMI");
+
struct oobmsm_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev)
{
struct intel_vsec_device *vsec_dev = auxdev_to_ivdev(auxdev);
@@ -817,10 +832,6 @@ static int intel_vsec_tpmi_init(struct auxiliary_device *auxdev)
auxiliary_set_drvdata(auxdev, tpmi_info);
- ret = tpmi_create_devices(tpmi_info);
- if (ret)
- return ret;
-
/*
* Allow debugfs when security policy allows. Everything this debugfs
* interface provides, can also be done via /dev/mem access. If
@@ -830,6 +841,14 @@ static int intel_vsec_tpmi_init(struct auxiliary_device *auxdev)
if (!security_locked_down(LOCKDOWN_DEV_MEM) && capable(CAP_SYS_RAWIO))
tpmi_dbgfs_register(tpmi_info);
+ ret = tpmi_create_devices(tpmi_info);
+ if (ret) {
+ debugfs_remove_recursive(tpmi_info->dbgfs_dir);
+ return ret;
+ }
+
+ blocking_notifier_call_chain(&tpmi_notify_list, TPMI_CORE_INIT, auxdev);
+
return 0;
}
@@ -843,6 +862,8 @@ static void tpmi_remove(struct auxiliary_device *auxdev)
{
struct intel_tpmi_info *tpmi_info = auxiliary_get_drvdata(auxdev);
+ blocking_notifier_call_chain(&tpmi_notify_list, TPMI_CORE_EXIT, auxdev);
+
debugfs_remove_recursive(tpmi_info->dbgfs_dir);
}
diff --git a/drivers/platform/x86/lenovo/Kconfig b/drivers/platform/x86/lenovo/Kconfig
index f885127b007f..09b1b055d2e0 100644
--- a/drivers/platform/x86/lenovo/Kconfig
+++ b/drivers/platform/x86/lenovo/Kconfig
@@ -252,7 +252,6 @@ config LENOVO_WMI_GAMEZONE
select ACPI_PLATFORM_PROFILE
select LENOVO_WMI_EVENTS
select LENOVO_WMI_HELPERS
- select LENOVO_WMI_TUNING
help
Say Y here if you have a WMI aware Lenovo Legion device and would like to use the
platform-profile firmware interface to manage power usage.
diff --git a/drivers/platform/x86/lenovo/wmi-capdata.c b/drivers/platform/x86/lenovo/wmi-capdata.c
index b73d378f0e8b..714aa6fd6f1f 100644
--- a/drivers/platform/x86/lenovo/wmi-capdata.c
+++ b/drivers/platform/x86/lenovo/wmi-capdata.c
@@ -27,7 +27,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/acpi.h>
-#include <linux/bitfield.h>
#include <linux/bug.h>
#include <linux/cleanup.h>
#include <linux/component.h>
@@ -48,6 +47,7 @@
#include <linux/wmi.h>
#include "wmi-capdata.h"
+#include "wmi-helpers.h"
#define LENOVO_CAPABILITY_DATA_00_GUID "362A3AFE-3D96-4665-8530-96DAD5BB300E"
#define LENOVO_CAPABILITY_DATA_01_GUID "7A8F5407-CB67-4D6E-B547-39B3BE018154"
@@ -57,9 +57,9 @@
#define LWMI_FEATURE_ID_FAN_TEST 0x05
-#define LWMI_ATTR_ID_FAN_TEST \
- (FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, LWMI_DEVICE_ID_FAN) | \
- FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, LWMI_FEATURE_ID_FAN_TEST))
+#define LWMI_ATTR_ID_FAN_TEST \
+ lwmi_attr_id(LWMI_DEVICE_ID_FAN, LWMI_FEATURE_ID_FAN_TEST, \
+ LWMI_GZ_THERMAL_MODE_NONE, LWMI_TYPE_ID_NONE)
enum lwmi_cd_type {
LENOVO_CAPABILITY_DATA_00,
diff --git a/drivers/platform/x86/lenovo/wmi-capdata.h b/drivers/platform/x86/lenovo/wmi-capdata.h
index 8c1df3efcc55..c3e760b8c3c3 100644
--- a/drivers/platform/x86/lenovo/wmi-capdata.h
+++ b/drivers/platform/x86/lenovo/wmi-capdata.h
@@ -6,6 +6,7 @@
#define _LENOVO_WMI_CAPDATA_H_
#include <linux/bits.h>
+#include <linux/bitfield.h>
#include <linux/types.h>
#define LWMI_SUPP_VALID BIT(0)
@@ -19,6 +20,8 @@
#define LWMI_DEVICE_ID_FAN 0x04
+#define LWMI_TYPE_ID_NONE 0x00
+
struct component_match;
struct device;
struct cd_list;
@@ -57,6 +60,23 @@ struct lwmi_cd_binder {
cd_list_cb_t cd_fan_list_cb;
};
+/**
+ * lwmi_attr_id() - Formats a capability data attribute ID
+ * @dev_id: The u8 corresponding to the device ID.
+ * @feat_id: The u8 corresponding to the feature ID on the device.
+ * @mode_id: The u8 corresponding to the wmi-gamezone mode for set/get.
+ * @type_id: The u8 corresponding to the sub-device.
+ *
+ * Return: encoded capability data attribute ID.
+ */
+static inline u32 lwmi_attr_id(u8 dev_id, u8 feat_id, u8 mode_id, u8 type_id)
+{
+ return (FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, dev_id) |
+ FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, feat_id) |
+ FIELD_PREP(LWMI_ATTR_MODE_ID_MASK, mode_id) |
+ FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, type_id));
+}
+
void lwmi_cd_match_add_all(struct device *master, struct component_match **matchptr);
int lwmi_cd00_get_data(struct cd_list *list, u32 attribute_id, struct capdata00 *output);
int lwmi_cd01_get_data(struct cd_list *list, u32 attribute_id, struct capdata01 *output);
diff --git a/drivers/platform/x86/lenovo/wmi-events.c b/drivers/platform/x86/lenovo/wmi-events.c
index 4a6a2c82413a..fc25bba68a7c 100644
--- a/drivers/platform/x86/lenovo/wmi-events.c
+++ b/drivers/platform/x86/lenovo/wmi-events.c
@@ -17,7 +17,7 @@
#include <linux/wmi.h>
#include "wmi-events.h"
-#include "wmi-gamezone.h"
+#include "wmi-helpers.h"
#define THERMAL_MODE_EVENT_GUID "D320289E-8FEA-41E0-86F9-911D83151B5F"
diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.c b/drivers/platform/x86/lenovo/wmi-gamezone.c
index c7fe7e3c9f17..109c0b564a9f 100644
--- a/drivers/platform/x86/lenovo/wmi-gamezone.c
+++ b/drivers/platform/x86/lenovo/wmi-gamezone.c
@@ -21,9 +21,7 @@
#include <linux/wmi.h>
#include "wmi-events.h"
-#include "wmi-gamezone.h"
#include "wmi-helpers.h"
-#include "wmi-other.h"
#define LENOVO_GAMEZONE_GUID "887B54E3-DDDC-4B2C-8B88-68A26A8835D0"
@@ -201,7 +199,7 @@ static int lwmi_gz_profile_set(struct device *dev,
enum platform_profile_option profile)
{
struct lwmi_gz_priv *priv = dev_get_drvdata(dev);
- struct wmi_method_args_32 args;
+ struct wmi_method_args_32 args = {};
enum thermal_mode mode;
int ret;
@@ -383,7 +381,7 @@ static int lwmi_gz_probe(struct wmi_device *wdev, const void *context)
return ret;
priv->mode_nb.notifier_call = lwmi_gz_mode_call;
- return devm_lwmi_om_register_notifier(&wdev->dev, &priv->mode_nb);
+ return devm_lwmi_tm_register_notifier(&wdev->dev, &priv->mode_nb);
}
static const struct wmi_device_id lwmi_gz_id_table[] = {
@@ -405,7 +403,6 @@ module_wmi_driver(lwmi_gz_driver);
MODULE_IMPORT_NS("LENOVO_WMI_EVENTS");
MODULE_IMPORT_NS("LENOVO_WMI_HELPERS");
-MODULE_IMPORT_NS("LENOVO_WMI_OTHER");
MODULE_DEVICE_TABLE(wmi, lwmi_gz_id_table);
MODULE_AUTHOR("Derek J. Clark <derekjohn.clark@gmail.com>");
MODULE_DESCRIPTION("Lenovo GameZone WMI Driver");
diff --git a/drivers/platform/x86/lenovo/wmi-gamezone.h b/drivers/platform/x86/lenovo/wmi-gamezone.h
deleted file mode 100644
index 6b163a5eeb95..000000000000
--- a/drivers/platform/x86/lenovo/wmi-gamezone.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-/* Copyright (C) 2025 Derek J. Clark <derekjohn.clark@gmail.com> */
-
-#ifndef _LENOVO_WMI_GAMEZONE_H_
-#define _LENOVO_WMI_GAMEZONE_H_
-
-enum gamezone_events_type {
- LWMI_GZ_GET_THERMAL_MODE = 1,
-};
-
-enum thermal_mode {
- LWMI_GZ_THERMAL_MODE_QUIET = 0x01,
- LWMI_GZ_THERMAL_MODE_BALANCED = 0x02,
- LWMI_GZ_THERMAL_MODE_PERFORMANCE = 0x03,
- LWMI_GZ_THERMAL_MODE_EXTREME = 0xE0, /* Ver 6+ */
- LWMI_GZ_THERMAL_MODE_CUSTOM = 0xFF,
-};
-
-#endif /* !_LENOVO_WMI_GAMEZONE_H_ */
diff --git a/drivers/platform/x86/lenovo/wmi-helpers.c b/drivers/platform/x86/lenovo/wmi-helpers.c
index 7379defac500..7a198259e393 100644
--- a/drivers/platform/x86/lenovo/wmi-helpers.c
+++ b/drivers/platform/x86/lenovo/wmi-helpers.c
@@ -21,11 +21,15 @@
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/module.h>
+#include <linux/notifier.h>
#include <linux/unaligned.h>
#include <linux/wmi.h>
#include "wmi-helpers.h"
+/* Thermal mode notifier chain. */
+static BLOCKING_NOTIFIER_HEAD(tm_chain_head);
+
/**
* lwmi_dev_evaluate_int() - Helper function for calling WMI methods that
* return an integer.
@@ -46,7 +50,6 @@ int lwmi_dev_evaluate_int(struct wmi_device *wdev, u8 instance, u32 method_id,
unsigned char *buf, size_t size, u32 *retval)
{
struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
- union acpi_object *ret_obj __free(kfree) = NULL;
struct acpi_buffer input = { size, buf };
acpi_status status;
@@ -55,8 +58,9 @@ int lwmi_dev_evaluate_int(struct wmi_device *wdev, u8 instance, u32 method_id,
if (ACPI_FAILURE(status))
return -EIO;
+ union acpi_object *ret_obj __free(kfree) = output.pointer;
+
if (retval) {
- ret_obj = output.pointer;
if (!ret_obj)
return -ENODATA;
@@ -84,6 +88,103 @@ int lwmi_dev_evaluate_int(struct wmi_device *wdev, u8 instance, u32 method_id,
};
EXPORT_SYMBOL_NS_GPL(lwmi_dev_evaluate_int, "LENOVO_WMI_HELPERS");
+/**
+ * lwmi_tm_register_notifier() - Add a notifier to the blocking notifier chain
+ * @nb: The notifier_block struct to register
+ *
+ * Call blocking_notifier_chain_register to register the notifier block to the
+ * thermal mode notifier chain.
+ *
+ * Return: 0 on success, %-EEXIST on error.
+ */
+int lwmi_tm_register_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&tm_chain_head, nb);
+}
+EXPORT_SYMBOL_NS_GPL(lwmi_tm_register_notifier, "LENOVO_WMI_HELPERS");
+
+/**
+ * lwmi_tm_unregister_notifier() - Remove a notifier from the blocking notifier
+ * chain.
+ * @nb: The notifier_block struct to register
+ *
+ * Call blocking_notifier_chain_unregister to unregister the notifier block from the
+ * thermal mode notifier chain.
+ *
+ * Return: 0 on success, %-ENOENT on error.
+ */
+int lwmi_tm_unregister_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&tm_chain_head, nb);
+}
+EXPORT_SYMBOL_NS_GPL(lwmi_tm_unregister_notifier, "LENOVO_WMI_HELPERS");
+
+/**
+ * devm_lwmi_tm_unregister_notifier() - Remove a notifier from the blocking
+ * notifier chain.
+ * @data: Void pointer to the notifier_block struct to register.
+ *
+ * Call lwmi_tm_unregister_notifier to unregister the notifier block from the
+ * thermal mode notifier chain.
+ *
+ * Return: 0 on success, %-ENOENT on error.
+ */
+static void devm_lwmi_tm_unregister_notifier(void *data)
+{
+ struct notifier_block *nb = data;
+
+ lwmi_tm_unregister_notifier(nb);
+}
+
+/**
+ * devm_lwmi_tm_register_notifier() - Add a notifier to the blocking notifier
+ * chain.
+ * @dev: The parent device of the notifier_block struct.
+ * @nb: The notifier_block struct to register
+ *
+ * Call lwmi_tm_register_notifier to register the notifier block to the
+ * thermal mode notifier chain. Then add devm_lwmi_tm_unregister_notifier
+ * as a device managed action to automatically unregister the notifier block
+ * upon parent device removal.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int devm_lwmi_tm_register_notifier(struct device *dev,
+ struct notifier_block *nb)
+{
+ int ret;
+
+ ret = lwmi_tm_register_notifier(nb);
+ if (ret < 0)
+ return ret;
+
+ return devm_add_action_or_reset(dev, devm_lwmi_tm_unregister_notifier,
+ nb);
+}
+EXPORT_SYMBOL_NS_GPL(devm_lwmi_tm_register_notifier, "LENOVO_WMI_HELPERS");
+
+/**
+ * lwmi_tm_notifier_call() - Call functions for the notifier call chain.
+ * @mode: Pointer to a thermal mode enum to retrieve the data from.
+ *
+ * Call blocking_notifier_call_chain to retrieve the thermal mode from the
+ * lenovo-wmi-gamezone driver.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int lwmi_tm_notifier_call(enum thermal_mode *mode)
+{
+ int ret;
+
+ ret = blocking_notifier_call_chain(&tm_chain_head,
+ LWMI_GZ_GET_THERMAL_MODE, &mode);
+ if ((ret & ~NOTIFY_STOP_MASK) != NOTIFY_OK)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(lwmi_tm_notifier_call, "LENOVO_WMI_HELPERS");
+
MODULE_AUTHOR("Derek J. Clark <derekjohn.clark@gmail.com>");
MODULE_DESCRIPTION("Lenovo WMI Helpers Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/lenovo/wmi-helpers.h b/drivers/platform/x86/lenovo/wmi-helpers.h
index 20fd21749803..ed7db3ebba6c 100644
--- a/drivers/platform/x86/lenovo/wmi-helpers.h
+++ b/drivers/platform/x86/lenovo/wmi-helpers.h
@@ -7,6 +7,8 @@
#include <linux/types.h>
+struct device;
+struct notifier_block;
struct wmi_device;
struct wmi_method_args_32 {
@@ -14,7 +16,26 @@ struct wmi_method_args_32 {
u32 arg1;
};
+enum lwmi_event_type {
+ LWMI_GZ_GET_THERMAL_MODE = 0x01,
+};
+
+enum thermal_mode {
+ LWMI_GZ_THERMAL_MODE_NONE = 0x00,
+ LWMI_GZ_THERMAL_MODE_QUIET = 0x01,
+ LWMI_GZ_THERMAL_MODE_BALANCED = 0x02,
+ LWMI_GZ_THERMAL_MODE_PERFORMANCE = 0x03,
+ LWMI_GZ_THERMAL_MODE_EXTREME = 0xE0, /* Ver 6+ */
+ LWMI_GZ_THERMAL_MODE_CUSTOM = 0xFF,
+};
+
int lwmi_dev_evaluate_int(struct wmi_device *wdev, u8 instance, u32 method_id,
unsigned char *buf, size_t size, u32 *retval);
+int lwmi_tm_register_notifier(struct notifier_block *nb);
+int lwmi_tm_unregister_notifier(struct notifier_block *nb);
+int devm_lwmi_tm_register_notifier(struct device *dev,
+ struct notifier_block *nb);
+int lwmi_tm_notifier_call(enum thermal_mode *mode);
+
#endif /* !_LENOVO_WMI_HELPERS_H_ */
diff --git a/drivers/platform/x86/lenovo/wmi-other.c b/drivers/platform/x86/lenovo/wmi-other.c
index 6c2febe1a595..d318ba432fdc 100644
--- a/drivers/platform/x86/lenovo/wmi-other.c
+++ b/drivers/platform/x86/lenovo/wmi-other.c
@@ -40,16 +40,13 @@
#include <linux/kobject.h>
#include <linux/limits.h>
#include <linux/module.h>
-#include <linux/notifier.h>
#include <linux/platform_profile.h>
#include <linux/types.h>
#include <linux/wmi.h>
#include "wmi-capdata.h"
#include "wmi-events.h"
-#include "wmi-gamezone.h"
#include "wmi-helpers.h"
-#include "wmi-other.h"
#include "../firmware_attributes_class.h"
#define LENOVO_OTHER_MODE_GUID "DC2A8805-3A8C-41BA-A6F7-092E0089CD3B"
@@ -62,8 +59,6 @@
#define LWMI_FEATURE_ID_FAN_RPM 0x03
-#define LWMI_TYPE_ID_NONE 0x00
-
#define LWMI_FEATURE_VALUE_GET 17
#define LWMI_FEATURE_VALUE_SET 18
@@ -71,17 +66,15 @@
#define LWMI_FAN_NR 4
#define LWMI_FAN_ID(x) ((x) + LWMI_FAN_ID_BASE)
-#define LWMI_ATTR_ID_FAN_RPM(x) \
- (FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, LWMI_DEVICE_ID_FAN) | \
- FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, LWMI_FEATURE_ID_FAN_RPM) | \
- FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, LWMI_FAN_ID(x)))
-
#define LWMI_FAN_DIV 100
+#define LWMI_ATTR_ID_FAN_RPM(x) \
+ lwmi_attr_id(LWMI_DEVICE_ID_FAN, LWMI_FEATURE_ID_FAN_RPM, \
+ LWMI_GZ_THERMAL_MODE_NONE, LWMI_FAN_ID(x))
+
#define LWMI_OM_FW_ATTR_BASE_PATH "lenovo-wmi-other"
#define LWMI_OM_HWMON_NAME "lenovo_wmi_other"
-static BLOCKING_NOTIFIER_HEAD(om_chain_head);
static DEFINE_IDA(lwmi_om_ida);
enum attribute_property {
@@ -109,7 +102,6 @@ struct lwmi_om_priv {
struct device *hwmon_dev;
struct device *fw_attr_dev;
struct kset *fw_attr_kset;
- struct notifier_block nb;
struct wmi_device *wdev;
int ida_id;
@@ -166,7 +158,7 @@ MODULE_PARM_DESC(relax_fan_constraint,
*/
static int lwmi_om_fan_get_set(struct lwmi_om_priv *priv, int channel, u32 *val, bool set)
{
- struct wmi_method_args_32 args;
+ struct wmi_method_args_32 args = {};
u32 method_id, retval;
int err;
@@ -548,13 +540,26 @@ out:
/* ======== fw_attributes (component: lenovo-wmi-capdata 01) ======== */
struct tunable_attr_01 {
- struct capdata01 *capdata;
struct device *dev;
- u32 feature_id;
- u32 device_id;
- u32 type_id;
+ u8 feature_id;
+ u8 device_id;
+ u8 type_id;
+ u8 cd_mode_id; /* mode arg for searching capdata */
+ u8 cv_mode_id; /* mode arg for set/get current_value */
};
+/**
+ * tunable_attr_01_id() - Formats a tunable_attr_01 to a capdata attribute ID
+ * @attr: The tunable_attr_01 to format.
+ * @mode: The u8 corresponding to the wmi-gamezone mode for set/get.
+ *
+ * Return: encoded capability data attribute ID.
+ */
+static u32 tunable_attr_01_id(struct tunable_attr_01 *attr, u8 mode)
+{
+ return lwmi_attr_id(attr->device_id, attr->feature_id, mode, attr->type_id);
+}
+
static struct tunable_attr_01 ppt_pl1_spl = {
.device_id = LWMI_DEVICE_ID_CPU,
.feature_id = LWMI_FEATURE_ID_CPU_SPL,
@@ -578,102 +583,6 @@ struct capdata01_attr_group {
struct tunable_attr_01 *tunable_attr;
};
-/**
- * lwmi_om_register_notifier() - Add a notifier to the blocking notifier chain
- * @nb: The notifier_block struct to register
- *
- * Call blocking_notifier_chain_register to register the notifier block to the
- * lenovo-wmi-other driver notifier chain.
- *
- * Return: 0 on success, %-EEXIST on error.
- */
-int lwmi_om_register_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(&om_chain_head, nb);
-}
-EXPORT_SYMBOL_NS_GPL(lwmi_om_register_notifier, "LENOVO_WMI_OTHER");
-
-/**
- * lwmi_om_unregister_notifier() - Remove a notifier from the blocking notifier
- * chain.
- * @nb: The notifier_block struct to register
- *
- * Call blocking_notifier_chain_unregister to unregister the notifier block from the
- * lenovo-wmi-other driver notifier chain.
- *
- * Return: 0 on success, %-ENOENT on error.
- */
-int lwmi_om_unregister_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(&om_chain_head, nb);
-}
-EXPORT_SYMBOL_NS_GPL(lwmi_om_unregister_notifier, "LENOVO_WMI_OTHER");
-
-/**
- * devm_lwmi_om_unregister_notifier() - Remove a notifier from the blocking
- * notifier chain.
- * @data: Void pointer to the notifier_block struct to register.
- *
- * Call lwmi_om_unregister_notifier to unregister the notifier block from the
- * lenovo-wmi-other driver notifier chain.
- *
- * Return: 0 on success, %-ENOENT on error.
- */
-static void devm_lwmi_om_unregister_notifier(void *data)
-{
- struct notifier_block *nb = data;
-
- lwmi_om_unregister_notifier(nb);
-}
-
-/**
- * devm_lwmi_om_register_notifier() - Add a notifier to the blocking notifier
- * chain.
- * @dev: The parent device of the notifier_block struct.
- * @nb: The notifier_block struct to register
- *
- * Call lwmi_om_register_notifier to register the notifier block to the
- * lenovo-wmi-other driver notifier chain. Then add devm_lwmi_om_unregister_notifier
- * as a device managed action to automatically unregister the notifier block
- * upon parent device removal.
- *
- * Return: 0 on success, or an error code.
- */
-int devm_lwmi_om_register_notifier(struct device *dev,
- struct notifier_block *nb)
-{
- int ret;
-
- ret = lwmi_om_register_notifier(nb);
- if (ret < 0)
- return ret;
-
- return devm_add_action_or_reset(dev, devm_lwmi_om_unregister_notifier,
- nb);
-}
-EXPORT_SYMBOL_NS_GPL(devm_lwmi_om_register_notifier, "LENOVO_WMI_OTHER");
-
-/**
- * lwmi_om_notifier_call() - Call functions for the notifier call chain.
- * @mode: Pointer to a thermal mode enum to retrieve the data from.
- *
- * Call blocking_notifier_call_chain to retrieve the thermal mode from the
- * lenovo-wmi-gamezone driver.
- *
- * Return: 0 on success, or an error code.
- */
-static int lwmi_om_notifier_call(enum thermal_mode *mode)
-{
- int ret;
-
- ret = blocking_notifier_call_chain(&om_chain_head,
- LWMI_GZ_GET_THERMAL_MODE, &mode);
- if ((ret & ~NOTIFY_STOP_MASK) != NOTIFY_OK)
- return -EINVAL;
-
- return 0;
-}
-
/* Attribute Methods */
/**
@@ -718,12 +627,7 @@ static ssize_t attr_capdata01_show(struct kobject *kobj,
u32 attribute_id;
int value, ret;
- attribute_id =
- FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, tunable_attr->device_id) |
- FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, tunable_attr->feature_id) |
- FIELD_PREP(LWMI_ATTR_MODE_ID_MASK,
- LWMI_GZ_THERMAL_MODE_CUSTOM) |
- FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, tunable_attr->type_id);
+ attribute_id = tunable_attr_01_id(tunable_attr, tunable_attr->cd_mode_id);
ret = lwmi_cd01_get_data(priv->cd01_list, attribute_id, &capdata);
if (ret)
@@ -775,27 +679,22 @@ static ssize_t attr_current_value_store(struct kobject *kobj,
struct tunable_attr_01 *tunable_attr)
{
struct lwmi_om_priv *priv = dev_get_drvdata(tunable_attr->dev);
- struct wmi_method_args_32 args;
+ struct wmi_method_args_32 args = {};
struct capdata01 capdata;
enum thermal_mode mode;
- u32 attribute_id;
u32 value;
int ret;
- ret = lwmi_om_notifier_call(&mode);
+ ret = lwmi_tm_notifier_call(&mode);
if (ret)
return ret;
if (mode != LWMI_GZ_THERMAL_MODE_CUSTOM)
return -EBUSY;
- attribute_id =
- FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, tunable_attr->device_id) |
- FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, tunable_attr->feature_id) |
- FIELD_PREP(LWMI_ATTR_MODE_ID_MASK, mode) |
- FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, tunable_attr->type_id);
+ args.arg0 = tunable_attr_01_id(tunable_attr, tunable_attr->cd_mode_id);
- ret = lwmi_cd01_get_data(priv->cd01_list, attribute_id, &capdata);
+ ret = lwmi_cd01_get_data(priv->cd01_list, args.arg0, &capdata);
if (ret)
return ret;
@@ -806,7 +705,7 @@ static ssize_t attr_current_value_store(struct kobject *kobj,
if (value < capdata.min_value || value > capdata.max_value)
return -EINVAL;
- args.arg0 = attribute_id;
+ args.arg0 = tunable_attr_01_id(tunable_attr, tunable_attr->cv_mode_id);
args.arg1 = value;
ret = lwmi_dev_evaluate_int(priv->wdev, 0x0, LWMI_FEATURE_VALUE_SET,
@@ -838,23 +737,20 @@ static ssize_t attr_current_value_show(struct kobject *kobj,
struct tunable_attr_01 *tunable_attr)
{
struct lwmi_om_priv *priv = dev_get_drvdata(tunable_attr->dev);
- struct wmi_method_args_32 args;
+ struct wmi_method_args_32 args = {};
enum thermal_mode mode;
- u32 attribute_id;
int retval;
int ret;
- ret = lwmi_om_notifier_call(&mode);
+ ret = lwmi_tm_notifier_call(&mode);
if (ret)
return ret;
- attribute_id =
- FIELD_PREP(LWMI_ATTR_DEV_ID_MASK, tunable_attr->device_id) |
- FIELD_PREP(LWMI_ATTR_FEAT_ID_MASK, tunable_attr->feature_id) |
- FIELD_PREP(LWMI_ATTR_MODE_ID_MASK, mode) |
- FIELD_PREP(LWMI_ATTR_TYPE_ID_MASK, tunable_attr->type_id);
+ /* If "no-mode" is the supported mode, ensure we never send current mode */
+ if (tunable_attr->cv_mode_id == LWMI_GZ_THERMAL_MODE_NONE)
+ mode = tunable_attr->cv_mode_id;
- args.arg0 = attribute_id;
+ args.arg0 = tunable_attr_01_id(tunable_attr, mode);
ret = lwmi_dev_evaluate_int(priv->wdev, 0x0, LWMI_FEATURE_VALUE_GET,
(unsigned char *)&args, sizeof(args),
@@ -865,6 +761,81 @@ static ssize_t attr_current_value_show(struct kobject *kobj,
return sysfs_emit(buf, "%d\n", retval);
}
+/**
+ * lwmi_attr_01_is_supported() - Determine if the given attribute is supported.
+ * @tunable_attr: The attribute to verify.
+ *
+ * For an attribute to be supported it must have a functional get/set method,
+ * as well as associated capability data stored in the capdata01 table.
+ *
+ * First check if the attribute has a corresponding data table under custom mode
+ * (0xff), then under no mode (0x00). If either of those passes, check if the
+ * supported field of the capdata struct is > 0. If it is supported, store the
+ * successful mode in the cd_mode_id field of tunable_attr.
+ *
+ * If the attribute capdata shows it is supported, attempt to determine the mode
+ * for the current value property get/set methods using a similar pattern to the
+ * capdata table check. If the value returned by either mode is 0 or an error,
+ * assume that mode is not supported. Otherwise, store the successful mode in the
+ * cv_mode_id field of tunable_attr.
+ *
+ * If any of the above checks fail then the attribute is not fully supported.
+ *
+ * Return: true if capdata and set/get modes are found, otherwise false.
+ */
+static bool lwmi_attr_01_is_supported(struct tunable_attr_01 *tunable_attr)
+{
+ u8 modes[2] = { LWMI_GZ_THERMAL_MODE_CUSTOM, LWMI_GZ_THERMAL_MODE_NONE };
+ struct lwmi_om_priv *priv = dev_get_drvdata(tunable_attr->dev);
+ struct wmi_method_args_32 args = {};
+ bool cd_mode_found = false;
+ bool cv_mode_found = false;
+ struct capdata01 capdata;
+ int retval, ret, i;
+
+ /* Determine tunable_attr->cd_mode_id */
+ for (i = 0; i < ARRAY_SIZE(modes); i++) {
+ args.arg0 = tunable_attr_01_id(tunable_attr, modes[i]);
+
+ ret = lwmi_cd01_get_data(priv->cd01_list, args.arg0, &capdata);
+ if (ret || !capdata.supported)
+ continue;
+
+ tunable_attr->cd_mode_id = modes[i];
+ cd_mode_found = true;
+ break;
+ }
+
+ if (!cd_mode_found)
+ return cd_mode_found;
+
+ dev_dbg(tunable_attr->dev,
+ "cd_mode_id: %#010x\n", args.arg0);
+
+ /* Determine tunable_attr->cv_mode_id, returns 1 if supported */
+ for (i = 0; i < ARRAY_SIZE(modes); i++) {
+ args.arg0 = tunable_attr_01_id(tunable_attr, modes[i]);
+
+ ret = lwmi_dev_evaluate_int(priv->wdev, 0x0, LWMI_FEATURE_VALUE_GET,
+ (u8 *)&args, sizeof(args),
+ &retval);
+ if (ret || !retval)
+ continue;
+
+ tunable_attr->cv_mode_id = modes[i];
+ cv_mode_found = true;
+ break;
+ }
+
+ if (!cv_mode_found)
+ return cv_mode_found;
+
+ dev_dbg(tunable_attr->dev, "cv_mode_id: %#010x, attribute support level: %#010x\n",
+ args.arg0, capdata.supported);
+
+ return capdata.supported > 0;
+}
+
/* Lenovo WMI Other Mode Attribute macros */
#define __LWMI_ATTR_RO(_func, _name) \
{ \
@@ -959,17 +930,17 @@ static struct capdata01_attr_group cd01_attr_groups[] = {
/**
* lwmi_om_fw_attr_add() - Register all firmware_attributes_class members
* @priv: The Other Mode driver data.
- *
- * Return: Either 0, or an error code.
*/
-static int lwmi_om_fw_attr_add(struct lwmi_om_priv *priv)
+static void lwmi_om_fw_attr_add(struct lwmi_om_priv *priv)
{
unsigned int i;
int err;
- priv->ida_id = ida_alloc(&lwmi_om_ida, GFP_KERNEL);
- if (priv->ida_id < 0)
- return priv->ida_id;
+ err = ida_alloc(&lwmi_om_ida, GFP_KERNEL);
+ if (err < 0)
+ goto err_no_ida;
+
+ priv->ida_id = err;
priv->fw_attr_dev = device_create(&firmware_attributes_class, NULL,
MKDEV(0, 0), NULL, "%s-%u",
@@ -988,14 +959,16 @@ static int lwmi_om_fw_attr_add(struct lwmi_om_priv *priv)
}
for (i = 0; i < ARRAY_SIZE(cd01_attr_groups) - 1; i++) {
+ cd01_attr_groups[i].tunable_attr->dev = &priv->wdev->dev;
+ if (!lwmi_attr_01_is_supported(cd01_attr_groups[i].tunable_attr))
+ continue;
+
err = sysfs_create_group(&priv->fw_attr_kset->kobj,
cd01_attr_groups[i].attr_group);
if (err)
goto err_remove_groups;
-
- cd01_attr_groups[i].tunable_attr->dev = &priv->wdev->dev;
}
- return 0;
+ return;
err_remove_groups:
while (i--)
@@ -1009,7 +982,12 @@ err_destroy_classdev:
err_free_ida:
ida_free(&lwmi_om_ida, priv->ida_id);
- return err;
+
+err_no_ida:
+ priv->ida_id = -EIDRM;
+
+ dev_warn(&priv->wdev->dev,
+ "failed to register firmware-attributes device: %d\n", err);
}
/**
@@ -1018,12 +996,17 @@ err_free_ida:
*/
static void lwmi_om_fw_attr_remove(struct lwmi_om_priv *priv)
{
+ if (priv->ida_id < 0)
+ return;
+
for (unsigned int i = 0; i < ARRAY_SIZE(cd01_attr_groups) - 1; i++)
sysfs_remove_group(&priv->fw_attr_kset->kobj,
cd01_attr_groups[i].attr_group);
kset_unregister(priv->fw_attr_kset);
device_unregister(priv->fw_attr_dev);
+ ida_free(&lwmi_om_ida, priv->ida_id);
+ priv->ida_id = -EIDRM;
}
/* ======== Self (master: lenovo-wmi-other) ======== */
@@ -1060,12 +1043,17 @@ static int lwmi_om_master_bind(struct device *dev)
priv->cd00_list = binder.cd00_list;
priv->cd01_list = binder.cd01_list;
- if (!priv->cd00_list || !priv->cd01_list)
+ if (!priv->cd00_list || !priv->cd01_list) {
+ component_unbind_all(dev, NULL);
+
return -ENODEV;
+ }
lwmi_om_fan_info_collect_cd00(priv);
- return lwmi_om_fw_attr_add(priv);
+ lwmi_om_fw_attr_add(priv);
+
+ return 0;
}
/**
@@ -1117,13 +1105,7 @@ static int lwmi_other_probe(struct wmi_device *wdev, const void *context)
static void lwmi_other_remove(struct wmi_device *wdev)
{
- struct lwmi_om_priv *priv = dev_get_drvdata(&wdev->dev);
-
component_master_del(&wdev->dev, &lwmi_om_master_ops);
-
- /* No IDA to free if the driver is never bound to its components. */
- if (priv->ida_id >= 0)
- ida_free(&lwmi_om_ida, priv->ida_id);
}
static const struct wmi_device_id lwmi_other_id_table[] = {
diff --git a/drivers/platform/x86/lenovo/wmi-other.h b/drivers/platform/x86/lenovo/wmi-other.h
deleted file mode 100644
index 8ebf5602bb99..000000000000
--- a/drivers/platform/x86/lenovo/wmi-other.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-/* Copyright (C) 2025 Derek J. Clark <derekjohn.clark@gmail.com> */
-
-#ifndef _LENOVO_WMI_OTHER_H_
-#define _LENOVO_WMI_OTHER_H_
-
-struct device;
-struct notifier_block;
-
-int lwmi_om_register_notifier(struct notifier_block *nb);
-int lwmi_om_unregister_notifier(struct notifier_block *nb);
-int devm_lwmi_om_register_notifier(struct device *dev,
- struct notifier_block *nb);
-
-#endif /* !_LENOVO_WMI_OTHER_H_ */
diff --git a/drivers/platform/x86/samsung-galaxybook.c b/drivers/platform/x86/samsung-galaxybook.c
index 755cb82bdb60..6382af0b106c 100644
--- a/drivers/platform/x86/samsung-galaxybook.c
+++ b/drivers/platform/x86/samsung-galaxybook.c
@@ -53,7 +53,7 @@ struct samsung_galaxybook {
void *i8042_filter_ptr;
struct work_struct block_recording_hotkey_work;
- struct input_dev *camera_lens_cover_switch;
+ struct input_dev *input;
struct acpi_battery_hook battery_hook;
@@ -197,6 +197,9 @@ static const guid_t performance_mode_guid =
#define GB_ACPI_NOTIFY_DEVICE_ON_TABLE 0x6c
#define GB_ACPI_NOTIFY_DEVICE_OFF_TABLE 0x6d
#define GB_ACPI_NOTIFY_HOTKEY_PERFORMANCE_MODE 0x70
+#define GB_ACPI_NOTIFY_HOTKEY_KBD_BACKLIGHT 0x7d
+#define GB_ACPI_NOTIFY_HOTKEY_MICMUTE 0x6e
+#define GB_ACPI_NOTIFY_HOTKEY_CAMERA 0x6f
#define GB_KEY_KBD_BACKLIGHT_KEYDOWN 0x2c
#define GB_KEY_KBD_BACKLIGHT_KEYUP 0xac
@@ -859,13 +862,29 @@ static int block_recording_acpi_set(struct samsung_galaxybook *galaxybook, const
if (err)
return err;
- input_report_switch(galaxybook->camera_lens_cover_switch,
+ input_report_switch(galaxybook->input,
SW_CAMERA_LENS_COVER, value ? 1 : 0);
- input_sync(galaxybook->camera_lens_cover_switch);
+ input_sync(galaxybook->input);
return 0;
}
+static int galaxybook_input_init(struct samsung_galaxybook *galaxybook)
+{
+ galaxybook->input = devm_input_allocate_device(&galaxybook->platform->dev);
+ if (!galaxybook->input)
+ return -ENOMEM;
+
+ galaxybook->input->name = "Samsung Galaxy Book Camera Lens Cover";
+ galaxybook->input->phys = DRIVER_NAME "/input0";
+ galaxybook->input->id.bustype = BUS_HOST;
+
+ input_set_capability(galaxybook->input, EV_KEY, KEY_MICMUTE);
+ input_set_capability(galaxybook->input, EV_SW, SW_CAMERA_LENS_COVER);
+
+ return input_register_device(galaxybook->input);
+}
+
static int galaxybook_block_recording_init(struct samsung_galaxybook *galaxybook)
{
bool value;
@@ -887,24 +906,8 @@ static int galaxybook_block_recording_init(struct samsung_galaxybook *galaxybook
return GB_NOT_SUPPORTED;
}
- galaxybook->camera_lens_cover_switch =
- devm_input_allocate_device(&galaxybook->platform->dev);
- if (!galaxybook->camera_lens_cover_switch)
- return -ENOMEM;
-
- galaxybook->camera_lens_cover_switch->name = "Samsung Galaxy Book Camera Lens Cover";
- galaxybook->camera_lens_cover_switch->phys = DRIVER_NAME "/input0";
- galaxybook->camera_lens_cover_switch->id.bustype = BUS_HOST;
-
- input_set_capability(galaxybook->camera_lens_cover_switch, EV_SW, SW_CAMERA_LENS_COVER);
-
- err = input_register_device(galaxybook->camera_lens_cover_switch);
- if (err)
- return err;
-
- input_report_switch(galaxybook->camera_lens_cover_switch,
- SW_CAMERA_LENS_COVER, value ? 1 : 0);
- input_sync(galaxybook->camera_lens_cover_switch);
+ input_report_switch(galaxybook->input, SW_CAMERA_LENS_COVER, value ? 1 : 0);
+ input_sync(galaxybook->input);
return 0;
}
@@ -1260,6 +1263,25 @@ static void galaxybook_acpi_notify(acpi_handle handle, u32 event, void *data)
if (galaxybook->has_performance_mode)
platform_profile_cycle();
break;
+ case GB_ACPI_NOTIFY_HOTKEY_KBD_BACKLIGHT:
+ if (galaxybook->has_kbd_backlight)
+ schedule_work(&galaxybook->kbd_backlight_hotkey_work);
+ break;
+ case GB_ACPI_NOTIFY_HOTKEY_MICMUTE:
+ input_report_key(galaxybook->input, KEY_MICMUTE, 1);
+ input_sync(galaxybook->input);
+ input_report_key(galaxybook->input, KEY_MICMUTE, 0);
+ input_sync(galaxybook->input);
+ break;
+ case GB_ACPI_NOTIFY_HOTKEY_CAMERA:
+ if (galaxybook->has_block_recording) {
+ schedule_work(&galaxybook->block_recording_hotkey_work);
+ } else {
+ input_report_switch(galaxybook->input, SW_CAMERA_LENS_COVER,
+ !test_bit(SW_CAMERA_LENS_COVER, galaxybook->input->sw));
+ input_sync(galaxybook->input);
+ }
+ break;
default:
dev_warn(&galaxybook->platform->dev,
"unknown ACPI notification event: 0x%x\n", event);
@@ -1392,6 +1414,11 @@ static int galaxybook_probe(struct platform_device *pdev)
return dev_err_probe(&galaxybook->platform->dev, err,
"failed to initialize kbd_backlight\n");
+ err = galaxybook_input_init(galaxybook);
+ if (err)
+ return dev_err_probe(&galaxybook->platform->dev, err,
+ "failed to initialize input device\n");
+
err = galaxybook_fw_attrs_init(galaxybook);
if (err)
return dev_err_probe(&galaxybook->platform->dev, err,
diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
index 41b14344b16f..988fc291241d 100644
--- a/drivers/resctrl/mpam_devices.c
+++ b/drivers/resctrl/mpam_devices.c
@@ -164,11 +164,17 @@ static void mpam_free_garbage(void)
/*
* Once mpam is enabled, new requestors cannot further reduce the available
* partid. Assert that the size is fixed, and new requestors will be turned
- * away.
+ * away. This is needed when walking over structures sized by PARTID.
+ *
+ * During mpam_disable() these structures are not fixed, but the MSC state
+ * is still reset using whatever sizes have been discovered so far. As only
+ * PARTID 0 will be used after mpam_disable(), any race would be benign.
+ * Skip the check if a mpam_disable_reason has been set.
*/
static void mpam_assert_partid_sizes_fixed(void)
{
- WARN_ON_ONCE(!partid_max_published);
+ if (!mpam_disable_reason)
+ WARN_ON_ONCE(!partid_max_published);
}
static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg)
@@ -728,10 +734,9 @@ static void mpam_enable_quirks(struct mpam_msc *msc)
* Try and see what values stick in this bit. If we can write either value,
* its probably not implemented by hardware.
*/
-static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg)
+static bool mpam_ris_hw_probe_csu_nrdy(struct mpam_msc_ris *ris)
{
- u32 now;
- u64 mon_sel;
+ u32 now, mon_sel, ctl_val;
bool can_set, can_clear;
struct mpam_msc *msc = ris->vmsc->msc;
@@ -740,23 +745,30 @@ static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg)
mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) |
FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
- _mpam_write_monsel_reg(msc, mon_reg, mon_sel);
+ mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);
+
+ /* Hardware might ignore nrdy if it's not enabled */
+ ctl_val = MSMON_CFG_CSU_CTL_TYPE_CSU;
+ ctl_val |= MSMON_CFG_x_CTL_MATCH_PARTID;
+ ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG;
+ ctl_val |= MSMON_CFG_x_CTL_EN;
+ mpam_write_monsel_reg(msc, CFG_CSU_FLT, 0);
+ mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val);
- _mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY);
- now = _mpam_read_monsel_reg(msc, mon_reg);
+ _mpam_write_monsel_reg(msc, MSMON_CSU, MSMON___NRDY);
+ now = _mpam_read_monsel_reg(msc, MSMON_CSU);
can_set = now & MSMON___NRDY;
- _mpam_write_monsel_reg(msc, mon_reg, 0);
- now = _mpam_read_monsel_reg(msc, mon_reg);
+ _mpam_write_monsel_reg(msc, MSMON_CSU, 0);
+ /* Configuration change to try and coax hardware into setting nrdy */
+ mpam_write_monsel_reg(msc, CFG_CSU_FLT, 0x1);
+ now = _mpam_read_monsel_reg(msc, MSMON_CSU);
can_clear = !(now & MSMON___NRDY);
mpam_mon_sel_unlock(msc);
return (!can_set || !can_clear);
}
-#define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg) \
- _mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg)
-
static void mpam_ris_hw_probe(struct mpam_msc_ris *ris)
{
int err;
@@ -873,20 +885,18 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris)
mpam_set_feature(mpam_feat_msmon_csu_xcl, props);
/* Is NRDY hardware managed? */
- hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU);
- if (hw_managed)
- mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props);
- }
+ hw_managed = mpam_ris_hw_probe_csu_nrdy(ris);
- /*
- * Accept the missing firmware property if NRDY appears
- * un-implemented.
- */
- if (err && mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, props))
- dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware.");
+ /*
+ * Accept the missing firmware property if NRDY appears
+ * un-implemented.
+ */
+ if (err && hw_managed)
+ dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware.");
+ }
}
if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) {
- bool has_long, hw_managed;
+ bool has_long;
u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR);
props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr);
@@ -905,16 +915,6 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris)
} else {
mpam_set_feature(mpam_feat_msmon_mbwu_31counter, props);
}
-
- /* Is NRDY hardware managed? */
- hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU);
- if (hw_managed)
- mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props);
-
- /*
- * Don't warn about any missing firmware property for
- * MBWU NRDY - it doesn't make any sense!
- */
}
}
}
@@ -1197,7 +1197,6 @@ static void __ris_msmon_read(void *arg)
bool reset_on_next_read = false;
struct mpam_msc_ris *ris = m->ris;
struct msmon_mbwu_state *mbwu_state;
- struct mpam_props *rprops = &ris->props;
struct mpam_msc *msc = m->ris->vmsc->msc;
u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt;
@@ -1253,8 +1252,7 @@ static void __ris_msmon_read(void *arg)
switch (m->type) {
case mpam_feat_msmon_csu:
now = mpam_read_monsel_reg(msc, CSU);
- if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops))
- nrdy = now & MSMON___NRDY;
+ nrdy = now & MSMON___NRDY;
now = FIELD_GET(MSMON___VALUE, now);
if (mpam_has_quirk(IGNORE_CSU_NRDY, msc) && m->waited_timeout)
@@ -1266,8 +1264,7 @@ static void __ris_msmon_read(void *arg)
case mpam_feat_msmon_mbwu_63counter:
if (m->type != mpam_feat_msmon_mbwu_31counter) {
now = mpam_msc_read_mbwu_l(msc);
- if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
- nrdy = now & MSMON___L_NRDY;
+ nrdy = now & MSMON___L_NRDY;
if (m->type == mpam_feat_msmon_mbwu_63counter)
now = FIELD_GET(MSMON___LWD_VALUE, now);
@@ -1275,8 +1272,7 @@ static void __ris_msmon_read(void *arg)
now = FIELD_GET(MSMON___L_VALUE, now);
} else {
now = mpam_read_monsel_reg(msc, MBWU);
- if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
- nrdy = now & MSMON___NRDY;
+ nrdy = now & MSMON___NRDY;
now = FIELD_GET(MSMON___VALUE, now);
}
@@ -2585,6 +2581,9 @@ static void __destroy_component_cfg(struct mpam_component *comp)
lockdep_assert_held(&mpam_list_lock);
+ if (!comp->cfg)
+ return;
+
add_to_garbage(comp->cfg);
list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
msc = vmsc->msc;
diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
index 1914aefdcba9..04d1a59f02af 100644
--- a/drivers/resctrl/mpam_internal.h
+++ b/drivers/resctrl/mpam_internal.h
@@ -181,14 +181,12 @@ enum mpam_device_features {
mpam_feat_msmon_csu,
mpam_feat_msmon_csu_capture,
mpam_feat_msmon_csu_xcl,
- mpam_feat_msmon_csu_hw_nrdy,
mpam_feat_msmon_mbwu,
mpam_feat_msmon_mbwu_31counter,
mpam_feat_msmon_mbwu_44counter,
mpam_feat_msmon_mbwu_63counter,
mpam_feat_msmon_mbwu_capture,
mpam_feat_msmon_mbwu_rwbw,
- mpam_feat_msmon_mbwu_hw_nrdy,
mpam_feat_partid_nrw,
MPAM_FEATURE_LAST
};
diff --git a/drivers/staging/greybus/hid.c b/drivers/staging/greybus/hid.c
index 1f58c907c036..f1f9f6fbc00e 100644
--- a/drivers/staging/greybus/hid.c
+++ b/drivers/staging/greybus/hid.c
@@ -201,7 +201,7 @@ static void gb_hid_init_report(struct gb_hid *ghid, struct hid_report *report)
* we just need to setup the input fields, so using
* hid_report_raw_event is safe.
*/
- hid_report_raw_event(ghid->hid, report->type, ghid->inbuf, size, 1);
+ hid_report_raw_event(ghid->hid, report->type, ghid->inbuf, ghid->bufsize, size, 1);
}
static void gb_hid_init_reports(struct gb_hid *ghid)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 3f8d093aacf8..050e7542952e 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -483,6 +483,40 @@ static int vfio_pci_core_runtime_resume(struct device *dev)
#endif /* CONFIG_PM */
/*
+ * Eager-request BAR resources, and iomap them. Soft failures are
+ * allowed, and consumers must check the barmap before use in order to
+ * give compatible user-visible behaviour with the previous on-demand
+ * allocation method.
+ */
+static void vfio_pci_core_map_bars(struct vfio_pci_core_device *vdev)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ int i;
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ int bar = i + PCI_STD_RESOURCES;
+
+ vdev->barmap[bar] = IOMEM_ERR_PTR(-ENODEV);
+
+ if (!pci_resource_len(pdev, i))
+ continue;
+
+ if (pci_request_selected_regions(pdev, 1 << bar, "vfio")) {
+ pci_dbg(pdev, "Failed to reserve region %d\n", bar);
+ vdev->barmap[bar] = IOMEM_ERR_PTR(-EBUSY);
+ continue;
+ }
+
+ vdev->barmap[bar] = pci_iomap(pdev, bar, 0);
+ if (!vdev->barmap[bar]) {
+ pci_dbg(pdev, "Failed to iomap region %d\n", bar);
+ pci_release_selected_regions(pdev, 1 << bar);
+ vdev->barmap[bar] = IOMEM_ERR_PTR(-ENOMEM);
+ }
+ }
+}
+
+/*
* The pci-driver core runtime PM routines always save the device state
* before going into suspended state. If the device is going into low power
* state with only with runtime PM ops, then no explicit handling is needed
@@ -568,6 +602,7 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
+ vfio_pci_core_map_bars(vdev);
return 0;
@@ -648,7 +683,7 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
bar = i + PCI_STD_RESOURCES;
- if (!vdev->barmap[bar])
+ if (IS_ERR_OR_NULL(vdev->barmap[bar]))
continue;
pci_iounmap(pdev, vdev->barmap[bar]);
pci_release_selected_regions(pdev, 1 << bar);
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index f87fd32e4a01..1a177ce7de54 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -244,9 +244,11 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
return -EINVAL;
/*
- * For PCI the region_index is the BAR number like everything else.
+ * For PCI the region_index is the BAR number like everything
+ * else. Check that PCI resources have been claimed for it.
*/
- if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX)
+ if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX ||
+ vfio_pci_core_setup_barmap(vdev, get_dma_buf.region_index))
return -ENODEV;
dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
@@ -354,19 +356,18 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
if (revoked) {
kref_put(&priv->kref, vfio_pci_dma_buf_done);
wait_for_completion(&priv->comp);
- } else {
/*
- * Kref is initialize again, because when revoke
- * was performed the reference counter was decreased
- * to zero to trigger completion.
+ * Re-arm the registered kref reference and the
+ * completion so the post-revoke state matches the
+ * post-creation state. An un-revoke followed by a
+ * new mapping needs the kref to be non-zero before
+ * kref_get(), and vfio_pci_dma_buf_cleanup()
+ * delegates its drain back through this revoke
+ * path on a possibly-already-revoked dma-buf.
*/
kref_init(&priv->kref);
- /*
- * There is no need to wait as no mapping was
- * performed when the previous status was
- * priv->revoked == true.
- */
reinit_completion(&priv->comp);
+ } else {
dma_resv_lock(priv->dmabuf->resv, NULL);
priv->revoked = false;
dma_resv_unlock(priv->dmabuf->resv);
@@ -382,21 +383,22 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
struct vfio_pci_dma_buf *tmp;
down_write(&vdev->memory_lock);
+
+ /*
+ * Drain any active mappings via the revoke path. The move is
+ * idempotent for dma-bufs already in the revoked state and
+ * leaves every priv with the kref re-armed and the completion
+ * ready, so cleanup itself does not need to participate in kref
+ * bookkeeping.
+ */
+ vfio_pci_dma_buf_move(vdev, true);
+
list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
if (!get_file_active(&priv->dmabuf->file))
continue;
- dma_resv_lock(priv->dmabuf->resv, NULL);
list_del_init(&priv->dmabufs_elm);
priv->vdev = NULL;
- priv->revoked = true;
- dma_buf_invalidate_mappings(priv->dmabuf);
- dma_resv_wait_timeout(priv->dmabuf->resv,
- DMA_RESV_USAGE_BOOKKEEP, false,
- MAX_SCHEDULE_TIMEOUT);
- dma_resv_unlock(priv->dmabuf->resv);
- kref_put(&priv->kref, vfio_pci_dma_buf_done);
- wait_for_completion(&priv->comp);
vfio_device_put_registration(&vdev->vdev);
fput(priv->dmabuf->file);
}
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 4251ee03e146..3bfbb879a005 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -198,27 +198,15 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
+/*
+ * The barmap is set up in vfio_pci_core_enable(). Callers use this
+ * function to check that the BAR resources are requested or that the
+ * pci_iomap() was done.
+ */
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
- struct pci_dev *pdev = vdev->pdev;
- int ret;
- void __iomem *io;
-
- if (vdev->barmap[bar])
- return 0;
-
- ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
- if (ret)
- return ret;
-
- io = pci_iomap(pdev, bar, 0);
- if (!io) {
- pci_release_selected_regions(pdev, 1 << bar);
- return -ENOMEM;
- }
-
- vdev->barmap[bar] = io;
-
+ if (IS_ERR(vdev->barmap[bar]))
+ return PTR_ERR(vdev->barmap[bar]);
return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c
index 75a39862c1df..5b98e0e93807 100644
--- a/drivers/xen/xen-acpi-pad.c
+++ b/drivers/xen/xen-acpi-pad.c
@@ -110,9 +110,13 @@ static void acpi_pad_notify(acpi_handle handle, u32 event,
static int acpi_pad_probe(struct platform_device *pdev)
{
- struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
+ struct acpi_device *device;
acpi_status status;
+ device = ACPI_COMPANION(&pdev->dev);
+ if (!device)
+ return -ENODEV;
+
strcpy(acpi_device_name(device), ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME);
strcpy(acpi_device_class(device), ACPI_PROCESSOR_AGGREGATOR_CLASS);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index e6f5a17a13e3..b611c64119db 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2412,29 +2412,25 @@ static struct btrfs_block_group *btrfs_create_block_group(
*/
static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
{
- u64 start = 0;
+ struct rb_node *node;
int ret = 0;
- while (1) {
+ /*
+ * This is called during mount from btrfs_read_block_groups(), before
+ * any background threads are started, so no concurrent writers can
+ * modify the mapping_tree. No lock is needed here.
+ */
+ for (node = rb_first_cached(&fs_info->mapping_tree); node; node = rb_next(node)) {
struct btrfs_chunk_map *map;
struct btrfs_block_group *bg;
- /*
- * btrfs_find_chunk_map() will return the first chunk map
- * intersecting the range, so setting @length to 1 is enough to
- * get the first chunk.
- */
- map = btrfs_find_chunk_map(fs_info, start, 1);
- if (!map)
- break;
-
+ map = rb_entry(node, struct btrfs_chunk_map, rb_node);
bg = btrfs_lookup_block_group(fs_info, map->start);
if (unlikely(!bg)) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu doesn't have corresponding block group",
map->start, map->chunk_len);
ret = -EUCLEAN;
- btrfs_free_chunk_map(map);
break;
}
if (unlikely(bg->start != map->start || bg->length != map->chunk_len ||
@@ -2447,12 +2443,9 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
bg->start, bg->length,
bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
ret = -EUCLEAN;
- btrfs_free_chunk_map(map);
btrfs_put_block_group(bg);
break;
}
- start = map->start + map->chunk_len;
- btrfs_free_chunk_map(map);
btrfs_put_block_group(bg);
}
return ret;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b2393a48a8fe..a02b62e0a8f3 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -407,22 +407,18 @@ static noinline int add_ra_bio_pages(struct inode *inode,
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
- /*
- * Avoid direct reclaim when the caller does not allow it. Since
- * add_ra_bio_pages() is always speculative, suppress allocation warnings
- * in either case.
- */
+ /* Avoid direct reclaim when the caller does not allow it. */
+ constraint_gfp = ~__GFP_FS;
+ cache_gfp = GFP_NOFS | __GFP_NOWARN;
if (!direct_reclaim) {
- constraint_gfp = ~(__GFP_FS | __GFP_DIRECT_RECLAIM) | __GFP_NOWARN;
- cache_gfp = (GFP_NOFS & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN;
- } else {
- constraint_gfp = (~__GFP_FS) | __GFP_NOWARN;
- cache_gfp = GFP_NOFS | __GFP_NOWARN;
+ constraint_gfp &= ~__GFP_DIRECT_RECLAIM;
+ cache_gfp &= ~__GFP_DIRECT_RECLAIM;
}
while (cur < compressed_end) {
pgoff_t page_end;
pgoff_t pg_index = cur >> PAGE_SHIFT;
+ gfp_t masked_constraint_gfp;
u32 add_size;
if (pg_index > end_index)
@@ -449,8 +445,14 @@ static noinline int add_ra_bio_pages(struct inode *inode,
continue;
}
- folio = filemap_alloc_folio(mapping_gfp_constraint(mapping, constraint_gfp),
- 0, NULL);
+ /*
+ * Since add_ra_bio_pages() is always speculative, suppress
+ * allocation warnings.
+ */
+ masked_constraint_gfp = mapping_gfp_constraint(mapping, constraint_gfp);
+ masked_constraint_gfp |= __GFP_NOWARN;
+
+ folio = filemap_alloc_folio(masked_constraint_gfp, 0, NULL);
if (!folio)
break;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8a11be02eeb9..c0a30bb213d7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4686,6 +4686,7 @@ static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
free_extent_buffer_stale(eb);
}
}
+ btrfs_extent_io_tree_release(dirty_pages);
}
static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 906d5c21ebc4..75136a172710 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9299,10 +9299,38 @@ next:
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
(actual_len > inode->i_size) &&
(cur_offset > inode->i_size)) {
+ u64 range_start;
+ u64 range_end;
+
if (cur_offset > actual_len)
i_size = actual_len;
else
i_size = cur_offset;
+
+ /*
+ * Make sure the file_extent_tree covers the entire
+ * range [old_i_size, new_i_size) before we update
+ * disk_i_size. Without this, a previous KEEP_SIZE
+ * prealloc that extended past i_size (and was lost
+ * across umount/mount because file_extent_tree is
+ * only populated up to round_up(i_size) on inode
+ * load) can leave a gap inside this range. That gap
+ * would cause btrfs_inode_safe_disk_i_size_write()
+ * (via find_contiguous_extent_bit() starting at 0)
+ * to truncate disk_i_size to the start of the gap,
+ * making the persisted size smaller than i_size.
+ */
+ range_start = round_down(inode->i_size, fs_info->sectorsize);
+ range_end = round_up(i_size, fs_info->sectorsize);
+ ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
+ range_start, range_end - range_start);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ if (own_trans)
+ btrfs_end_transaction(trans);
+ break;
+ }
+
i_size_write(inode, i_size);
btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 248adb785051..194f581b36f3 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1293,14 +1293,13 @@ static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
blk_finish_plug(&plug);
ret2 = btrfs_wait_extents(fs_info, dirty_pages);
- btrfs_extent_io_tree_release(&trans->transaction->dirty_pages);
-
if (ret)
return ret;
- else if (ret2)
+ if (ret2)
return ret2;
- else
- return 0;
+
+ btrfs_extent_io_tree_release(&trans->transaction->dirty_pages);
+ return 0;
}
/*
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1454760332ff..0a86f672cc09 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1336,6 +1336,7 @@ void ceph_process_folio_batch(struct address_space *mapping,
ceph_wbc, folio);
if (rc == -ENODATA) {
folio_unlock(folio);
+ folio_put(folio);
ceph_wbc->fbatch.folios[i] = NULL;
continue;
} else if (rc == -E2BIG) {
@@ -1346,6 +1347,7 @@ void ceph_process_folio_batch(struct address_space *mapping,
if (!folio_clear_dirty_for_io(folio)) {
doutc(cl, "%p !folio_clear_dirty_for_io\n", folio);
folio_unlock(folio);
+ folio_put(folio);
ceph_wbc->fbatch.folios[i] = NULL;
continue;
}
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 4dc9426643e8..053d5bf0c9f0 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -228,12 +228,19 @@ static int get_quota_realm(struct ceph_mds_client *mdsc, struct inode *inode,
restart:
realm = ceph_inode(inode)->i_snap_realm;
- if (realm)
+ if (realm) {
ceph_get_snap_realm(mdsc, realm);
- else
- pr_err_ratelimited_client(cl,
- "%p %llx.%llx null i_snap_realm\n",
- inode, ceph_vinop(inode));
+ } else {
+ /*
+ * i_snap_realm is NULL when all caps have been released, e.g.
+ * after an MDS session rejection. This is a transient state;
+ * the realm will be restored once caps are re-granted.
+ * Treat it as "no quota realm found".
+ */
+ doutc(cl, "%p %llx.%llx null i_snap_realm\n",
+ inode, ceph_vinop(inode));
+ }
+
while (realm) {
bool has_inode;
@@ -340,12 +347,19 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
down_read(&mdsc->snap_rwsem);
restart:
realm = ceph_inode(inode)->i_snap_realm;
- if (realm)
+ if (realm) {
ceph_get_snap_realm(mdsc, realm);
- else
- pr_err_ratelimited_client(cl,
- "%p %llx.%llx null i_snap_realm\n",
- inode, ceph_vinop(inode));
+ } else {
+ /*
+ * i_snap_realm is NULL when all caps have been released, e.g.
+ * after an MDS session rejection. This is a transient state;
+ * the realm will be restored once caps are re-granted.
+ * Treat it as "quota not exceeded".
+ */
+ doutc(cl, "%p %llx.%llx null i_snap_realm\n",
+ inode, ceph_vinop(inode));
+ }
+
while (realm) {
bool has_inode;
@@ -496,6 +510,9 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
u64 total = 0, used, free;
bool is_updated = false;
+ if (!ceph_has_realms_with_quotas(d_inode(fsc->sb->s_root)))
+ return false;
+
down_read(&mdsc->snap_rwsem);
get_quota_realm(mdsc, d_inode(fsc->sb->s_root), QUOTA_GET_MAX_BYTES,
&realm, true);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5f87f62091a1..e773be07f767 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1254,6 +1254,22 @@ retry:
ceph_vinop(inode), name, ceph_cap_string(issued));
__build_xattrs(inode);
+ /*
+ * __build_xattrs() may have released and reacquired i_ceph_lock,
+ * during which handle_cap_grant() could have replaced i_xattrs.blob
+ * with a newer MDS-provided blob and bumped i_xattrs.version. If that
+ * caused __build_xattrs() to rebuild the rb-tree from the new blob,
+ * count/names_size/vals_size may now be larger than when
+ * required_blob_size was computed above. Recompute it here so the
+ * prealloc_blob size check below reflects the current tree state.
+ */
+ required_blob_size = __get_required_blob_size(ci, name_len, val_len);
+ if (required_blob_size > mdsc->mdsmap->m_max_xattr_size) {
+ doutc(cl, "sync (size too large): %d > %llu\n",
+ required_blob_size, mdsc->mdsmap->m_max_xattr_size);
+ goto do_sync;
+ }
+
if (!ci->i_xattrs.prealloc_blob ||
required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
struct ceph_buffer *blob;
@@ -1294,6 +1310,7 @@ retry:
do_sync:
spin_unlock(&ci->i_ceph_lock);
+ ceph_buffer_put(old_blob);
do_sync_unlocked:
if (lock_snap_rwsem)
up_read(&mdsc->snap_rwsem);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index b0a6549b3848..b36ee619cdcd 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -355,7 +355,7 @@ static ssize_t iomap_dio_bio_iter_one(struct iomap_iter *iter,
if (dio->flags & IOMAP_DIO_BOUNCE)
ret = bio_iov_iter_bounce(bio, dio->submit.iter,
- iomap_max_bio_size(&iter->iomap));
+ iomap_max_bio_size(&iter->iomap), alignment);
else
ret = bio_iov_iter_get_pages(bio, dio->submit.iter,
alignment - 1);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 85e94c30285a..ab39ec885440 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1413,6 +1413,9 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dst, clone->cl_dst_pos, clone->cl_count,
EX_ISSYNC(cstate->current_fh.fh_export));
+ if (!status && (READ_ONCE(dst->nf_file->f_mode) & FMODE_NOCMTIME) != 0)
+ nfsd_update_cmtime_attr(dst->nf_file, 0);
+
nfsd_file_put(dst);
nfsd_file_put(src);
out:
@@ -2118,8 +2121,10 @@ do_callback:
set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
trace_nfsd_copy_async_done(copy);
- nfsd4_send_cb_offload(copy);
atomic_dec(&copy->cp_nn->pending_async_copies);
+ if (copy->cp_res.wr_bytes_written > 0 && copy->attr_update)
+ nfsd_update_cmtime_attr(copy->nf_dst->nf_file, 0);
+ nfsd4_send_cb_offload(copy);
return 0;
}
@@ -2179,6 +2184,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
sizeof(result->cb_stateid));
dup_copy_fields(copy, async_copy);
+ if ((READ_ONCE(copy->nf_dst->nf_file->f_mode) &
+ FMODE_NOCMTIME) != 0)
+ async_copy->attr_update = true;
memcpy(async_copy->cp_cb_offload.co_referring_sessionid.data,
cstate->session->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
@@ -2197,6 +2205,10 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
} else {
status = nfsd4_do_copy(copy, copy->nf_src->nf_file,
copy->nf_dst->nf_file, true);
+ if ((READ_ONCE(copy->nf_dst->nf_file->f_mode) &
+ FMODE_NOCMTIME) != 0 &&
+ copy->cp_res.wr_bytes_written > 0)
+ nfsd_update_cmtime_attr(copy->nf_dst->nf_file, 0);
}
out:
trace_nfsd_copy_done(copy, status);
@@ -2535,10 +2547,6 @@ nfsd4_get_dir_delegation(struct svc_rqst *rqstp,
dd = nfsd_get_dir_deleg(cstate, gdd, nf);
nfsd_file_put(nf);
if (IS_ERR(dd)) {
- int err = PTR_ERR(dd);
-
- if (err != -EAGAIN)
- return nfserrno(err);
gdd->gddrnf_status = GDD4_UNAVAIL;
return nfs_ok;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c2d13b26a687..6837b63d9864 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1221,10 +1221,6 @@ static void put_deleg_file(struct nfs4_file *fp)
static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct file *f)
{
- struct iattr ia = { .ia_valid = ATTR_ATIME | ATTR_CTIME | ATTR_MTIME | ATTR_DELEG };
- struct inode *inode = file_inode(f);
- int ret;
-
/* don't do anything if FMODE_NOCMTIME isn't set */
if ((READ_ONCE(f->f_mode) & FMODE_NOCMTIME) == 0)
return;
@@ -1242,17 +1238,7 @@ static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct f
return;
/* Stamp everything to "now" */
- inode_lock(inode);
- ret = notify_change(&nop_mnt_idmap, f->f_path.dentry, &ia, NULL);
- inode_unlock(inode);
- if (ret) {
- struct inode *inode = file_inode(f);
-
- pr_notice_ratelimited("nfsd: Unable to update timestamps on inode %02x:%02x:%llu: %d\n",
- MAJOR(inode->i_sb->s_dev),
- MINOR(inode->i_sb->s_dev),
- inode->i_ino, ret);
- }
+ nfsd_update_cmtime_attr(f, ATTR_ATIME);
}
static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
@@ -1865,6 +1851,13 @@ void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb)
break;
case SC_TYPE_LAYOUT:
ls = layoutstateid(stid);
+ spin_lock(&clp->cl_lock);
+ if (stid->sc_status == 0) {
+ stid->sc_status |=
+ SC_STATUS_ADMIN_REVOKED;
+ atomic_inc(&clp->cl_admin_revoked);
+ }
+ spin_unlock(&clp->cl_lock);
nfsd4_close_layout(ls);
break;
}
@@ -6378,7 +6371,6 @@ nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
}
open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG :
OPEN_DELEGATE_WRITE;
- dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat);
dp->dl_atime = stat.atime;
dp->dl_ctime = stat.ctime;
@@ -9429,11 +9421,15 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
if (status != nfserr_jukebox ||
!nfsd_wait_for_delegreturn(rqstp, inode))
goto out_status;
+ status = nfs_ok;
+ goto out_status;
+ }
+ if (!ncf->ncf_file_modified) {
+ if (ncf->ncf_initial_cinfo != ncf->ncf_cb_change)
+ ncf->ncf_file_modified = true;
+ else if (i_size_read(inode) != ncf->ncf_cb_fsize)
+ ncf->ncf_file_modified = true;
}
- if (!ncf->ncf_file_modified &&
- (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
- ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
- ncf->ncf_file_modified = true;
if (ncf->ncf_file_modified) {
int err;
@@ -9560,3 +9556,31 @@ out_delegees:
put_nfs4_file(fp);
return ERR_PTR(status);
}
+
+/**
+ * nfsd_update_cmtime_attr - update file's delegated ctime/mtime,
+ * and optionally other attributes (ie ATTR_ATIME).
+ * @f: pointer to an opened file
+ * @flags: any additional flags that should be updated
+ *
+ * Given upon opening a file delegated attributes were issues, update
+ * @f attributes to current times.
+ */
+void nfsd_update_cmtime_attr(struct file *f, unsigned int flags)
+{
+ int ret;
+ struct inode *inode = file_inode(f);
+ struct iattr attr = {
+ .ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_DELEG | flags,
+ };
+
+ inode_lock(inode);
+ ret = notify_change(&nop_mnt_idmap, f->f_path.dentry, &attr, NULL);
+ inode_unlock(inode);
+ if (ret)
+ pr_notice_ratelimited("nfsd: Unable to update timestamps on "
+ "inode %02x:%02x:%llu: %d\n",
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev),
+ inode->i_ino, ret);
+}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 953675eba5c3..c5ccea64c281 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -843,6 +843,7 @@ extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
void nfsd4_put_client(struct nfs4_client *clp);
void nfsd4_async_copy_reaper(struct nfsd_net *nn);
bool nfsd4_has_active_async_copies(struct nfs4_client *clp);
+void nfsd_update_cmtime_attr(struct file *f, unsigned int flags);
extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
struct xdr_netobj princhash, struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 417e9ad9fbb3..9a4124c77e04 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -752,6 +752,7 @@ struct nfsd4_copy {
struct nfsd_file *nf_src;
struct nfsd_file *nf_dst;
+ bool attr_update;
copy_stateid_t cp_stateid;
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index b63ec7ab6e51..4c8b1b9ade8b 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -736,7 +736,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
void *data);
static int smb3_get_tree(struct fs_context *fc);
-static void smb3_sync_ses_chan_max(struct cifs_ses *ses, unsigned int max_channels);
+static void smb3_sync_ses_chan_max(struct cifs_ses *ses, size_t max_channels);
static int smb3_reconfigure(struct fs_context *fc);
static const struct fs_context_operations smb3_fs_context_ops = {
@@ -1010,25 +1010,34 @@ do { \
int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
{
+ char *password = NULL, *password2 = NULL;
+
if (ses->password &&
cifs_sb->ctx->password &&
strcmp(ses->password, cifs_sb->ctx->password)) {
- kfree_sensitive(cifs_sb->ctx->password);
- cifs_sb->ctx->password = kstrdup(ses->password, GFP_KERNEL);
- if (!cifs_sb->ctx->password)
+ password = kstrdup(ses->password, GFP_KERNEL);
+ if (!password)
return -ENOMEM;
}
if (ses->password2 &&
cifs_sb->ctx->password2 &&
strcmp(ses->password2, cifs_sb->ctx->password2)) {
- kfree_sensitive(cifs_sb->ctx->password2);
- cifs_sb->ctx->password2 = kstrdup(ses->password2, GFP_KERNEL);
- if (!cifs_sb->ctx->password2) {
- kfree_sensitive(cifs_sb->ctx->password);
- cifs_sb->ctx->password = NULL;
+ password2 = kstrdup(ses->password2, GFP_KERNEL);
+ if (!password2) {
+ kfree_sensitive(password);
return -ENOMEM;
}
}
+
+ if (password) {
+ kfree_sensitive(cifs_sb->ctx->password);
+ cifs_sb->ctx->password = password;
+ }
+ if (password2) {
+ kfree_sensitive(cifs_sb->ctx->password2);
+ cifs_sb->ctx->password2 = password2;
+ }
+
return 0;
}
@@ -1041,7 +1050,7 @@ int smb3_sync_session_ctx_passwords(struct cifs_sb_info *cifs_sb, struct cifs_se
* with the session's channel lock. This should be called whenever the maximum
* allowed channels for a session changes (e.g., after a remount or reconfigure).
*/
-static void smb3_sync_ses_chan_max(struct cifs_ses *ses, unsigned int max_channels)
+static void smb3_sync_ses_chan_max(struct cifs_ses *ses, size_t max_channels)
{
spin_lock(&ses->chan_lock);
ses->chan_max = max_channels;
@@ -1051,12 +1060,15 @@ static void smb3_sync_ses_chan_max(struct cifs_ses *ses, unsigned int max_channe
static int smb3_reconfigure(struct fs_context *fc)
{
struct smb3_fs_context *ctx = smb3_fc2context(fc);
+ struct smb3_fs_context *new_ctx = NULL;
+ struct smb3_fs_context *old_ctx = NULL;
struct dentry *root = fc->root;
struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
struct cifs_ses *ses = cifs_sb_master_tcon(cifs_sb)->ses;
unsigned int rsize = ctx->rsize, wsize = ctx->wsize;
char *new_password = NULL, *new_password2 = NULL;
bool need_recon = false;
+ bool need_mchan_update;
int rc;
if (ses->expired_pwd)
@@ -1066,6 +1078,16 @@ static int smb3_reconfigure(struct fs_context *fc)
if (rc)
return rc;
+ old_ctx = kzalloc_obj(*old_ctx);
+ if (!old_ctx)
+ return -ENOMEM;
+
+ rc = smb3_fs_context_dup(old_ctx, cifs_sb->ctx);
+ if (rc) {
+ kfree(old_ctx);
+ return rc;
+ }
+
/*
* We can not change UNC/username/password/domainname/
* workstation_name/nodename/iocharset
@@ -1075,16 +1097,22 @@ static int smb3_reconfigure(struct fs_context *fc)
STEAL_STRING(cifs_sb, ctx, UNC);
STEAL_STRING(cifs_sb, ctx, source);
STEAL_STRING(cifs_sb, ctx, username);
+ STEAL_STRING(cifs_sb, ctx, domainname);
+ STEAL_STRING(cifs_sb, ctx, nodename);
+ STEAL_STRING(cifs_sb, ctx, iocharset);
- if (need_recon == false)
+ if (!need_recon) {
STEAL_STRING_SENSITIVE(cifs_sb, ctx, password);
- else {
+ } else {
if (ctx->password) {
new_password = kstrdup(ctx->password, GFP_KERNEL);
- if (!new_password)
- return -ENOMEM;
- } else
+ if (!new_password) {
+ rc = -ENOMEM;
+ goto restore_ctx;
+ }
+ } else {
STEAL_STRING_SENSITIVE(cifs_sb, ctx, password);
+ }
}
/*
@@ -1094,11 +1122,29 @@ static int smb3_reconfigure(struct fs_context *fc)
if (ctx->password2) {
new_password2 = kstrdup(ctx->password2, GFP_KERNEL);
if (!new_password2) {
- kfree_sensitive(new_password);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto restore_ctx;
}
- } else
+ } else {
STEAL_STRING_SENSITIVE(cifs_sb, ctx, password2);
+ }
+
+ /* if rsize or wsize not passed in on remount, use previous values */
+ ctx->rsize = rsize ? CIFS_ALIGN_RSIZE(fc, rsize) : cifs_sb->ctx->rsize;
+ ctx->wsize = wsize ? CIFS_ALIGN_WSIZE(fc, wsize) : cifs_sb->ctx->wsize;
+
+ new_ctx = kzalloc_obj(*new_ctx);
+ if (!new_ctx) {
+ rc = -ENOMEM;
+ goto restore_ctx;
+ }
+
+ rc = smb3_fs_context_dup(new_ctx, ctx);
+ if (rc)
+ goto restore_ctx;
+
+ need_mchan_update = ctx->multichannel != cifs_sb->ctx->multichannel ||
+ ctx->max_channels != cifs_sb->ctx->max_channels;
/*
* we may update the passwords in the ses struct below. Make sure we do
@@ -1109,54 +1155,55 @@ static int smb3_reconfigure(struct fs_context *fc)
/*
* smb2_reconnect may swap password and password2 in case session setup
* failed. First get ctx passwords in sync with ses passwords. It should
- * be okay to do this even if this function were to return an error at a
- * later stage
+ * be done before committing new passwords.
*/
rc = smb3_sync_session_ctx_passwords(cifs_sb, ses);
if (rc) {
mutex_unlock(&ses->session_mutex);
- kfree_sensitive(new_password);
- kfree_sensitive(new_password2);
- return rc;
+ goto cleanup_new_ctx;
+ }
+
+ /*
+ * If multichannel or max_channels has changed, update the session's channels accordingly.
+ * This may add or remove channels to match the new configuration.
+ */
+ if (need_mchan_update) {
+ /* Prevent concurrent scaling operations */
+ spin_lock(&ses->ses_lock);
+ if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) {
+ spin_unlock(&ses->ses_lock);
+ mutex_unlock(&ses->session_mutex);
+ rc = -EINVAL;
+ goto cleanup_new_ctx;
+ }
+ ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS;
+ spin_unlock(&ses->ses_lock);
}
/*
- * now that allocations for passwords are done, commit them
+ * Commit session passwords before any channel work so newly added
+ * channels authenticate with the new credentials.
*/
if (new_password) {
kfree_sensitive(ses->password);
ses->password = new_password;
+ new_password = NULL;
}
if (new_password2) {
kfree_sensitive(ses->password2);
ses->password2 = new_password2;
+ new_password2 = NULL;
}
- /*
- * If multichannel or max_channels has changed, update the session's channels accordingly.
- * This may add or remove channels to match the new configuration.
- */
- if ((ctx->multichannel != cifs_sb->ctx->multichannel) ||
- (ctx->max_channels != cifs_sb->ctx->max_channels)) {
-
+ if (need_mchan_update) {
/* Synchronize ses->chan_max with the new mount context */
smb3_sync_ses_chan_max(ses, ctx->max_channels);
- /* Now update the session's channels to match the new configuration */
- /* Prevent concurrent scaling operations */
- spin_lock(&ses->ses_lock);
- if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) {
- spin_unlock(&ses->ses_lock);
- mutex_unlock(&ses->session_mutex);
- return -EINVAL;
- }
- ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS;
- spin_unlock(&ses->ses_lock);
mutex_unlock(&ses->session_mutex);
- rc = smb3_update_ses_channels(ses, ses->server,
- false /* from_reconnect */,
- false /* disable_mchan */);
+ smb3_update_ses_channels(ses, ses->server,
+ false /* from_reconnect */,
+ false /* disable_mchan */);
/* Clear scaling flag after operation */
spin_lock(&ses->ses_lock);
@@ -1166,16 +1213,12 @@ static int smb3_reconfigure(struct fs_context *fc)
mutex_unlock(&ses->session_mutex);
}
- STEAL_STRING(cifs_sb, ctx, domainname);
- STEAL_STRING(cifs_sb, ctx, nodename);
- STEAL_STRING(cifs_sb, ctx, iocharset);
-
- /* if rsize or wsize not passed in on remount, use previous values */
- ctx->rsize = rsize ? CIFS_ALIGN_RSIZE(fc, rsize) : cifs_sb->ctx->rsize;
- ctx->wsize = wsize ? CIFS_ALIGN_WSIZE(fc, wsize) : cifs_sb->ctx->wsize;
-
smb3_cleanup_fs_context_contents(cifs_sb->ctx);
- rc = smb3_fs_context_dup(cifs_sb->ctx, ctx);
+ memcpy(cifs_sb->ctx, new_ctx, sizeof(*new_ctx));
+ kfree(new_ctx);
+ new_ctx = NULL;
+ smb3_cleanup_fs_context(old_ctx);
+ old_ctx = NULL;
smb3_update_mnt_flags(cifs_sb);
#ifdef CONFIG_CIFS_DFS_UPCALL
if (!rc)
@@ -1183,6 +1226,18 @@ static int smb3_reconfigure(struct fs_context *fc)
#endif
return rc;
+
+cleanup_new_ctx:
+ smb3_cleanup_fs_context_contents(new_ctx);
+restore_ctx:
+ kfree(new_ctx);
+ kfree_sensitive(new_password);
+ kfree_sensitive(new_password2);
+ smb3_cleanup_fs_context_contents(cifs_sb->ctx);
+ memcpy(cifs_sb->ctx, old_ctx, sizeof(*old_ctx));
+ kfree(old_ctx);
+
+ return rc;
}
static int smb3_fs_context_parse_param(struct fs_context *fc,
diff --git a/fs/smb/client/smb2file.c b/fs/smb/client/smb2file.c
index b292aa94a593..6860eff31693 100644
--- a/fs/smb/client/smb2file.c
+++ b/fs/smb/client/smb2file.c
@@ -49,6 +49,9 @@ static struct smb2_symlink_err_rsp *symlink_data(const struct kvec *iov)
__func__, le32_to_cpu(p->ErrorId));
len = ALIGN(le32_to_cpu(p->ErrorDataLength), 8);
+ if (len > end - ((u8 *)p + sizeof(*p)))
+ return ERR_PTR(-EINVAL);
+
p = (struct smb2_error_context_rsp *)(p->ErrorContextData + len);
}
} else if (le32_to_cpu(err->ByteCount) >= sizeof(*sym) &&
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index e6cb9b144530..3738204984f5 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4721,6 +4721,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
{
unsigned int data_offset;
unsigned int data_len;
+ unsigned int end_off;
unsigned int cur_off;
unsigned int cur_page_idx;
unsigned int pad_len;
@@ -4836,7 +4837,8 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
}
rdata->got_bytes = buffer_len;
- } else if (buf_len >= data_offset + data_len) {
+ } else if (!check_add_overflow(data_offset, data_len, &end_off) &&
+ buf_len >= end_off) {
/* read response payload is in buf */
WARN_ONCE(buffer, "read data can be either in buf or in buffer");
copied = copy_to_iter(buf + data_offset, data_len, &rdata->subreq.io_iter);
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 05f8099047e1..fdf4e50c27ce 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -1158,7 +1158,7 @@ int
cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
int length, len;
- unsigned int data_offset, data_len;
+ unsigned int data_offset, data_len, end_off;
struct cifs_io_subrequest *rdata = mid->callback_data;
char *buf = server->smallbuf;
unsigned int buflen = server->pdu_size;
@@ -1256,11 +1256,14 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
use_rdma_mr = rdata->mr;
#endif
data_len = server->ops->read_data_length(buf, use_rdma_mr);
- if (!use_rdma_mr && (data_offset + data_len > buflen)) {
- /* data_len is corrupt -- discard frame */
- rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed,
- data_offset + data_len, buflen);
- return cifs_readv_discard(server, mid);
+ if (!use_rdma_mr) {
+ if (check_add_overflow(data_offset, data_len, &end_off) ||
+ end_off > buflen) {
+ /* data_len is corrupt -- discard frame */
+ rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed,
+ end_off, buflen);
+ return cifs_readv_discard(server, mid);
+ }
}
#ifdef CONFIG_CIFS_SMB_DIRECT
diff --git a/fs/smb/common/fscc.h b/fs/smb/common/fscc.h
index b4ccddca9256..bc3012cc295d 100644
--- a/fs/smb/common/fscc.h
+++ b/fs/smb/common/fscc.h
@@ -260,12 +260,12 @@ typedef struct {
char FileName[];
} __packed FILE_DIRECTORY_INFO; /* level 0x101 FF resp data */
-/* See MS-FSCC 2.4.13 */
+/* See MS-FSCC 2.4.14 */
struct smb2_file_eof_info { /* encoding of request for level 10 */
__le64 EndOfFile; /* new end of file value */
} __packed; /* level 20 Set */
-/* See MS-FSCC 2.4.14 */
+/* See MS-FSCC 2.4.15 */
typedef struct {
__le32 NextEntryOffset;
__u32 FileIndex;
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index a4b12eb8df81..aeb0a245c532 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -1566,6 +1566,10 @@ struct validate_negotiate_info_rsp {
#define FILE_STANDARD_LINK_INFORMATION 54
#define FILE_ID_INFORMATION 59
#define FILE_ID_EXTD_DIRECTORY_INFORMATION 60 /* also for QUERY_DIR */
+#define FileId64ExtdDirectoryInformation 78 /* also for QUERY_DIR */
+#define FileId64ExtdBothDirectoryInformation 79 /* also for QUERY_DIR */
+#define FileIdAllExtdDirectoryInformation 80 /* also for QUERY_DIR */
+#define FileIdAllExtdBothDirectoryInformation 81 /* also for QUERY_DIR */
/* Used for Query Info and Find File POSIX Info for SMB3.1.1 and SMB1 */
#define SMB_FIND_FILE_POSIX_INFO 0x064
diff --git a/fs/smb/smbdirect/connection.c b/fs/smb/smbdirect/connection.c
index fe9912e53da6..8adf58097534 100644
--- a/fs/smb/smbdirect/connection.c
+++ b/fs/smb/smbdirect/connection.c
@@ -2168,7 +2168,7 @@ static ssize_t smbdirect_map_sges_from_iter(struct iov_iter *iter, size_t len,
if (ret < 0) {
while (state->num_sge > before) {
- struct ib_sge *sge = &state->sge[state->num_sge--];
+ struct ib_sge *sge = &state->sge[--state->num_sge];
ib_dma_unmap_page(state->device,
sge->addr,
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 80ba94f51e5c..aecbab61014c 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -382,6 +382,7 @@ xfs_dir3_data_write_verify(
struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+ struct xfs_dir3_data_hdr *datahdr3 = bp->b_addr;
xfs_failaddr_t fa;
fa = xfs_dir3_data_verify(bp);
@@ -396,6 +397,11 @@ xfs_dir3_data_write_verify(
if (bip)
hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ /*
+ * Zero padding that may be stale from old kernels.
+ */
+ datahdr3->pad = 0;
+
xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
}
@@ -728,7 +734,6 @@ xfs_dir3_data_init(
struct xfs_dir2_data_unused *dup;
struct xfs_dir2_data_free *bf;
int error;
- int i;
/*
* Get the buffer set up for the block.
@@ -741,13 +746,16 @@ xfs_dir3_data_init(
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF);
/*
- * Initialize the header.
+ * Initialize the whole directory header region to zero
+ * so that all padding, bestfree entries, and any
+ * future header fields are clean.
*/
hdr = bp->b_addr;
+ memset(hdr, 0, geo->data_entry_offset);
+
if (xfs_has_crc(mp)) {
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
- memset(hdr3, 0, sizeof(*hdr3));
hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
hdr3->owner = cpu_to_be64(args->owner);
@@ -759,10 +767,6 @@ xfs_dir3_data_init(
bf = xfs_dir2_data_bestfree_p(mp, hdr);
bf[0].offset = cpu_to_be16(geo->data_entry_offset);
bf[0].length = cpu_to_be16(geo->blksize - geo->data_entry_offset);
- for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
- bf[i].length = 0;
- bf[i].offset = 0;
- }
/*
* Set up an unused entry for the block's body.
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 40c7f0ff6cf3..0ec6ccd8b4dc 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1414,8 +1414,7 @@ xfs_refcount_finish_one(
if (rcur == NULL) {
struct xfs_perag *pag = to_perag(ri->ri_group);
- error = xfs_alloc_read_agf(pag, tp,
- XFS_ALLOC_FLAG_FREEING, &agbp);
+ error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
if (error)
return error;
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 20e63069088b..3d40cb0b2496 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -251,6 +251,17 @@ xchk_ino_set_preen(
trace_xchk_ino_preen(sc, ino, __return_address);
}
+/* Record a block indexed by a file fork that could be optimized. */
+void
+xchk_fblock_set_preen(
+ struct xfs_scrub *sc,
+ int whichfork,
+ xfs_fileoff_t offset)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
+ trace_xchk_fblock_preen(sc, whichfork, offset, __return_address);
+}
+
/* Record something being wrong with the filesystem primary superblock. */
void
xchk_set_corrupt(
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index f2ecc68538f0..b494d747c008 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -25,6 +25,8 @@ bool xchk_fblock_xref_process_error(struct xfs_scrub *sc,
void xchk_block_set_preen(struct xfs_scrub *sc,
struct xfs_buf *bp);
void xchk_ino_set_preen(struct xfs_scrub *sc, xfs_ino_t ino);
+void xchk_fblock_set_preen(struct xfs_scrub *sc,
+ int whichfork, xfs_fileoff_t offset);
void xchk_set_corrupt(struct xfs_scrub *sc);
void xchk_block_set_corrupt(struct xfs_scrub *sc,
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 1a71d36898b1..c2d6ad59d03e 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -454,7 +454,12 @@ xchk_da_btree_block(
}
}
- /* XXX: Check hdr3.pad32 once we know how to fix it. */
+ if (xfs_has_crc(ip->i_mount)) {
+ struct xfs_da3_node_hdr *nodehdr3 = blk->bp->b_addr;
+
+ if (nodehdr3->__pad32)
+ xchk_da_set_preen(ds, level);
+ }
break;
default:
xchk_da_set_corrupt(ds, level);
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index e09724cd3725..09715a4aa154 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -492,7 +492,12 @@ xchk_directory_data_bestfree(
goto out;
xchk_buffer_recheck(sc, bp);
- /* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
+ if (xfs_has_crc(sc->mp)) {
+ struct xfs_dir3_data_hdr *hdr3 = bp->b_addr;
+
+ if (hdr3->pad)
+ xchk_fblock_set_preen(sc, XFS_DATA_FORK, lblk);
+ }
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out_buf;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index beaa26ec62da..9978ac1422fc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -699,12 +699,6 @@ xfs_create(
*/
error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
&tp);
- if (error == -ENOSPC) {
- /* flush outstanding delalloc blocks and retry */
- xfs_flush_inodes(mp);
- error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp,
- resblks, &tp);
- }
if (error)
goto out_parent;
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 64c8afb935c2..b994ff15d5e4 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -350,7 +350,7 @@ xfs_dax_notify_dev_failure(
/*
* Shutdown fs from a force umount in pre-remove case which won't fail,
* so errors can be ignored. Otherwise, shutdown the filesystem with
- * CORRUPT flag if error occured or notify.want_shutdown was set during
+ * CORRUPT flag if error occurred or notify.want_shutdown was set during
* RMAP querying.
*/
if (mf_flags & MF_MEM_PRE_REMOVE)
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index bcc470f56e46..148cc32449c1 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1199,10 +1199,21 @@ xfs_trans_alloc_icreate(
{
struct xfs_trans *tp;
bool retried = false;
+ bool flushed = false;
int error;
retry:
error = xfs_trans_alloc(mp, resv, dblocks, 0, 0, &tp);
+ if (error == -ENOSPC && !flushed) {
+ /*
+ * Flush all delalloc blocks to reclaim space from speculative
+ * preallocation. This is similar to the quota retry below
+ * but targets FS-wide ENOSPC.
+ */
+ xfs_flush_inodes(mp);
+ flushed = true;
+ goto retry;
+ }
if (error)
return error;
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index a851b98143c0..5e297b75a85f 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -1170,7 +1170,7 @@ xfs_calc_open_zones(
if (bdev_open_zones && bdev_open_zones < mp->m_max_open_zones) {
mp->m_max_open_zones = bdev_open_zones;
- xfs_info(mp, "limiting open zones to %u due to hardware limit.\n",
+ xfs_info(mp, "limiting open zones to %u due to hardware limit.",
bdev_open_zones);
}
@@ -1217,7 +1217,7 @@ xfs_alloc_zone_info(
return zi;
out_free_bitmaps:
- while (--i > 0)
+ while (--i >= 0)
kvfree(zi->zi_used_bucket_bitmap[i]);
kfree(zi);
return NULL;
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index fedcc47048af..c8a1d5c0332c 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -1221,7 +1221,7 @@ out_put_oz:
if (data->oz)
xfs_open_zone_put(data->oz);
out_free_gc_data:
- kfree(data);
+ xfs_zone_gc_data_free(data);
return error;
}
diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h
index 060eab094e5a..5290a2b2e15a 100644
--- a/include/asm-generic/kprobes.h
+++ b/include/asm-generic/kprobes.h
@@ -14,7 +14,7 @@ static unsigned long __used \
_kbl_addr_##fname = (unsigned long)fname;
# define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname)
/* Use this to forbid a kprobes attach on very low level functions */
-# define __kprobes __section(".kprobes.text")
+# define __kprobes notrace __section(".kprobes.text")
# define nokprobe_inline __always_inline
#else
# define NOKPROBE_SYMBOL(fname)
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 50b47eba7d01..e7195750d21b 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -105,6 +105,12 @@
ARM_SMCCC_SMC_32, \
0, 0x3fff)
+/* C1-Pro erratum 4193714: SME DVMSync early acknowledgement */
+#define ARM_SMCCC_CPU_WORKAROUND_4193714 \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ ARM_SMCCC_OWNER_CPU, 0x10)
+
#define ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
ARM_SMCCC_SMC_32, \
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 97d747320b35..dc17780d6c1e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -475,7 +475,8 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
-int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen);
+int bio_iov_iter_bounce(struct bio *bio, struct iov_iter *iter, size_t maxlen,
+ size_t minsize);
void bio_iov_iter_unbounce(struct bio *bio, bool is_error, bool mark_dirty);
extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 0a3bcd1718f3..be1b38c981d4 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -94,6 +94,7 @@ int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num);
int register_fprobe_syms(struct fprobe *fp, const char **syms, int num);
int unregister_fprobe(struct fprobe *fp);
+int unregister_fprobe_async(struct fprobe *fp);
bool fprobe_is_registered(struct fprobe *fp);
int fprobe_count_ips_from_filter(const char *filter, const char *notfilter);
#else
@@ -113,6 +114,10 @@ static inline int unregister_fprobe(struct fprobe *fp)
{
return -EOPNOTSUPP;
}
+static inline int unregister_fprobe_async(struct fprobe *fp)
+{
+ return -EOPNOTSUPP;
+}
static inline bool fprobe_is_registered(struct fprobe *fp)
{
return false;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 442a80d79e89..bfb9859f391e 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1030,6 +1030,8 @@ struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_ty
int hid_set_field(struct hid_field *, unsigned, __s32);
int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size,
int interrupt);
+int hid_safe_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data,
+ size_t bufsize, u32 size, int interrupt);
struct hid_field *hidinput_get_led_field(struct hid_device *hid);
unsigned int hidinput_count_leds(struct hid_device *hid);
__s32 hidinput_calc_abs_res(const struct hid_field *field, __u16 code);
@@ -1298,8 +1300,8 @@ static inline u32 hid_report_len(struct hid_report *report)
return DIV_ROUND_UP(report->size, 8) + (report->id > 0);
}
-int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size,
- int interrupt);
+int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *data,
+ size_t bufsize, u32 size, int interrupt);
/* HID quirks API */
unsigned long hid_lookup_quirk(const struct hid_device *hdev);
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index a2e47dbcf82c..19fffa4574a4 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -72,8 +72,8 @@ struct hid_ops {
int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len,
u64 source, bool from_bpf);
int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type,
- u8 *data, u32 size, int interrupt, u64 source, bool from_bpf,
- bool lock_already_taken);
+ u8 *data, size_t bufsize, u32 size, int interrupt, u64 source,
+ bool from_bpf, bool lock_already_taken);
struct module *owner;
const struct bus_type *bus_type;
};
@@ -200,7 +200,8 @@ struct hid_bpf {
#ifdef CONFIG_HID_BPF
u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data,
- u32 *size, int interrupt, u64 source, bool from_bpf);
+ size_t *buf_size, u32 *size, int interrupt, u64 source,
+ bool from_bpf);
int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
unsigned char reportnum, __u8 *buf,
u32 size, enum hid_report_type rtype,
@@ -215,8 +216,11 @@ int hid_bpf_device_init(struct hid_device *hid);
const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size);
#else /* CONFIG_HID_BPF */
static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type,
- u8 *data, u32 *size, int interrupt,
- u64 source, bool from_bpf) { return data; }
+ u8 *data, size_t *buf_size, u32 *size,
+ int interrupt, u64 source, bool from_bpf)
+{
+ return data;
+}
static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev,
unsigned char reportnum, u8 *buf,
u32 size, enum hid_report_type rtype,
diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h
index 94c06bf214fb..15f02422e9ca 100644
--- a/include/linux/intel_tpmi.h
+++ b/include/linux/intel_tpmi.h
@@ -28,6 +28,12 @@ enum intel_tpmi_id {
TPMI_INFO_ID = 0x81, /* Special ID for PCI BDF and Package ID information */
};
+#define TPMI_CORE_INIT 0
+#define TPMI_CORE_EXIT 1
+
+int tpmi_register_notifier(struct notifier_block *nb);
+int tpmi_unregister_notifier(struct notifier_block *nb);
+
struct oobmsm_plat_info *tpmi_get_platform_data(struct auxiliary_device *auxdev);
struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int index);
int tpmi_get_resource_count(struct auxiliary_device *auxdev);
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index a81b46af5118..5a1c5c336fa4 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -305,9 +305,11 @@ struct xt_counters *xt_counters_alloc(unsigned int counters);
struct xt_table *xt_register_table(struct net *net,
const struct xt_table *table,
+ const struct nf_hook_ops *template_ops,
struct xt_table_info *bootstrap,
struct xt_table_info *newinfo);
-void *xt_unregister_table(struct xt_table *table);
+void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name);
+struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name);
struct xt_table_info *xt_replace_table(struct xt_table *table,
unsigned int num_counters,
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index a40aaf645fa4..05631a25e622 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -53,7 +53,6 @@ int arpt_register_table(struct net *net, const struct xt_table *table,
const struct arpt_replace *repl,
const struct nf_hook_ops *ops);
void arpt_unregister_table(struct net *net, const char *name);
-void arpt_unregister_table_pre_exit(struct net *net, const char *name);
extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 132b0e4a6d4d..13593391d605 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -26,7 +26,6 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
const struct ipt_replace *repl,
const struct nf_hook_ops *ops);
-void ipt_unregister_table_pre_exit(struct net *net, const char *name);
void ipt_unregister_table_exit(struct net *net, const char *name);
/* Standard entry. */
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 8b8885a73c76..c6d5b927830d 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -27,7 +27,6 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *);
int ip6t_register_table(struct net *net, const struct xt_table *table,
const struct ip6t_replace *repl,
const struct nf_hook_ops *ops);
-void ip6t_unregister_table_pre_exit(struct net *net, const char *name);
void ip6t_unregister_table_exit(struct net *net, const char *name);
extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 368c7b4d7cb5..ee06cba5c6f5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1002,6 +1002,9 @@ struct task_struct {
unsigned sched_rt_mutex:1;
#endif
+ /* Save user-dumpable when mm goes away */
+ unsigned user_dumpable:1;
+
/* Bit to tell TOMOYO we're in execve(): */
unsigned in_execve:1;
unsigned in_iowait:1;
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 1198138cb839..273538200a44 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -33,6 +33,15 @@ struct root_domain;
extern void dl_add_task_root_domain(struct task_struct *p);
extern void dl_clear_root_domain(struct root_domain *rd);
extern void dl_clear_root_domain_cpu(int cpu);
+/*
+ * Return whether moving DL task @p to @new_mask requires moving DL
+ * bandwidth accounting between root domains. This helper is specific to
+ * DL bandwidth move accounting semantics and is shared by
+ * cpuset_can_attach() and set_cpus_allowed_dl() so both paths use the
+ * same source root-domain test.
+ */
+extern bool dl_task_needs_bw_move(struct task_struct *p,
+ const struct cpumask *new_mask);
extern u64 dl_cookie;
extern bool dl_bw_visited(int cpu, u64 cookie);
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index adb9a4de068a..2129e18ada58 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -101,24 +101,27 @@ enum scx_ent_flags {
SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */
SCX_TASK_SUB_INIT = 1 << 4, /* task being initialized for a sub sched */
SCX_TASK_IMMED = 1 << 5, /* task is on local DSQ with %SCX_ENQ_IMMED */
- SCX_TASK_OFF_TASKS = 1 << 6, /* removed from scx_tasks by sched_ext_dead() */
/*
- * Bits 8 and 9 are used to carry task state:
+ * Bits 8 to 10 are used to carry task state:
*
* NONE ops.init_task() not called yet
+ * INIT_BEGIN ops.init_task() in flight; see sched_ext_dead()
* INIT ops.init_task() succeeded, but task can be cancelled
* READY fully initialized, but not in sched_ext
* ENABLED fully initialized and in sched_ext
+ * DEAD terminal state set by sched_ext_dead()
*/
- SCX_TASK_STATE_SHIFT = 8, /* bits 8 and 9 are used to carry task state */
- SCX_TASK_STATE_BITS = 2,
+ SCX_TASK_STATE_SHIFT = 8,
+ SCX_TASK_STATE_BITS = 3,
SCX_TASK_STATE_MASK = ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT,
SCX_TASK_NONE = 0 << SCX_TASK_STATE_SHIFT,
- SCX_TASK_INIT = 1 << SCX_TASK_STATE_SHIFT,
- SCX_TASK_READY = 2 << SCX_TASK_STATE_SHIFT,
- SCX_TASK_ENABLED = 3 << SCX_TASK_STATE_SHIFT,
+ SCX_TASK_INIT_BEGIN = 1 << SCX_TASK_STATE_SHIFT,
+ SCX_TASK_INIT = 2 << SCX_TASK_STATE_SHIFT,
+ SCX_TASK_READY = 3 << SCX_TASK_STATE_SHIFT,
+ SCX_TASK_ENABLED = 4 << SCX_TASK_STATE_SHIFT,
+ SCX_TASK_DEAD = 5 << SCX_TASK_STATE_SHIFT,
/*
* Bits 12 and 13 are used to carry reenqueue reason. In addition to
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 15a60b501b95..2b5ab488e96b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -1234,6 +1234,9 @@ void *kvrealloc_node_align_noprof(const void *p, size_t size, unsigned long alig
extern void kvfree(const void *addr);
DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T))
+extern void kvfree_atomic(const void *addr);
+DEFINE_FREE(kvfree_atomic, void *, if (!IS_ERR_OR_NULL(_T)) kvfree_atomic(_T))
+
extern void kvfree_sensitive(const void *addr, size_t len);
unsigned int kmem_cache_size(struct kmem_cache *s);
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index 2ebba746c18f..89165b769e5c 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -21,7 +21,7 @@
#define VFIO_PCI_CORE_H
#define VFIO_PCI_OFFSET_SHIFT 40
-#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_OFFSET_TO_INDEX(off) ((u64)(off) >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 7b84f2cef8b1..d70510ac31ab 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -489,8 +489,10 @@ genlmsg_multicast_netns_filtered(const struct genl_family *family,
netlink_filter_fn filter,
void *filter_data)
{
- if (WARN_ON_ONCE(group >= family->n_mcgrps))
+ if (WARN_ON_ONCE(group >= family->n_mcgrps)) {
+ nlmsg_free(skb);
return -EINVAL;
+ }
group = family->mcgrp_offset + group;
return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group,
flags, filter, filter_data);
diff --git a/include/net/macsec.h b/include/net/macsec.h
index bc7de5b53e54..d962093ee923 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -9,6 +9,7 @@
#include <linux/u64_stats_sync.h>
#include <linux/if_vlan.h>
+#include <linux/workqueue.h>
#include <uapi/linux/if_link.h>
#include <uapi/linux/if_macsec.h>
@@ -123,6 +124,7 @@ struct macsec_dev_stats {
* @key: key structure
* @ssci: short secure channel identifier
* @stats: per-SA stats
+ * @destroy_work: deferred work to free the SA in process context after RCU grace period
*/
struct macsec_rx_sa {
struct macsec_key key;
@@ -136,7 +138,7 @@ struct macsec_rx_sa {
bool active;
struct macsec_rx_sa_stats __percpu *stats;
struct macsec_rx_sc *sc;
- struct rcu_head rcu;
+ struct rcu_work destroy_work;
};
struct pcpu_rx_sc_stats {
@@ -174,6 +176,7 @@ struct macsec_rx_sc {
* @key: key structure
* @ssci: short secure channel identifier
* @stats: per-SA stats
+ * @destroy_work: deferred work to free the SA in process context after RCU grace period
*/
struct macsec_tx_sa {
struct macsec_key key;
@@ -186,7 +189,7 @@ struct macsec_tx_sa {
refcount_t refcnt;
bool active;
struct macsec_tx_sa_stats __percpu *stats;
- struct rcu_head rcu;
+ struct rcu_work destroy_work;
};
/**
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index e9a8350e7ccf..80f50fd0f7ad 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -45,9 +45,12 @@ struct nf_conntrack_expect {
void (*expectfn)(struct nf_conn *new,
struct nf_conntrack_expect *this);
- /* Helper to assign to new connection */
+ /* Helper that created this expectation */
struct nf_conntrack_helper __rcu *helper;
+ /* Helper to assign to new connection */
+ struct nf_conntrack_helper __rcu *assign_helper;
+
/* The conntrack of the master connection */
struct nf_conn *master;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 499e4288170f..875916d60bfe 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -119,6 +119,7 @@ struct netns_ipv6 {
struct fib_notifier_ops *notifier_ops;
struct fib_notifier_ops *ip6mr_notifier_ops;
atomic_t ipmr_seq;
+ int flowlabel_count;
struct {
struct hlist_head head;
spinlock_t lock;
diff --git a/include/net/nsh.h b/include/net/nsh.h
index 16a751093896..15a26c590815 100644
--- a/include/net/nsh.h
+++ b/include/net/nsh.h
@@ -247,10 +247,10 @@ struct nshhdr {
#define NSH_M_TYPE1_LEN 24
/* NSH header maximum Length. */
-#define NSH_HDR_MAX_LEN 256
+#define NSH_HDR_MAX_LEN ((NSH_LEN_MASK >> NSH_LEN_SHIFT) * 4)
/* NSH context headers maximum Length. */
-#define NSH_CTX_HDRS_MAX_LEN 248
+#define NSH_CTX_HDRS_MAX_LEN (NSH_HDR_MAX_LEN - NSH_BASE_HDR_LEN)
static inline struct nshhdr *nsh_hdr(struct sk_buff *skb)
{
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 8ad7a2d76c1d..ec1df8b94517 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -771,10 +771,8 @@ TRACE_EVENT(btrfs_sync_file,
TP_fast_assign(
struct dentry *dentry = file_dentry(file);
struct inode *inode = file_inode(file);
- struct dentry *parent = dget_parent(dentry);
- struct inode *parent_inode = d_inode(parent);
+ struct inode *parent_inode = d_inode(dentry->d_parent);
- dput(parent);
TP_fast_assign_fsid(btrfs_sb(inode->i_sb));
__entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->parent = btrfs_ino(BTRFS_I(parent_inode));
diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h
index c3eada2642aa..61360b89da40 100644
--- a/include/xen/arm/interface.h
+++ b/include/xen/arm/interface.h
@@ -30,7 +30,7 @@
#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* Explicitly size integers that represent pfns in the interface with
* Xen so that we can have one ABI that works for 32 and 64 bit guests.
* Note that this means that the xen_pfn_t type may be capable of
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index 5e5eb9cfc7cd..4aa3103ba9c3 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -561,8 +561,8 @@ __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
ret |= io_futex_remove_all(ctx, tctx, cancel_all);
ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
- mutex_unlock(&ctx->uring_lock);
ret |= io_kill_timeouts(ctx, tctx, cancel_all);
+ mutex_unlock(&ctx->uring_lock);
if (tctx)
ret |= io_run_task_work() > 0;
else
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index c2d3e45544bb..001fb542dc11 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -190,8 +190,9 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
get_task_struct(tsk);
rcu_read_unlock();
usec = io_sq_cpu_usec(tsk);
+ sq_pid = task_pid_nr_ns(tsk,
+ proc_pid_ns(file_inode(m->file)->i_sb));
put_task_struct(tsk);
- sq_pid = sq->task_pid;
sq_cpu = sq->sq_cpu;
sq_total_time = usec;
sq_work_time = sq->work_time;
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 7a9f94a0ce6f..8cc7b47d3089 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -1124,7 +1124,8 @@ static inline void io_wq_remove_pending(struct io_wq *wq,
if (io_wq_is_hashed(work) && work == wq->hash_tail[hash]) {
if (prev)
prev_work = container_of(prev, struct io_wq_work, list);
- if (prev_work && io_get_work_hash(prev_work) == hash)
+ if (prev_work && io_wq_is_hashed(prev_work) &&
+ io_get_work_hash(prev_work) == hash)
wq->hash_tail[hash] = prev_work;
else
wq->hash_tail[hash] = NULL;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4ed998d60c09..036145ee466c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -687,12 +687,26 @@ static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
}
/*
+ * Compute queued CQEs for free-space calculation, clamped to cq_entries.
+ */
+static unsigned int io_cqring_queued(struct io_ring_ctx *ctx)
+{
+ struct io_rings *rings = io_get_rings(ctx);
+ int diff;
+
+ diff = (int)(ctx->cached_cq_tail - READ_ONCE(rings->cq.head));
+ if (diff >= 0)
+ return min((unsigned int)diff, ctx->cq_entries);
+ return 0;
+}
+
+/*
* Fill an empty dummy CQE, in case alignment is off for posting a 32b CQE
* because the ring is a single 16b entry away from wrapping.
*/
static bool io_fill_nop_cqe(struct io_ring_ctx *ctx, unsigned int off)
{
- if (__io_cqring_events(ctx) < ctx->cq_entries) {
+ if (io_cqring_queued(ctx) < ctx->cq_entries) {
struct io_uring_cqe *cqe = &ctx->rings->cqes[off];
cqe->user_data = 0;
@@ -713,7 +727,7 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow, bool cqe32)
{
struct io_rings *rings = ctx->rings;
unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1);
- unsigned int free, queued, len;
+ unsigned int free, len;
/*
* Posting into the CQ when there are pending overflowed CQEs may break
@@ -733,9 +747,7 @@ bool io_cqe_cache_refill(struct io_ring_ctx *ctx, bool overflow, bool cqe32)
off = 0;
}
- /* userspace may cheat modifying the tail, be safe and do min */
- queued = min(__io_cqring_events(ctx), ctx->cq_entries);
- free = ctx->cq_entries - queued;
+ free = ctx->cq_entries - io_cqring_queued(ctx);
/* we need a contiguous range, limit based on the current array offset */
len = min(free, ctx->cq_entries - off);
if (len < (cqe32 + 1))
@@ -1452,8 +1464,13 @@ struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
struct io_kiocb *nxt = NULL;
if (req_ref_put_and_test_atomic(req)) {
- if (req->flags & IO_REQ_LINK_FLAGS)
+ if (req->flags & IO_REQ_LINK_FLAGS) {
+ struct io_ring_ctx *ctx = req->ctx;
+
+ mutex_lock(&ctx->uring_lock);
nxt = io_req_find_next(req);
+ mutex_unlock(&ctx->uring_lock);
+ }
io_free_req(req);
}
return nxt ? &nxt->work : NULL;
diff --git a/io_uring/rw.c b/io_uring/rw.c
index e729e0e7657e..0c4834645279 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -230,7 +230,7 @@ static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb)
}
static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
- u64 attr_ptr, u64 attr_type_mask)
+ u64 attr_ptr)
{
struct io_uring_attr_pi pi_attr;
struct io_async_rw *io;
@@ -305,7 +305,7 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return -EINVAL;
attr_ptr = READ_ONCE(sqe->attr_ptr);
- return io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
+ return io_prep_rw_pi(req, rw, ddir, attr_ptr);
}
return 0;
}
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index e2595cae2b07..6353a4d979dc 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -284,6 +284,10 @@ static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);
io_remove_next_linked(req);
+
+ /* If this is NULL, then timer already claimed it and will complete it */
+ if (!timeout->head)
+ return NULL;
timeout->head = NULL;
if (hrtimer_try_to_cancel(&io->timer) != -1) {
list_del(&timeout->list);
@@ -367,6 +371,14 @@ static void io_req_task_link_timeout(struct io_tw_req tw_req, io_tw_token_t tw)
int ret;
if (prev) {
+ /*
+ * splice the linked timeout out of prev's chain if the regular
+ * completion path didn't already do it.
+ */
+ if (prev->link == req)
+ prev->link = req->link;
+ req->link = NULL;
+
if (!tw.cancel) {
struct io_cancel_data cd = {
.ctx = req->ctx,
@@ -401,10 +413,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
/*
* We don't expect the list to be empty, that will only happen if we
- * race with the completion of the linked work.
+ * race with the completion of the linked work. Splice of prev is
+ * done in io_req_task_link_timeout(), if needed.
*/
if (prev) {
- io_remove_next_linked(prev);
if (!req_ref_inc_not_zero(prev))
prev = NULL;
}
diff --git a/kernel/audit.c b/kernel/audit.c
index e1d489bc2dff..34dc7cb246ff 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1468,6 +1468,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
err = audit_list_rules_send(skb, seq);
break;
case AUDIT_TRIM:
+ if (audit_enabled == AUDIT_LOCKED)
+ return -EPERM;
audit_trim_trees();
audit_log_common_recv_msg(audit_context(), &ab,
AUDIT_CONFIG_CHANGE);
@@ -1480,6 +1482,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
size_t msglen = data_len;
char *old, *new;
+ if (audit_enabled == AUDIT_LOCKED)
+ return -EPERM;
err = -EINVAL;
if (msglen < 2 * sizeof(u32))
break;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ab54fccba215..abdf8da3be93 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2786,7 +2786,7 @@ void __audit_log_capset(const struct cred *new, const struct cred *old)
context->capset.pid = task_tgid_nr(current);
context->capset.cap.effective = new->cap_effective;
- context->capset.cap.inheritable = new->cap_effective;
+ context->capset.cap.inheritable = new->cap_inheritable;
context->capset.cap.permitted = new->cap_permitted;
context->capset.cap.ambient = new->cap_ambient;
context->type = AUDIT_CAPSET;
diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h
index bb4e692bea30..f7aaf01f7cd5 100644
--- a/kernel/cgroup/cpuset-internal.h
+++ b/kernel/cgroup/cpuset-internal.h
@@ -167,6 +167,7 @@ struct cpuset {
*/
int nr_deadline_tasks;
int nr_migrate_dl_tasks;
+ /* DL bandwidth that needs destination reservation for this attach. */
u64 sum_migrate_dl_bw;
/*
* CPU used for temporary DL bandwidth allocation during attach;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index e3a081a07c6d..5c33ab20cc20 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1718,7 +1718,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
*/
if (is_partition_valid(parent))
adding = cpumask_and(tmp->addmask,
- xcpus, parent->effective_xcpus);
+ cs->effective_xcpus,
+ parent->effective_xcpus);
if (old_prs > 0)
new_prs = -old_prs;
@@ -2993,7 +2994,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
struct cpuset *cs, *oldcs;
struct task_struct *task;
bool setsched_check;
- int ret;
+ int cpu, ret;
/* used later by cpuset_attach() */
cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
@@ -3038,31 +3039,31 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
}
if (dl_task(task)) {
+ /*
+ * Count all migrating DL tasks for cpuset task accounting.
+ * Only tasks that need a root-domain bandwidth move
+ * contribute to sum_migrate_dl_bw.
+ */
cs->nr_migrate_dl_tasks++;
- cs->sum_migrate_dl_bw += task->dl.dl_bw;
+ if (dl_task_needs_bw_move(task, cs->effective_cpus))
+ cs->sum_migrate_dl_bw += task->dl.dl_bw;
}
}
- if (!cs->nr_migrate_dl_tasks)
+ if (!cs->sum_migrate_dl_bw)
goto out_success;
- if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) {
- int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
-
- if (unlikely(cpu >= nr_cpu_ids)) {
- reset_migrate_dl_data(cs);
- ret = -EINVAL;
- goto out_unlock;
- }
+ cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
+ if (unlikely(cpu >= nr_cpu_ids)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
- ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
- if (ret) {
- reset_migrate_dl_data(cs);
- goto out_unlock;
- }
+ ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
+ if (ret)
+ goto out_unlock;
- cs->dl_bw_cpu = cpu;
- }
+ cs->dl_bw_cpu = cpu;
out_success:
/*
@@ -3070,7 +3071,10 @@ out_success:
* changes which zero cpus/mems_allowed.
*/
cs->attach_in_progress++;
+
out_unlock:
+ if (ret)
+ reset_migrate_dl_data(cs);
mutex_unlock(&cpuset_mutex);
return ret;
}
@@ -4176,11 +4180,11 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* current's mems_allowed, yes. If it's not a __GFP_HARDWALL request and this
* node is set in the nearest hardwalled cpuset ancestor to current's cpuset,
* yes. If current has access to memory reserves as an oom victim, yes.
- * Otherwise, no.
+ * If the current task is PF_EXITING, yes. Otherwise, no.
*
* GFP_USER allocations are marked with the __GFP_HARDWALL bit,
* and do not allow allocations outside the current tasks cpuset
- * unless the task has been OOM killed.
+ * unless the task has been OOM killed or is exiting.
* GFP_KERNEL allocations are not so marked, so can escape to the
* nearest enclosing hardwalled ancestor cpuset.
*
@@ -4194,7 +4198,9 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* The first call here from mm/page_alloc:get_page_from_freelist()
* has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
* so no allocation on a node outside the cpuset is allowed (unless
- * in interrupt, of course).
+ * in interrupt, of course). The PF_EXITING check must therefore
+ * come before the __GFP_HARDWALL check, otherwise a dying task
+ * would be blocked on the fast path.
*
* The second pass through get_page_from_freelist() doesn't even call
* here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
@@ -4204,6 +4210,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
* in_interrupt - any node ok (current task context irrelevant)
* GFP_ATOMIC - any node ok
* tsk_is_oom_victim - any node ok
+ * PF_EXITING - any node ok (let dying task exit quickly)
* GFP_KERNEL - any node in enclosing hardwalled cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
*/
@@ -4223,11 +4230,10 @@ bool cpuset_current_node_allowed(int node, gfp_t gfp_mask)
*/
if (unlikely(tsk_is_oom_victim(current)))
return true;
- if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
- return false;
-
if (current->flags & PF_EXITING) /* Let dying task have memory */
return true;
+ if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
+ return false;
/* Not hardwall and node outside mems_allowed: scan up cpusets */
spin_lock_irqsave(&callback_lock, flags);
diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c
index 1ab1fb47f271..4753a67d0f0f 100644
--- a/kernel/cgroup/dmem.c
+++ b/kernel/cgroup/dmem.c
@@ -602,6 +602,7 @@ get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
pool = NULL;
continue;
}
+ pool = ERR_PTR(-ENOMEM);
}
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 25e9cb6de7e7..f50d73c272d6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -571,6 +571,7 @@ static void exit_mm(void)
*/
smp_mb__after_spinlock();
local_irq_disable();
+ current->user_dumpable = (get_dumpable(mm) == SUID_DUMP_USER);
current->mm = NULL;
membarrier_update_current_mm(NULL);
enter_lazy_tlb(mm, current);
@@ -1073,6 +1074,7 @@ void __noreturn make_task_dead(int signr)
futex_exit_recursive(tsk);
tsk->exit_state = EXIT_DEAD;
refcount_inc(&tsk->rcu_users);
+ preempt_disable();
do_task_dead();
}
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 18509d8082ea..2592f7ca16e2 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -1707,7 +1707,7 @@ int kho_fill_kimage(struct kimage *image)
int err = 0;
struct kexec_buf scratch;
- if (!kho_enable)
+ if (!kho_enable || image->type == KEXEC_TYPE_CRASH)
return 0;
image->kho.fdt = virt_to_phys(kho_out.fdt);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 68c17daef8d4..130043bfc209 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -272,11 +272,24 @@ static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
return ns_capable(ns, CAP_SYS_PTRACE);
}
+static bool task_still_dumpable(struct task_struct *task, unsigned int mode)
+{
+ struct mm_struct *mm = task->mm;
+ if (mm) {
+ if (get_dumpable(mm) == SUID_DUMP_USER)
+ return true;
+ return ptrace_has_cap(mm->user_ns, mode);
+ }
+
+ if (task->user_dumpable)
+ return true;
+ return ptrace_has_cap(&init_user_ns, mode);
+}
+
/* Returns 0 on success, -errno on denial. */
static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
const struct cred *cred = current_cred(), *tcred;
- struct mm_struct *mm;
kuid_t caller_uid;
kgid_t caller_gid;
@@ -337,11 +350,8 @@ ok:
* Pairs with a write barrier in commit_creds().
*/
smp_rmb();
- mm = task->mm;
- if (mm &&
- ((get_dumpable(mm) != SUID_DUMP_USER) &&
- !ptrace_has_cap(mm->user_ns, mode)))
- return -EPERM;
+ if (!task_still_dumpable(task, mode))
+ return -EPERM;
return security_ptrace_access_check(task, mode);
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index edca7849b165..7db4c87df83b 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -3107,20 +3107,18 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
static void set_cpus_allowed_dl(struct task_struct *p,
struct affinity_context *ctx)
{
- struct root_domain *src_rd;
struct rq *rq;
WARN_ON_ONCE(!dl_task(p));
rq = task_rq(p);
- src_rd = rq->rd;
/*
* Migrating a SCHED_DEADLINE task between exclusive
* cpusets (different root_domains) entails a bandwidth
* update. We already made space for us in the destination
* domain (see cpuset_can_attach()).
*/
- if (!cpumask_intersects(src_rd->span, ctx->new_mask)) {
+ if (dl_task_needs_bw_move(p, ctx->new_mask)) {
struct dl_bw *src_dl_b;
src_dl_b = dl_bw_of(cpu_of(rq));
@@ -3137,6 +3135,15 @@ static void set_cpus_allowed_dl(struct task_struct *p,
set_cpus_allowed_common(p, ctx);
}
+bool dl_task_needs_bw_move(struct task_struct *p,
+ const struct cpumask *new_mask)
+{
+ if (!dl_task(p))
+ return false;
+
+ return !cpumask_intersects(task_rq(p)->rd->span, new_mask);
+}
+
/* Assumes rq->lock is held */
static void rq_online_dl(struct rq *rq)
{
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 38d90baf78cf..d3ce708991c3 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -297,7 +297,6 @@ static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch)
#else /* CONFIG_EXT_SUB_SCHED */
static struct scx_sched *scx_parent(struct scx_sched *sch) { return NULL; }
static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos, struct scx_sched *root) { return pos ? NULL : root; }
-static struct scx_sched *scx_find_sub_sched(u64 cgroup_id) { return NULL; }
static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch) {}
#endif /* CONFIG_EXT_SUB_SCHED */
@@ -712,6 +711,51 @@ struct bpf_iter_scx_dsq {
} __attribute__((aligned(8)));
+static u32 scx_get_task_state(const struct task_struct *p)
+{
+ return p->scx.flags & SCX_TASK_STATE_MASK;
+}
+
+static void scx_set_task_state(struct task_struct *p, u32 state)
+{
+ u32 prev_state = scx_get_task_state(p);
+ bool warn = false;
+
+ switch (state) {
+ case SCX_TASK_NONE:
+ warn = prev_state == SCX_TASK_DEAD;
+ break;
+ case SCX_TASK_INIT_BEGIN:
+ warn = prev_state != SCX_TASK_NONE;
+ break;
+ case SCX_TASK_INIT:
+ warn = prev_state != SCX_TASK_INIT_BEGIN;
+ p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT;
+ break;
+ case SCX_TASK_READY:
+ warn = !(prev_state == SCX_TASK_INIT ||
+ prev_state == SCX_TASK_ENABLED);
+ break;
+ case SCX_TASK_ENABLED:
+ warn = prev_state != SCX_TASK_READY;
+ break;
+ case SCX_TASK_DEAD:
+ warn = !(prev_state == SCX_TASK_NONE ||
+ prev_state == SCX_TASK_INIT_BEGIN);
+ break;
+ default:
+ WARN_ONCE(1, "sched_ext: Invalid task state %d -> %d for %s[%d]",
+ prev_state, state, p->comm, p->pid);
+ return;
+ }
+
+ WARN_ONCE(warn, "sched_ext: Invalid task state transition 0x%x -> 0x%x for %s[%d]",
+ prev_state, state, p->comm, p->pid);
+
+ p->scx.flags &= ~SCX_TASK_STATE_MASK;
+ p->scx.flags |= state;
+}
+
/*
* SCX task iterator.
*/
@@ -937,11 +981,11 @@ static struct task_struct *scx_task_iter_next_locked(struct scx_task_iter *iter)
/*
* cgroup_task_dead() removes the dead tasks from cset->tasks
* after sched_ext_dead() and cgroup iteration may see tasks
- * which already finished sched_ext_dead(). %SCX_TASK_OFF_TASKS
- * is set by sched_ext_dead() under @p's rq lock. Test it to
+ * which already finished sched_ext_dead(). %SCX_TASK_DEAD is
+ * set by sched_ext_dead() under @p's rq lock. Test it to
* avoid visiting tasks which are already dead from SCX POV.
*/
- if (p->scx.flags & SCX_TASK_OFF_TASKS) {
+ if (scx_get_task_state(p) == SCX_TASK_DEAD) {
__scx_task_iter_rq_unlock(iter);
continue;
}
@@ -3500,41 +3544,6 @@ static struct cgroup *tg_cgrp(struct task_group *tg)
#endif /* CONFIG_EXT_GROUP_SCHED */
-static u32 scx_get_task_state(const struct task_struct *p)
-{
- return p->scx.flags & SCX_TASK_STATE_MASK;
-}
-
-static void scx_set_task_state(struct task_struct *p, u32 state)
-{
- u32 prev_state = scx_get_task_state(p);
- bool warn = false;
-
- switch (state) {
- case SCX_TASK_NONE:
- break;
- case SCX_TASK_INIT:
- warn = prev_state != SCX_TASK_NONE;
- break;
- case SCX_TASK_READY:
- warn = prev_state == SCX_TASK_NONE;
- break;
- case SCX_TASK_ENABLED:
- warn = prev_state != SCX_TASK_READY;
- break;
- default:
- WARN_ONCE(1, "sched_ext: Invalid task state %d -> %d for %s[%d]",
- prev_state, state, p->comm, p->pid);
- return;
- }
-
- WARN_ONCE(warn, "sched_ext: Invalid task state transition 0x%x -> 0x%x for %s[%d]",
- prev_state, state, p->comm, p->pid);
-
- p->scx.flags &= ~SCX_TASK_STATE_MASK;
- p->scx.flags |= state;
-}
-
static int __scx_init_task(struct scx_sched *sch, struct task_struct *p, bool fork)
{
int ret;
@@ -3586,22 +3595,6 @@ static int __scx_init_task(struct scx_sched *sch, struct task_struct *p, bool fo
return 0;
}
-static int scx_init_task(struct scx_sched *sch, struct task_struct *p, bool fork)
-{
- int ret;
-
- ret = __scx_init_task(sch, p, fork);
- if (!ret) {
- /*
- * While @p's rq is not locked. @p is not visible to the rest of
- * SCX yet and it's safe to update the flags and state.
- */
- p->scx.flags |= SCX_TASK_RESET_RUNNABLE_AT;
- scx_set_task_state(p, SCX_TASK_INIT);
- }
- return ret;
-}
-
static void __scx_enable_task(struct scx_sched *sch, struct task_struct *p)
{
struct rq *rq = task_rq(p);
@@ -3716,7 +3709,8 @@ static void scx_disable_and_exit_task(struct scx_sched *sch,
* If set, @p exited between __scx_init_task() and scx_enable_task() in
* scx_sub_enable() and is initialized for both the associated sched and
* its parent. Exit for the child too - scx_enable_task() never ran for
- * it, so undo only init_task.
+ * it, so undo only init_task. The flag is only set on the sub-enable
+ * path, so it's always clear when @p arrives here in %SCX_TASK_NONE.
*/
if (p->scx.flags & SCX_TASK_SUB_INIT) {
if (!WARN_ON_ONCE(!scx_enabling_sub_sched))
@@ -3764,10 +3758,14 @@ int scx_fork(struct task_struct *p, struct kernel_clone_args *kargs)
#else
struct scx_sched *sch = scx_root;
#endif
- ret = scx_init_task(sch, p, true);
- if (!ret)
- scx_set_task_sched(p, sch);
- return ret;
+ scx_set_task_state(p, SCX_TASK_INIT_BEGIN);
+ ret = __scx_init_task(sch, p, true);
+ if (unlikely(ret)) {
+ scx_set_task_state(p, SCX_TASK_NONE);
+ return ret;
+ }
+ scx_set_task_state(p, SCX_TASK_INIT);
+ scx_set_task_sched(p, sch);
}
return 0;
@@ -3862,18 +3860,23 @@ void sched_ext_dead(struct task_struct *p)
* @p is off scx_tasks and wholly ours. scx_root_enable()'s READY ->
* ENABLED transitions can't race us. Disable ops for @p.
*
- * %SCX_TASK_OFF_TASKS synchronizes against cgroup task iteration - see
+ * %SCX_TASK_DEAD synchronizes against cgroup task iteration - see
* scx_task_iter_next_locked(). NONE tasks need no marking: cgroup
* iteration is only used from sub-sched paths, which require root
* enabled. Root enable transitions every live task to at least READY.
+ *
+ * %INIT_BEGIN means ops.init_task() is running for @p. Don't call
+ * into ops; transition to %DEAD so the post-init recheck unwinds
+ * via scx_sub_init_cancel_task().
*/
if (scx_get_task_state(p) != SCX_TASK_NONE) {
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(p, &rf);
- scx_disable_and_exit_task(scx_task_sched(p), p);
- p->scx.flags |= SCX_TASK_OFF_TASKS;
+ if (scx_get_task_state(p) != SCX_TASK_INIT_BEGIN)
+ scx_disable_and_exit_task(scx_task_sched(p), p);
+ scx_set_task_state(p, SCX_TASK_DEAD);
task_rq_unlock(rq, p, &rf);
}
}
@@ -3919,6 +3922,16 @@ static void switched_from_scx(struct rq *rq, struct task_struct *p)
if (task_dead_and_done(p))
return;
+ /*
+ * %NONE means SCX is no longer tracking @p at the task level (e.g.
+ * scx_fail_parent() handed @p back to the parent at NONE pending the
+ * parent's own teardown). There is nothing to disable; calling
+ * scx_disable_task() would WARN on the non-%ENABLED state and trigger a
+ * NONE -> READY validation failure.
+ */
+ if (scx_get_task_state(p) == SCX_TASK_NONE)
+ return;
+
scx_disable_task(scx_task_sched(p), p);
}
@@ -4808,6 +4821,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
kfree(sch->cgrp_path);
if (sch_cgroup(sch))
cgroup_put(sch_cgroup(sch));
+ if (sch->sub_kset)
+ kobject_put(&sch->sub_kset->kobj);
#endif /* CONFIG_EXT_SUB_SCHED */
for_each_possible_cpu(cpu) {
@@ -5585,10 +5600,12 @@ static void refresh_watchdog(void)
static s32 scx_link_sched(struct scx_sched *sch)
{
+ const char *err_msg = "";
+ s32 ret = 0;
+
scoped_guard(raw_spinlock_irq, &scx_sched_lock) {
#ifdef CONFIG_EXT_SUB_SCHED
struct scx_sched *parent = scx_parent(sch);
- s32 ret;
if (parent) {
/*
@@ -5598,15 +5615,16 @@ static s32 scx_link_sched(struct scx_sched *sch)
* parent can shoot us down.
*/
if (atomic_read(&parent->exit_kind) != SCX_EXIT_NONE) {
- scx_error(sch, "parent disabled");
- return -ENOENT;
+ err_msg = "parent disabled";
+ ret = -ENOENT;
+ break;
}
ret = rhashtable_lookup_insert_fast(&scx_sched_hash,
&sch->hash_node, scx_sched_hash_params);
if (ret) {
- scx_error(sch, "failed to insert into scx_sched_hash (%d)", ret);
- return ret;
+ err_msg = "failed to insert into scx_sched_hash";
+ break;
}
list_add_tail(&sch->sibling, &parent->children);
@@ -5616,6 +5634,15 @@ static s32 scx_link_sched(struct scx_sched *sch)
list_add_tail_rcu(&sch->all, &scx_sched_all);
}
+ /*
+ * scx_error() takes scx_sched_lock via scx_claim_exit(), so it must run after
+ * the guard above is released.
+ */
+ if (ret) {
+ scx_error(sch, "%s (%d)", err_msg, ret);
+ return ret;
+ }
+
refresh_watchdog();
return 0;
}
@@ -5685,7 +5712,7 @@ static void scx_fail_parent(struct scx_sched *sch,
scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
scx_disable_and_exit_task(sch, p);
- rcu_assign_pointer(p->scx.sched, parent);
+ scx_set_task_sched(p, parent);
}
}
scx_task_iter_stop(&sti);
@@ -5763,6 +5790,21 @@ static void scx_sub_disable(struct scx_sched *sch)
}
rq = task_rq_lock(p, &rf);
+
+ if (scx_get_task_state(p) == SCX_TASK_DEAD) {
+ /*
+ * sched_ext_dead() raced us between __scx_init_task()
+ * and this rq lock and ran exit_task() on @sch (the
+ * sched @p was on at that point), not on $parent.
+ * $parent's just-completed init is owed an exit_task()
+ * and we issue it here.
+ */
+ scx_sub_init_cancel_task(parent, p);
+ task_rq_unlock(rq, p, &rf);
+ put_task_struct(p);
+ continue;
+ }
+
scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
/*
* $p is initialized for $parent and still attached to
@@ -5771,13 +5813,14 @@ static void scx_sub_disable(struct scx_sched *sch)
* $p having already been initialized, and then enable.
*/
scx_disable_and_exit_task(sch, p);
+ scx_set_task_state(p, SCX_TASK_INIT_BEGIN);
scx_set_task_state(p, SCX_TASK_INIT);
- rcu_assign_pointer(p->scx.sched, parent);
+ scx_set_task_sched(p, parent);
scx_set_task_state(p, SCX_TASK_READY);
scx_enable_task(parent, p);
}
- task_rq_unlock(rq, p, &rf);
+ task_rq_unlock(rq, p, &rf);
put_task_struct(p);
}
scx_task_iter_stop(&sti);
@@ -5820,7 +5863,7 @@ static void scx_sub_disable(struct scx_sched *sch)
if (sch->ops.exit)
SCX_CALL_OP(sch, exit, NULL, sch->exit_info);
if (sch->sub_kset)
- kset_unregister(sch->sub_kset);
+ kobject_del(&sch->sub_kset->kobj);
kobject_del(&sch->kobj);
}
#else /* CONFIG_EXT_SUB_SCHED */
@@ -5954,7 +5997,7 @@ static void scx_root_disable(struct scx_sched *sch)
*/
#ifdef CONFIG_EXT_SUB_SCHED
if (sch->sub_kset)
- kset_unregister(sch->sub_kset);
+ kobject_del(&sch->sub_kset->kobj);
#endif
kobject_del(&sch->kobj);
@@ -6578,7 +6621,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
sch->slice_dfl = SCX_SLICE_DFL;
atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
- init_irq_work(&sch->disable_irq_work, scx_disable_irq_workfn);
+ sch->disable_irq_work = IRQ_WORK_INIT_HARD(scx_disable_irq_workfn);
kthread_init_work(&sch->disable_work, scx_disable_workfn);
timer_setup(&sch->bypass_lb_timer, scx_bypass_lb_timerfn, 0);
@@ -6594,6 +6637,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
rcu_assign_pointer(ops->priv, sch);
sch->kobj.kset = scx_kset;
+ INIT_LIST_HEAD(&sch->all);
#ifdef CONFIG_EXT_SUB_SCHED
char *buf = kzalloc(PATH_MAX, GFP_KERNEL);
@@ -6621,6 +6665,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
if (ret < 0) {
+ RCU_INIT_POINTER(ops->priv, NULL);
kobject_put(&sch->kobj);
return ERR_PTR(ret);
}
@@ -6628,6 +6673,7 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
if (ops->sub_attach) {
sch->sub_kset = kset_create_and_add("sub", NULL, &sch->kobj);
if (!sch->sub_kset) {
+ RCU_INIT_POINTER(ops->priv, NULL);
kobject_put(&sch->kobj);
return ERR_PTR(-ENOMEM);
}
@@ -6635,14 +6681,18 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
#else /* CONFIG_EXT_SUB_SCHED */
ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
if (ret < 0) {
+ RCU_INIT_POINTER(ops->priv, NULL);
kobject_put(&sch->kobj);
return ERR_PTR(ret);
}
#endif /* CONFIG_EXT_SUB_SCHED */
return sch;
+#ifdef CONFIG_EXT_SUB_SCHED
err_free_lb_resched:
+ RCU_INIT_POINTER(ops->priv, NULL);
free_cpumask_var(sch->bypass_lb_resched_cpumask);
+#endif
err_free_lb_cpumask:
free_cpumask_var(sch->bypass_lb_donee_cpumask);
err_stop_helper:
@@ -6752,6 +6802,19 @@ static void scx_root_enable_workfn(struct kthread_work *work)
goto err_unlock;
}
+ /*
+ * @ops->priv binds @ops to its scx_sched instance. It is set here by
+ * scx_alloc_and_add_sched() and cleared at the tail of bpf_scx_unreg(),
+ * which runs after scx_root_disable() has dropped scx_enable_mutex. If
+ * it's still non-NULL here, a previous attachment on @ops has not
+ * finished tearing down; proceeding would let the in-flight unreg's
+ * RCU_INIT_POINTER(NULL) clobber the @ops->priv we are about to assign.
+ */
+ if (rcu_access_pointer(ops->priv)) {
+ ret = -EBUSY;
+ goto err_unlock;
+ }
+
ret = alloc_kick_syncs();
if (ret)
goto err_unlock;
@@ -6874,6 +6937,9 @@ static void scx_root_enable_workfn(struct kthread_work *work)
scx_task_iter_start(&sti, NULL);
while ((p = scx_task_iter_next_locked(&sti))) {
+ struct rq_flags rf;
+ struct rq *rq;
+
/*
* @p may already be dead, have lost all its usages counts and
* be waiting for RCU grace period before being freed. @p can't
@@ -6882,20 +6948,47 @@ static void scx_root_enable_workfn(struct kthread_work *work)
if (!tryget_task_struct(p))
continue;
+ /*
+ * Set %INIT_BEGIN under the iter's rq lock so that a concurrent
+ * sched_ext_dead() does not call ops.exit_task() on @p while
+ * ops.init_task() is running. If sched_ext_dead() runs before
+ * this store, it has already removed @p from scx_tasks and the
+ * iter won't visit @p; if it runs after, it observes
+ * %INIT_BEGIN and transitions to %DEAD without calling ops,
+ * leaving the post-init recheck below to unwind.
+ */
+ scx_set_task_state(p, SCX_TASK_INIT_BEGIN);
scx_task_iter_unlock(&sti);
- ret = scx_init_task(sch, p, false);
- if (ret) {
- put_task_struct(p);
+ ret = __scx_init_task(sch, p, false);
+
+ rq = task_rq_lock(p, &rf);
+
+ if (unlikely(ret)) {
+ if (scx_get_task_state(p) != SCX_TASK_DEAD)
+ scx_set_task_state(p, SCX_TASK_NONE);
+ task_rq_unlock(rq, p, &rf);
scx_task_iter_stop(&sti);
scx_error(sch, "ops.init_task() failed (%d) for %s[%d]",
ret, p->comm, p->pid);
+ put_task_struct(p);
goto err_disable_unlock_all;
}
- scx_set_task_sched(p, sch);
- scx_set_task_state(p, SCX_TASK_READY);
+ if (scx_get_task_state(p) == SCX_TASK_DEAD) {
+ /*
+ * sched_ext_dead() observed %INIT_BEGIN and set %DEAD.
+ * ops.exit_task() is owed to the sched __scx_init_task()
+ * ran against; call it now.
+ */
+ scx_sub_init_cancel_task(sch, p);
+ } else {
+ scx_set_task_state(p, SCX_TASK_INIT);
+ scx_set_task_sched(p, sch);
+ scx_set_task_state(p, SCX_TASK_READY);
+ }
+ task_rq_unlock(rq, p, &rf);
put_task_struct(p);
}
scx_task_iter_stop(&sti);
@@ -7039,6 +7132,12 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
goto out_unlock;
}
+ /* See scx_root_enable_workfn() for the @ops->priv check. */
+ if (rcu_access_pointer(ops->priv)) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
cgrp = cgroup_get_from_id(ops->sub_cgroup_id);
if (IS_ERR(cgrp)) {
ret = PTR_ERR(cgrp);
@@ -7165,6 +7264,21 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
goto abort;
rq = task_rq_lock(p, &rf);
+
+ if (scx_get_task_state(p) == SCX_TASK_DEAD) {
+ /*
+ * sched_ext_dead() raced us between __scx_init_task()
+ * and this rq lock and ran exit_task() on $parent (the
+ * sched @p was on at that point), not on @sch. @sch's
+ * just-completed init is owed an exit_task() and we
+ * issue it here.
+ */
+ scx_sub_init_cancel_task(sch, p);
+ task_rq_unlock(rq, p, &rf);
+ put_task_struct(p);
+ continue;
+ }
+
p->scx.flags |= SCX_TASK_SUB_INIT;
task_rq_unlock(rq, p, &rf);
@@ -7199,7 +7313,7 @@ static void scx_sub_enable_workfn(struct kthread_work *work)
* $p is now only initialized for @sch and READY, which
* is what we want. Assign it to @sch and enable.
*/
- rcu_assign_pointer(p->scx.sched, sch);
+ scx_set_task_sched(p, sch);
scx_enable_task(sch, p);
p->scx.flags &= ~SCX_TASK_SUB_INIT;
@@ -7301,8 +7415,7 @@ static s32 scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
static DEFINE_MUTEX(helper_mutex);
struct scx_enable_cmd cmd;
- if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
- cpu_possible_mask)) {
+ if (housekeeping_enabled(HK_TYPE_DOMAIN_BOOT)) {
pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
return -EINVAL;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index af7079aa0f36..a02bd258677e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2384,7 +2384,8 @@ static void bpf_kprobe_multi_link_release(struct bpf_link *link)
struct bpf_kprobe_multi_link *kmulti_link;
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
- unregister_fprobe(&kmulti_link->fp);
+ /* Don't wait for RCU GP here. */
+ unregister_fprobe_async(&kmulti_link->fp);
kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt);
}
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index cc49ebd2a773..f378613ad120 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -1093,14 +1093,15 @@ static int unregister_fprobe_nolock(struct fprobe *fp)
}
/**
- * unregister_fprobe() - Unregister fprobe.
+ * unregister_fprobe_async() - Unregister fprobe without RCU GP wait
* @fp: A fprobe data structure to be unregistered.
*
* Unregister fprobe (and remove ftrace hooks from the function entries).
+ * This function will NOT wait until the fprobe is no longer used.
*
* Return 0 if @fp is unregistered successfully, -errno if not.
*/
-int unregister_fprobe(struct fprobe *fp)
+int unregister_fprobe_async(struct fprobe *fp)
{
guard(mutex)(&fprobe_mutex);
if (!fp || !fprobe_registered(fp))
@@ -1108,6 +1109,24 @@ int unregister_fprobe(struct fprobe *fp)
return unregister_fprobe_nolock(fp);
}
+
+/**
+ * unregister_fprobe() - Unregister fprobe with RCU GP wait
+ * @fp: A fprobe data structure to be unregistered.
+ *
+ * Unregister fprobe (and remove ftrace hooks from the function entries).
+ * This function will block until the fprobe is no longer used.
+ *
+ * Return 0 if @fp is unregistered successfully, -errno if not.
+ */
+int unregister_fprobe(struct fprobe *fp)
+{
+ int ret = unregister_fprobe_async(fp);
+
+ if (!ret)
+ synchronize_rcu();
+ return ret;
+}
EXPORT_SYMBOL_GPL(unregister_fprobe);
static int __init fprobe_initcall(void)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3d2e3b2ec528..33b721a9af02 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2296,6 +2296,18 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) &&
WARN_ONCE(!is_chained_work(wq), "workqueue: cannot queue %ps on wq %s\n",
work->func, wq->name))) {
+ struct work_offq_data offqd;
+
+ /*
+ * State on entry: PENDING is set, work is off-queue (no
+ * insert_work() has run).
+ *
+ * Returning without clearing PENDING would leave the work
+ * in a weird state (PENDING=1, PWQ=0, entry empty)
+ */
+ work_offqd_unpack(&offqd, *work_data_bits(work));
+ set_work_pool_and_clear_pending(work, offqd.pool_id,
+ work_offqd_pack_flags(&offqd));
return;
}
rcu_read_lock();
@@ -5642,7 +5654,9 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
ret = apply_workqueue_attrs_locked(wq, unbound_std_wq_attrs[highpri]);
}
- return ret;
+ if (ret)
+ goto enomem;
+ return 0;
enomem:
if (wq->cpu_pwq) {
diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig
index 498cc51e493d..94ff8e4089bf 100644
--- a/lib/kunit/Kconfig
+++ b/lib/kunit/Kconfig
@@ -16,8 +16,9 @@ menuconfig KUNIT
if KUNIT
config KUNIT_DEBUGFS
- bool "KUnit - Enable /sys/kernel/debug/kunit debugfs representation" if !KUNIT_ALL_TESTS
- default KUNIT_ALL_TESTS
+ bool "KUnit - Enable /sys/kernel/debug/kunit debugfs representation"
+ depends on DEBUG_FS
+ default y
help
Enable debugfs representation for kunit. Currently this consists
of /sys/kernel/debug/kunit/<test_suite>/results files for each
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 7a67ef5b67b6..04b3a808fca9 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -114,6 +114,14 @@ static void bucket_table_free(const struct bucket_table *tbl)
kvfree(tbl);
}
+static void bucket_table_free_atomic(const struct bucket_table *tbl)
+{
+ if (tbl->nest)
+ nested_bucket_table_free(tbl);
+
+ kvfree_atomic(tbl);
+}
+
static void bucket_table_free_rcu(struct rcu_head *head)
{
bucket_table_free(container_of(head, struct bucket_table, rcu));
@@ -496,7 +504,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
err = rhashtable_rehash_attach(ht, tbl, new_tbl);
if (err) {
- bucket_table_free(new_tbl);
+ bucket_table_free_atomic(new_tbl);
if (err == -EEXIST)
err = 0;
} else
@@ -1166,6 +1174,11 @@ static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
* This function will eventually sleep to wait for an async resize
* to complete. The caller is responsible that no further write operations
* occurs in parallel.
+ *
+ * After cancel_work_sync() has returned, the deferred rehash worker is
+ * quiesced and, per the contract above, no other concurrent access to the
+ * rhashtable is possible. The tables are therefore owned exclusively by
+ * this function and can be walked without ht->mutex held.
*/
void rhashtable_free_and_destroy(struct rhashtable *ht,
void (*free_fn)(void *ptr, void *arg),
@@ -1177,8 +1190,15 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
irq_work_sync(&ht->run_irq_work);
cancel_work_sync(&ht->run_work);
- mutex_lock(&ht->mutex);
- tbl = rht_dereference(ht->tbl, ht);
+ /*
+ * Do NOT take ht->mutex here. The rehash worker establishes
+ * ht->mutex -> fs_reclaim via GFP_KERNEL bucket allocation under
+ * the mutex; callers on the reclaim path (e.g. simple_xattr_ht_free()
+ * from evict() under the dcache shrinker for shmem/kernfs/pidfs
+ * inodes) would otherwise close a circular dependency
+ * fs_reclaim -> ht->mutex.
+ */
+ tbl = rcu_dereference_raw(ht->tbl);
restart:
if (free_fn) {
for (i = 0; i < tbl->size; i++) {
@@ -1187,22 +1207,21 @@ restart:
cond_resched();
for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
next = !rht_is_a_nulls(pos) ?
- rht_dereference(pos->next, ht) : NULL;
+ rcu_dereference_raw(pos->next) : NULL;
!rht_is_a_nulls(pos);
pos = next,
next = !rht_is_a_nulls(pos) ?
- rht_dereference(pos->next, ht) : NULL)
+ rcu_dereference_raw(pos->next) : NULL)
rhashtable_free_one(ht, pos, free_fn, arg);
}
}
- next_tbl = rht_dereference(tbl->future_tbl, ht);
+ next_tbl = rcu_dereference_raw(tbl->future_tbl);
bucket_table_free(tbl);
if (next_tbl) {
tbl = next_tbl;
goto restart;
}
- mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
diff --git a/lib/tests/test_kprobes.c b/lib/tests/test_kprobes.c
index b7582010125c..06e729e4de05 100644
--- a/lib/tests/test_kprobes.c
+++ b/lib/tests/test_kprobes.c
@@ -12,6 +12,12 @@
#define div_factor 3
+#define KP_CLEAR(_kp) \
+do { \
+ (_kp).addr = NULL; \
+ (_kp).flags = 0; \
+} while (0)
+
static u32 rand1, preh_val, posth_val;
static u32 (*target)(u32 value);
static u32 (*recursed_target)(u32 value);
@@ -125,10 +131,6 @@ static void test_kprobes(struct kunit *test)
current_test = test;
- /* addr and flags should be cleard for reusing kprobe. */
- kp.addr = NULL;
- kp.flags = 0;
-
KUNIT_EXPECT_EQ(test, 0, register_kprobes(kps, 2));
preh_val = 0;
posth_val = 0;
@@ -226,9 +228,6 @@ static void test_kretprobes(struct kunit *test)
struct kretprobe *rps[2] = {&rp, &rp2};
current_test = test;
- /* addr and flags should be cleard for reusing kprobe. */
- rp.kp.addr = NULL;
- rp.kp.flags = 0;
KUNIT_EXPECT_EQ(test, 0, register_kretprobes(rps, 2));
krph_val = 0;
@@ -290,8 +289,6 @@ static void test_stacktrace_on_kretprobe(struct kunit *test)
unsigned long myretaddr = (unsigned long)__builtin_return_address(0);
current_test = test;
- rp3.kp.addr = NULL;
- rp3.kp.flags = 0;
/*
* Run the stacktrace_driver() to record correct return address in
@@ -352,8 +349,6 @@ static void test_stacktrace_on_nested_kretprobe(struct kunit *test)
struct kretprobe *rps[2] = {&rp3, &rp4};
current_test = test;
- rp3.kp.addr = NULL;
- rp3.kp.flags = 0;
//KUNIT_ASSERT_NE(test, myretaddr, stacktrace_driver());
@@ -367,6 +362,18 @@ static void test_stacktrace_on_nested_kretprobe(struct kunit *test)
static int kprobes_test_init(struct kunit *test)
{
+ KP_CLEAR(kp);
+ KP_CLEAR(kp2);
+ KP_CLEAR(kp_missed);
+#ifdef CONFIG_KRETPROBES
+ KP_CLEAR(rp.kp);
+ KP_CLEAR(rp2.kp);
+#ifdef CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
+ KP_CLEAR(rp3.kp);
+ KP_CLEAR(rp4.kp);
+#endif
+#endif
+
target = kprobe_target;
target2 = kprobe_target2;
recursed_target = kprobe_recursed_target;
diff --git a/mm/memfd_luo.c b/mm/memfd_luo.c
index b02b503c750d..59de210bee5f 100644
--- a/mm/memfd_luo.c
+++ b/mm/memfd_luo.c
@@ -50,6 +50,11 @@
* memfds are always opened with ``O_RDWR`` and ``O_LARGEFILE``. This property
* is maintained.
*
+ * Seals
+ * File seals set on the memfd are preserved and re-applied on restore.
+ * Only seals known to this LUO version (see ``MEMFD_LUO_ALL_SEALS``) may
+ * be present; preservation fails with ``-EOPNOTSUPP`` otherwise.
+ *
* Non-Preserved Properties
* ========================
*
@@ -61,10 +66,6 @@
* A memfd can be created with the ``MFD_CLOEXEC`` flag that sets the
* ``FD_CLOEXEC`` on the file. This flag is not preserved and must be set
* again after restore via ``fcntl()``.
- *
- * Seals
- * File seals are not preserved. The file is unsealed on restore and if
- * needed, must be sealed again via ``fcntl()``.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -259,7 +260,7 @@ static int memfd_luo_preserve(struct liveupdate_file_op_args *args)
struct inode *inode = file_inode(args->file);
struct memfd_luo_folio_ser *folios_ser;
struct memfd_luo_ser *ser;
- u64 nr_folios;
+ u64 nr_folios, inode_size;
int err = 0, seals;
inode_lock(inode);
@@ -285,7 +286,18 @@ static int memfd_luo_preserve(struct liveupdate_file_op_args *args)
}
ser->pos = args->file->f_pos;
- ser->size = i_size_read(inode);
+ inode_size = i_size_read(inode);
+
+ /*
+ * memfd_pin_folios() caps at UINT_MAX folios; refuse larger
+ * files to avoid silently preserving only a prefix.
+ */
+ if (DIV_ROUND_UP_ULL(inode_size, PAGE_SIZE) > UINT_MAX) {
+ err = -EFBIG;
+ goto err_free_ser;
+ }
+
+ ser->size = inode_size;
ser->seals = seals;
err = memfd_luo_preserve_folios(args->file, &ser->folios,
@@ -427,6 +439,7 @@ static int memfd_luo_retrieve_folios(struct file *file,
if (!folio) {
pr_err("Unable to restore folio at physical address: %llx\n",
phys);
+ err = -EIO;
goto put_folios;
}
index = pfolio->index;
diff --git a/mm/slub.c b/mm/slub.c
index 0baa906f39ab..8f9004536729 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6883,6 +6883,22 @@ void kvfree(const void *addr)
EXPORT_SYMBOL(kvfree);
/**
+ * kvfree_atomic() - Free memory.
+ * @addr: Pointer to allocated memory.
+ *
+ * Same as kvfree(), but uses vfree_atomic() for vmalloc
+ * backed memory. Must not be called from NMI context.
+ */
+void kvfree_atomic(const void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ vfree_atomic(addr);
+ else
+ kfree(addr);
+}
+EXPORT_SYMBOL(kvfree_atomic);
+
+/**
* kvfree_sensitive - Free a data object containing sensitive information.
* @addr: address of the data object to be freed.
* @len: length of the data object.
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 358fbe5e4d1d..b991d937205a 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -179,6 +179,7 @@ as_indicate_complete:
break;
default:
pr_alert("bad message type %d\n", (int)msg->type);
+ dev_kfree_skb(skb);
/* Paired with find_get_vcc(msg->vcc) above */
sock_put(sk);
return -EINVAL;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f28e9cbf8ad5..74ef7dc2b2f9 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -173,19 +173,12 @@ free_orig_node_hash:
static struct batadv_neigh_node *
batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr,
- struct batadv_orig_node *orig_node,
- struct batadv_orig_node *orig_neigh)
+ struct batadv_orig_node *orig_node)
{
struct batadv_neigh_node *neigh_node;
neigh_node = batadv_neigh_node_get_or_create(orig_node,
hard_iface, neigh_addr);
- if (!neigh_node)
- goto out;
-
- neigh_node->orig_node = orig_neigh;
-
-out:
return neigh_node;
}
@@ -335,7 +328,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
struct batadv_priv *bat_priv = netdev_priv(hard_iface->mesh_iface);
const char *fwd_str;
u8 packet_num;
- s16 buff_pos;
+ int buff_pos;
struct batadv_ogm_packet *batadv_ogm_packet;
struct sk_buff *skb;
u8 *packet_pos;
@@ -907,6 +900,31 @@ static u8 batadv_iv_orig_ifinfo_sum(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_iv_ogm_neigh_ifinfo_sum() - Get bcast_own sum for a last-hop neighbor
+ * @bat_priv: the bat priv with all the mesh interface information
+ * @neigh_node: last-hop neighbor of an originator
+ *
+ * Return: Number of replied (rebroadcasted) OGMs for the originator currently
+ * announced by the neighbor. Returns 0 if the neighbor's originator entry is
+ * not available anymore.
+ */
+static u8 batadv_iv_ogm_neigh_ifinfo_sum(struct batadv_priv *bat_priv,
+ const struct batadv_neigh_node *neigh_node)
+{
+ struct batadv_orig_node *orig_neigh;
+ u8 sum;
+
+ orig_neigh = batadv_orig_hash_find(bat_priv, neigh_node->addr);
+ if (!orig_neigh)
+ return 0;
+
+ sum = batadv_iv_orig_ifinfo_sum(orig_neigh, neigh_node->if_incoming);
+ batadv_orig_node_put(orig_neigh);
+
+ return sum;
+}
+
+/**
* batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an
* originator
* @bat_priv: the bat priv with all the mesh interface information
@@ -975,17 +993,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
}
if (!neigh_node) {
- struct batadv_orig_node *orig_tmp;
-
- orig_tmp = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source);
- if (!orig_tmp)
- goto unlock;
-
neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
ethhdr->h_source,
- orig_node, orig_tmp);
-
- batadv_orig_node_put(orig_tmp);
+ orig_node);
if (!neigh_node)
goto unlock;
} else {
@@ -1037,10 +1047,9 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
*/
if (router_ifinfo &&
neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg) {
- sum_orig = batadv_iv_orig_ifinfo_sum(router->orig_node,
- router->if_incoming);
- sum_neigh = batadv_iv_orig_ifinfo_sum(neigh_node->orig_node,
- neigh_node->if_incoming);
+ sum_orig = batadv_iv_ogm_neigh_ifinfo_sum(bat_priv, router);
+ sum_neigh = batadv_iv_ogm_neigh_ifinfo_sum(bat_priv,
+ neigh_node);
if (sum_orig >= sum_neigh)
goto out;
}
@@ -1106,7 +1115,6 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
if (!neigh_node)
neigh_node = batadv_iv_ogm_neigh_new(if_incoming,
orig_neigh_node->orig,
- orig_neigh_node,
orig_neigh_node);
if (!neigh_node)
@@ -1303,6 +1311,32 @@ out:
}
/**
+ * batadv_orig_to_direct_router() - get direct next hop neighbor to an orig address
+ * @bat_priv: the bat priv with all the mesh interface information
+ * @orig_addr: the originator MAC address to search the best next hop router for
+ * @if_outgoing: the interface where the OGM should be sent to
+ *
+ * Return: A neighbor node which is the best router towards the given originator
+ * address. Bonding candidates are ignored.
+ */
+static struct batadv_neigh_node *
+batadv_orig_to_direct_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+ struct batadv_hard_iface *if_outgoing)
+{
+ struct batadv_neigh_node *neigh_node;
+ struct batadv_orig_node *orig_node;
+
+ orig_node = batadv_orig_hash_find(bat_priv, orig_addr);
+ if (!orig_node)
+ return NULL;
+
+ neigh_node = batadv_orig_router_get(orig_node, if_outgoing);
+ batadv_orig_node_put(orig_node);
+
+ return neigh_node;
+}
+
+/**
* batadv_iv_ogm_process_per_outif() - process a batman iv OGM for an outgoing
* interface
* @skb: the skb containing the OGM
@@ -1372,8 +1406,9 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
router = batadv_orig_router_get(orig_node, if_outgoing);
if (router) {
- router_router = batadv_orig_router_get(router->orig_node,
- if_outgoing);
+ router_router = batadv_orig_to_direct_router(bat_priv,
+ router->addr,
+ if_outgoing);
router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing);
}
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 51fe028b9088..cec11f1251d6 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -318,8 +318,8 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
if (claim->backbone_gw != backbone_gw)
continue;
- batadv_claim_put(claim);
hlist_del_rcu(&claim->hash_entry);
+ batadv_claim_put(claim);
}
spin_unlock_bh(list_lock);
}
@@ -723,6 +723,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
if (unlikely(hash_added != 0)) {
/* only local changes happened. */
+ batadv_backbone_gw_put(backbone_gw);
kfree(claim);
return;
}
@@ -1288,6 +1289,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
rcu_read_lock();
hlist_for_each_entry_rcu(claim, head, hash_entry) {
+ /* only purge claims not currently in the process of being released.
+ * Such claims could otherwise have a NULL-ptr backbone_gw set because
+ * they already went through batadv_claim_release()
+ */
+ if (!kref_get_unless_zero(&claim->refcount))
+ continue;
+
backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
if (now)
goto purge_now;
@@ -1313,6 +1321,7 @@ purge_now:
claim->addr, claim->vid);
skip:
batadv_backbone_gw_put(backbone_gw);
+ batadv_claim_put(claim);
}
rcu_read_unlock();
}
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 3a35aadd8b41..a4d33ee0fda5 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -249,6 +249,7 @@ void batadv_mesh_free(struct net_device *mesh_iface)
atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
batadv_purge_outstanding_packets(bat_priv, NULL);
+ batadv_tp_stop_all(bat_priv);
batadv_gw_node_free(bat_priv);
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 2e42f6b348c8..066c76113fc4 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -12,6 +12,7 @@
#include <linux/byteorder/generic.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/completion.h>
#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/etherdevice.h>
@@ -365,23 +366,38 @@ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
}
/**
- * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer
- * @bat_priv: the bat priv with all the mesh interface information
- * @tp_vars: the private data of the current TP meter session to cleanup
+ * batadv_tp_list_detach() - remove tp session from mesh session list once
+ * @tp_vars: the private data of the current TP meter session
*/
-static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv,
- struct batadv_tp_vars *tp_vars)
+static void batadv_tp_list_detach(struct batadv_tp_vars *tp_vars)
{
- cancel_delayed_work(&tp_vars->finish_work);
+ bool detached = false;
spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
- hlist_del_rcu(&tp_vars->list);
+ if (!hlist_unhashed(&tp_vars->list)) {
+ hlist_del_init_rcu(&tp_vars->list);
+ detached = true;
+ }
spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
+ if (!detached)
+ return;
+
+ atomic_dec(&tp_vars->bat_priv->tp_num);
+
/* drop list reference */
batadv_tp_vars_put(tp_vars);
+}
- atomic_dec(&tp_vars->bat_priv->tp_num);
+/**
+ * batadv_tp_sender_cleanup() - cleanup sender data and drop and timer
+ * @tp_vars: the private data of the current TP meter session to cleanup
+ */
+static void batadv_tp_sender_cleanup(struct batadv_tp_vars *tp_vars)
+{
+ cancel_delayed_work_sync(&tp_vars->finish_work);
+
+ batadv_tp_list_detach(tp_vars);
/* kill the timer and remove its reference */
timer_delete_sync(&tp_vars->timer);
@@ -886,7 +902,8 @@ out:
batadv_orig_node_put(orig_node);
batadv_tp_sender_end(bat_priv, tp_vars);
- batadv_tp_sender_cleanup(bat_priv, tp_vars);
+ batadv_tp_sender_cleanup(tp_vars);
+ complete(&tp_vars->finished);
batadv_tp_vars_put(tp_vars);
@@ -918,7 +935,8 @@ static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars)
batadv_tp_vars_put(tp_vars);
/* cleanup of failed tp meter variables */
- batadv_tp_sender_cleanup(bat_priv, tp_vars);
+ batadv_tp_sender_cleanup(tp_vars);
+ complete(&tp_vars->finished);
return;
}
@@ -947,6 +965,13 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
/* look for an already existing test towards this node */
spin_lock_bh(&bat_priv->tp_list_lock);
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) {
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+ batadv_tp_batctl_error_notify(BATADV_TP_REASON_DST_UNREACHABLE,
+ dst, bat_priv, session_cookie);
+ return;
+ }
+
tp_vars = batadv_tp_list_find(bat_priv, dst);
if (tp_vars) {
spin_unlock_bh(&bat_priv->tp_list_lock);
@@ -969,6 +994,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
tp_vars = kmalloc_obj(*tp_vars, GFP_ATOMIC);
if (!tp_vars) {
+ atomic_dec(&bat_priv->tp_num);
spin_unlock_bh(&bat_priv->tp_list_lock);
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
"Meter: %s cannot allocate list elements\n",
@@ -1017,6 +1043,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
tp_vars->start_time = jiffies;
init_waitqueue_head(&tp_vars->more_bytes);
+ init_completion(&tp_vars->finished);
spin_lock_init(&tp_vars->unacked_lock);
INIT_LIST_HEAD(&tp_vars->unacked_list);
@@ -1119,14 +1146,7 @@ static void batadv_tp_receiver_shutdown(struct timer_list *t)
"Shutting down for inactivity (more than %dms) from %pM\n",
BATADV_TP_RECV_TIMEOUT, tp_vars->other_end);
- spin_lock_bh(&tp_vars->bat_priv->tp_list_lock);
- hlist_del_rcu(&tp_vars->list);
- spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock);
-
- /* drop list reference */
- batadv_tp_vars_put(tp_vars);
-
- atomic_dec(&bat_priv->tp_num);
+ batadv_tp_list_detach(tp_vars);
spin_lock_bh(&tp_vars->unacked_lock);
list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) {
@@ -1329,9 +1349,12 @@ static struct batadv_tp_vars *
batadv_tp_init_recv(struct batadv_priv *bat_priv,
const struct batadv_icmp_tp_packet *icmp)
{
- struct batadv_tp_vars *tp_vars;
+ struct batadv_tp_vars *tp_vars = NULL;
spin_lock_bh(&bat_priv->tp_list_lock);
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+ goto out_unlock;
+
tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig,
icmp->session);
if (tp_vars)
@@ -1344,8 +1367,10 @@ batadv_tp_init_recv(struct batadv_priv *bat_priv,
}
tp_vars = kmalloc_obj(*tp_vars, GFP_ATOMIC);
- if (!tp_vars)
+ if (!tp_vars) {
+ atomic_dec(&bat_priv->tp_num);
goto out_unlock;
+ }
ether_addr_copy(tp_vars->other_end, icmp->orig);
tp_vars->role = BATADV_TP_RECEIVER;
@@ -1464,6 +1489,9 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
{
struct batadv_icmp_tp_packet *icmp;
+ if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+ goto out;
+
icmp = (struct batadv_icmp_tp_packet *)skb->data;
switch (icmp->subtype) {
@@ -1478,10 +1506,58 @@ void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb)
"Received unknown TP Metric packet type %u\n",
icmp->subtype);
}
+
+out:
consume_skb(skb);
}
/**
+ * batadv_tp_stop_all() - stop all currently running tp meter sessions
+ * @bat_priv: the bat priv with all the mesh interface information
+ */
+void batadv_tp_stop_all(struct batadv_priv *bat_priv)
+{
+ struct batadv_tp_vars *tp_vars[BATADV_TP_MAX_NUM];
+ struct batadv_tp_vars *tp_var;
+ size_t count = 0;
+ size_t i;
+
+ spin_lock_bh(&bat_priv->tp_list_lock);
+ hlist_for_each_entry(tp_var, &bat_priv->tp_list, list) {
+ if (WARN_ON_ONCE(count >= BATADV_TP_MAX_NUM))
+ break;
+
+ if (!kref_get_unless_zero(&tp_var->refcount))
+ continue;
+
+ tp_vars[count++] = tp_var;
+ }
+ spin_unlock_bh(&bat_priv->tp_list_lock);
+
+ for (i = 0; i < count; i++) {
+ tp_var = tp_vars[i];
+
+ switch (tp_var->role) {
+ case BATADV_TP_SENDER:
+ batadv_tp_sender_shutdown(tp_var,
+ BATADV_TP_REASON_CANCEL);
+ wake_up(&tp_var->more_bytes);
+ wait_for_completion(&tp_var->finished);
+ break;
+ case BATADV_TP_RECEIVER:
+ batadv_tp_list_detach(tp_var);
+ if (timer_shutdown_sync(&tp_var->timer))
+ batadv_tp_vars_put(tp_var);
+ break;
+ }
+
+ batadv_tp_vars_put(tp_var);
+ }
+
+ synchronize_net();
+}
+
+/**
* batadv_tp_meter_init() - initialize global tp_meter structures
*/
void __init batadv_tp_meter_init(void)
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index f0046d366eac..4e97cd10cd02 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -17,6 +17,7 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
u32 test_length, u32 *cookie);
void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst,
u8 return_value);
+void batadv_tp_stop_all(struct batadv_priv *bat_priv);
void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb);
#endif /* _NET_BATMAN_ADV_TP_METER_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 8fc5fe0e9b05..daa06f421154 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -14,6 +14,7 @@
#include <linux/average.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
+#include <linux/completion.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/kref.h>
@@ -1328,6 +1329,9 @@ struct batadv_tp_vars {
/** @finish_work: work item for the finishing procedure */
struct delayed_work finish_work;
+ /** @finished: completion signaled when a sender thread exits */
+ struct completion finished;
+
/** @test_length: test length in milliseconds */
u32 test_length;
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 741360219552..f05c79f215ea 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -112,24 +112,22 @@ static struct pernet_operations broute_net_ops = {
static int __init ebtable_broute_init(void)
{
- int ret = ebt_register_template(&broute_table, broute_table_init);
+ int ret = register_pernet_subsys(&broute_net_ops);
if (ret)
return ret;
- ret = register_pernet_subsys(&broute_net_ops);
- if (ret) {
- ebt_unregister_template(&broute_table);
- return ret;
- }
+ ret = ebt_register_template(&broute_table, broute_table_init);
+ if (ret)
+ unregister_pernet_subsys(&broute_net_ops);
- return 0;
+ return ret;
}
static void __exit ebtable_broute_fini(void)
{
- unregister_pernet_subsys(&broute_net_ops);
ebt_unregister_template(&broute_table);
+ unregister_pernet_subsys(&broute_net_ops);
}
module_init(ebtable_broute_init);
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index dacd81b12e62..0fc03b07e62a 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -93,24 +93,22 @@ static struct pernet_operations frame_filter_net_ops = {
static int __init ebtable_filter_init(void)
{
- int ret = ebt_register_template(&frame_filter, frame_filter_table_init);
+ int ret = register_pernet_subsys(&frame_filter_net_ops);
if (ret)
return ret;
- ret = register_pernet_subsys(&frame_filter_net_ops);
- if (ret) {
- ebt_unregister_template(&frame_filter);
- return ret;
- }
+ ret = ebt_register_template(&frame_filter, frame_filter_table_init);
+ if (ret)
+ unregister_pernet_subsys(&frame_filter_net_ops);
- return 0;
+ return ret;
}
static void __exit ebtable_filter_fini(void)
{
- unregister_pernet_subsys(&frame_filter_net_ops);
ebt_unregister_template(&frame_filter);
+ unregister_pernet_subsys(&frame_filter_net_ops);
}
module_init(ebtable_filter_init);
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 0f2a8c6118d4..8a10375d8909 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -93,24 +93,22 @@ static struct pernet_operations frame_nat_net_ops = {
static int __init ebtable_nat_init(void)
{
- int ret = ebt_register_template(&frame_nat, frame_nat_table_init);
+ int ret = register_pernet_subsys(&frame_nat_net_ops);
if (ret)
return ret;
- ret = register_pernet_subsys(&frame_nat_net_ops);
- if (ret) {
- ebt_unregister_template(&frame_nat);
- return ret;
- }
+ ret = ebt_register_template(&frame_nat, frame_nat_table_init);
+ if (ret)
+ unregister_pernet_subsys(&frame_nat_net_ops);
return ret;
}
static void __exit ebtable_nat_fini(void)
{
- unregister_pernet_subsys(&frame_nat_net_ops);
ebt_unregister_template(&frame_nat);
+ unregister_pernet_subsys(&frame_nat_net_ops);
}
module_init(ebtable_nat_init);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index aea3e19875c6..b9f4daac09af 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -42,6 +42,7 @@
struct ebt_pernet {
struct list_head tables;
+ struct list_head dead_tables;
};
struct ebt_template {
@@ -1162,11 +1163,6 @@ free_newinfo:
static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
{
- mutex_lock(&ebt_mutex);
- list_del(&table->list);
- mutex_unlock(&ebt_mutex);
- audit_log_nfcfg(table->name, AF_BRIDGE, table->private->nentries,
- AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size,
ebt_cleanup_entry, net, NULL);
if (table->private->nentries)
@@ -1267,13 +1263,15 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
for (i = 0; i < num_ops; i++)
ops[i].priv = table;
- list_add(&table->list, &ebt_net->tables);
- mutex_unlock(&ebt_mutex);
-
table->ops = ops;
ret = nf_register_net_hooks(net, ops, num_ops);
- if (ret)
+ if (ret) {
+ synchronize_rcu();
__ebt_unregister_table(net, table);
+ } else {
+ list_add(&table->list, &ebt_net->tables);
+ }
+ mutex_unlock(&ebt_mutex);
audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
AUDIT_XT_OP_REGISTER, GFP_KERNEL);
@@ -1339,7 +1337,7 @@ void ebt_unregister_template(const struct ebt_table *t)
}
EXPORT_SYMBOL(ebt_unregister_template);
-static struct ebt_table *__ebt_find_table(struct net *net, const char *name)
+void ebt_unregister_table_pre_exit(struct net *net, const char *name)
{
struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
struct ebt_table *t;
@@ -1348,30 +1346,36 @@ static struct ebt_table *__ebt_find_table(struct net *net, const char *name)
list_for_each_entry(t, &ebt_net->tables, list) {
if (strcmp(t->name, name) == 0) {
+ list_move(&t->list, &ebt_net->dead_tables);
mutex_unlock(&ebt_mutex);
- return t;
+ nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks));
+ return;
}
}
mutex_unlock(&ebt_mutex);
- return NULL;
-}
-
-void ebt_unregister_table_pre_exit(struct net *net, const char *name)
-{
- struct ebt_table *table = __ebt_find_table(net, name);
-
- if (table)
- nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
EXPORT_SYMBOL(ebt_unregister_table_pre_exit);
void ebt_unregister_table(struct net *net, const char *name)
{
- struct ebt_table *table = __ebt_find_table(net, name);
+ struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
+ struct ebt_table *t;
- if (table)
- __ebt_unregister_table(net, table);
+ mutex_lock(&ebt_mutex);
+
+ list_for_each_entry(t, &ebt_net->dead_tables, list) {
+ if (strcmp(t->name, name) == 0) {
+ list_del(&t->list);
+ audit_log_nfcfg(t->name, AF_BRIDGE, t->private->nentries,
+ AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
+ __ebt_unregister_table(net, t);
+ mutex_unlock(&ebt_mutex);
+ return;
+ }
+ }
+
+ mutex_unlock(&ebt_mutex);
}
/* userspace just supplied us with counters */
@@ -2556,11 +2560,21 @@ static int __net_init ebt_pernet_init(struct net *net)
struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
INIT_LIST_HEAD(&ebt_net->tables);
+ INIT_LIST_HEAD(&ebt_net->dead_tables);
return 0;
}
+static void __net_exit ebt_pernet_exit(struct net *net)
+{
+ struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
+
+ WARN_ON_ONCE(!list_empty(&ebt_net->tables));
+ WARN_ON_ONCE(!list_empty(&ebt_net->dead_tables));
+}
+
static struct pernet_operations ebt_net_ops = {
.init = ebt_pernet_init,
+ .exit = ebt_pernet_exit,
.id = &ebt_pernet_id,
.size = sizeof(struct ebt_pernet),
};
@@ -2569,19 +2583,20 @@ static int __init ebtables_init(void)
{
int ret;
- ret = xt_register_target(&ebt_standard_target);
+ ret = register_pernet_subsys(&ebt_net_ops);
if (ret < 0)
return ret;
- ret = nf_register_sockopt(&ebt_sockopts);
+
+ ret = xt_register_target(&ebt_standard_target);
if (ret < 0) {
- xt_unregister_target(&ebt_standard_target);
+ unregister_pernet_subsys(&ebt_net_ops);
return ret;
}
- ret = register_pernet_subsys(&ebt_net_ops);
+ ret = nf_register_sockopt(&ebt_sockopts);
if (ret < 0) {
- nf_unregister_sockopt(&ebt_sockopts);
xt_unregister_target(&ebt_standard_target);
+ unregister_pernet_subsys(&ebt_net_ops);
return ret;
}
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 692e0b868822..9e64e82d0b63 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -115,6 +115,11 @@ static int __ceph_x_decrypt(const struct ceph_crypto_key *key, int usage_slot,
if (ret)
return ret;
+ if (plaintext_len < sizeof(*hdr)) {
+ pr_err("%s plaintext too small %d\n", __func__, plaintext_len);
+ return -EINVAL;
+ }
+
hdr = p + ceph_crypt_data_offset(key);
if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) {
pr_err("%s bad magic\n", __func__);
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 254ded0b05f6..521aec1d5fc0 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -47,7 +47,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
{
kfree(b->h.items);
- kfree(b);
}
void crush_destroy_bucket_list(struct crush_bucket_list *b)
@@ -55,14 +54,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
kfree(b->item_weights);
kfree(b->sum_weights);
kfree(b->h.items);
- kfree(b);
}
void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
{
kfree(b->h.items);
kfree(b->node_weights);
- kfree(b);
}
void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
@@ -70,14 +67,12 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
kfree(b->straws);
kfree(b->item_weights);
kfree(b->h.items);
- kfree(b);
}
void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
{
kfree(b->item_weights);
kfree(b->h.items);
- kfree(b);
}
void crush_destroy_bucket(struct crush_bucket *b)
@@ -99,6 +94,7 @@ void crush_destroy_bucket(struct crush_bucket *b)
crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
break;
}
+ kfree(b);
}
/**
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index c89e66d4fcb7..8b5b0587a0cf 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -72,8 +72,7 @@ static int crush_decode_uniform_bucket(void **p, void *end,
struct crush_bucket_uniform *b)
{
dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
- ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
- b->item_weight = ceph_decode_32(p);
+ ceph_decode_32_safe(p, end, b->item_weight, bad);
return 0;
bad:
return -EINVAL;
@@ -389,11 +388,15 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
goto fail;
if (arg->ids_size &&
- arg->ids_size != c->buckets[bucket_index]->size)
+ (!c->buckets[bucket_index] ||
+ arg->ids_size != c->buckets[bucket_index]->size))
goto e_inval;
}
- insert_choose_arg_map(&c->choose_args, arg_map);
+ if (!__insert_choose_arg_map(&c->choose_args, arg_map)) {
+ ret = -EEXIST;
+ goto fail;
+ }
}
return 0;
@@ -516,6 +519,10 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
b->id = ceph_decode_32(p);
b->type = ceph_decode_16(p);
b->alg = ceph_decode_8(p);
+ if (b->alg != alg) {
+ b->alg = 0;
+ goto bad;
+ }
b->hash = ceph_decode_8(p);
b->weight = ceph_decode_32(p);
b->size = ceph_decode_32(p);
@@ -1702,7 +1709,7 @@ static int osdmap_decode(void **p, void *end, bool msgr2,
ceph_decode_need(p, end, 3*sizeof(u32) +
map->max_osd*(struct_v >= 5 ? sizeof(u32) :
sizeof(u8)) +
- sizeof(*map->osd_weight), e_inval);
+ map->max_osd*sizeof(*map->osd_weight), e_inval);
if (ceph_decode_32(p) != map->max_osd)
goto e_inval;
diff --git a/net/core/dev.c b/net/core/dev.c
index 8bfa8313ef62..0c6c270d9f7d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6862,9 +6862,9 @@ static void skb_defer_free_flush(void)
#if defined(CONFIG_NET_RX_BUSY_POLL)
-static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
+static void __busy_poll_stop(struct napi_struct *napi, unsigned long timeout)
{
- if (!skip_schedule) {
+ if (!timeout) {
gro_normal_list(&napi->gro);
__napi_schedule(napi);
return;
@@ -6874,6 +6874,8 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
gro_flush_normal(&napi->gro, HZ >= 1000);
clear_bit(NAPI_STATE_SCHED, &napi->state);
+ hrtimer_start(&napi->timer, ns_to_ktime(timeout),
+ HRTIMER_MODE_REL_PINNED);
}
enum {
@@ -6885,8 +6887,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
unsigned flags, u16 budget)
{
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
- bool skip_schedule = false;
- unsigned long timeout;
+ unsigned long timeout = 0;
int rc;
/* Busy polling means there is a high chance device driver hard irq
@@ -6906,10 +6907,12 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
if (flags & NAPI_F_PREFER_BUSY_POLL) {
napi->defer_hard_irqs_count = napi_get_defer_hard_irqs(napi);
- timeout = napi_get_gro_flush_timeout(napi);
- if (napi->defer_hard_irqs_count && timeout) {
- hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED);
- skip_schedule = true;
+ if (napi->defer_hard_irqs_count) {
+ /* A short enough gro flush timeout and long enough
+ * poll can result in timer firing too early.
+ * Timer will be armed later if necessary.
+ */
+ timeout = napi_get_gro_flush_timeout(napi);
}
}
@@ -6924,7 +6927,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
trace_napi_poll(napi, rc, budget);
netpoll_poll_unlock(have_poll_lock);
if (rc == budget)
- __busy_poll_stop(napi, skip_schedule);
+ __busy_poll_stop(napi, timeout);
bpf_net_ctx_clear(bpf_net_ctx);
local_bh_enable();
}
diff --git a/net/core/failover.c b/net/core/failover.c
index 11bb183c7a1b..e43c59cd6868 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -12,6 +12,7 @@
#include <uapi/linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <net/netdev_lock.h>
#include <net/failover.h>
static LIST_HEAD(failover_list);
@@ -221,8 +222,11 @@ failover_existing_slave_register(struct net_device *failover_dev)
for_each_netdev(net, dev) {
if (netif_is_failover(dev))
continue;
- if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr))
+ if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) {
+ netdev_lock_ops(dev);
failover_slave_register(dev);
+ netdev_unlock_ops(dev);
+ }
}
rtnl_unlock();
}
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
index 8bb98d3ea3db..a3a2cc6480a0 100644
--- a/net/ethtool/bitset.c
+++ b/net/ethtool/bitset.c
@@ -92,7 +92,7 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start,
u32 mask;
if (end <= start)
- return true;
+ return false;
if (start % 32) {
mask = ethnl_upper_bits(start);
@@ -105,11 +105,11 @@ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start,
start_word++;
}
- if (!memchr_inv(map + start_word, '\0',
- (end_word - start_word) * sizeof(u32)))
+ if (memchr_inv(map + start_word, '\0',
+ (end_word - start_word) * sizeof(u32)))
return true;
if (end % 32 == 0)
- return true;
+ return false;
return map[end_word] & ethnl_lower_bits(end);
}
diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c
index d4e6887055ab..ddc6eab701ed 100644
--- a/net/ethtool/phy.c
+++ b/net/ethtool/phy.c
@@ -76,6 +76,7 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info,
struct nlattr **tb = info->attrs;
struct phy_device_node *pdn;
struct phy_device *phydev;
+ int ret;
/* RTNL is held by the caller */
phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PHY_HEADER,
@@ -88,8 +89,19 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info,
return -EOPNOTSUPP;
rep_data->phyindex = phydev->phyindex;
+
rep_data->name = kstrdup(dev_name(&phydev->mdio.dev), GFP_KERNEL);
- rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL);
+ if (!rep_data->name)
+ return -ENOMEM;
+
+ if (phydev->drv) {
+ rep_data->drvname = kstrdup(phydev->drv->name, GFP_KERNEL);
+ if (!rep_data->drvname) {
+ ret = -ENOMEM;
+ goto err_free_name;
+ }
+ }
+
rep_data->upstream_type = pdn->upstream_type;
if (pdn->upstream_type == PHY_UPSTREAM_PHY) {
@@ -97,15 +109,33 @@ static int phy_prepare_data(const struct ethnl_req_info *req_info,
rep_data->upstream_index = upstream->phyindex;
}
- if (pdn->parent_sfp_bus)
+ if (pdn->parent_sfp_bus) {
rep_data->upstream_sfp_name = kstrdup(sfp_get_name(pdn->parent_sfp_bus),
GFP_KERNEL);
+ if (!rep_data->upstream_sfp_name) {
+ ret = -ENOMEM;
+ goto err_free_drvname;
+ }
+ }
- if (phydev->sfp_bus)
+ if (phydev->sfp_bus) {
rep_data->downstream_sfp_name = kstrdup(sfp_get_name(phydev->sfp_bus),
GFP_KERNEL);
+ if (!rep_data->downstream_sfp_name) {
+ ret = -ENOMEM;
+ goto err_free_upstream_sfp;
+ }
+ }
return 0;
+
+err_free_upstream_sfp:
+ kfree(rep_data->upstream_sfp_name);
+err_free_drvname:
+ kfree(rep_data->drvname);
+err_free_name:
+ kfree(rep_data->name);
+ return ret;
}
static int phy_fill_reply(struct sk_buff *skb,
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index d09875b33588..124619920d38 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -889,7 +889,10 @@ int hsr_get_node_data(struct hsr_priv *hsr,
if (node->addr_B_port != HSR_PT_NONE) {
port = hsr_port_get_hsr(hsr, node->addr_B_port);
- *addr_b_ifindex = port->dev->ifindex;
+ if (port)
+ *addr_b_ifindex = port->dev->ifindex;
+ else
+ *addr_b_ifindex = -1;
} else {
*addr_b_ifindex = -1;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 928654c34156..dbcd37dfdc15 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1108,7 +1108,7 @@ static void reqsk_timer_handler(struct timer_list *t)
if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
/* delete timer */
- __inet_csk_reqsk_queue_drop(sk_listener, nreq, true);
+ __inet_csk_reqsk_queue_drop(sk_listener, nreq, false);
goto no_ownership;
}
@@ -1134,7 +1134,7 @@ no_ownership:
}
drop:
- __inet_csk_reqsk_queue_drop(sk_listener, oreq, true);
+ __inet_csk_reqsk_queue_drop(oreq->rsk_listener, oreq, true);
reqsk_put(oreq);
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 97ead883e4a1..ad2259678c78 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1501,13 +1501,11 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
static void __arpt_unregister_table(struct net *net, struct xt_table *table)
{
- struct xt_table_info *private;
- void *loc_cpu_entry;
+ struct xt_table_info *private = table->private;
struct module *table_owner = table->me;
+ void *loc_cpu_entry;
struct arpt_entry *iter;
- private = xt_unregister_table(table);
-
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
@@ -1515,6 +1513,7 @@ static void __arpt_unregister_table(struct net *net, struct xt_table *table)
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
+ kfree(table);
}
int arpt_register_table(struct net *net,
@@ -1522,13 +1521,11 @@ int arpt_register_table(struct net *net,
const struct arpt_replace *repl,
const struct nf_hook_ops *template_ops)
{
- struct nf_hook_ops *ops;
- unsigned int num_ops;
- int ret, i;
- struct xt_table_info *newinfo;
struct xt_table_info bootstrap = {0};
- void *loc_cpu_entry;
+ struct xt_table_info *newinfo;
struct xt_table *new_table;
+ void *loc_cpu_entry;
+ int ret;
newinfo = xt_alloc_table_info(repl->size);
if (!newinfo)
@@ -1543,7 +1540,7 @@ int arpt_register_table(struct net *net,
return ret;
}
- new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
struct arpt_entry *iter;
@@ -1553,46 +1550,12 @@ int arpt_register_table(struct net *net,
return PTR_ERR(new_table);
}
- num_ops = hweight32(table->valid_hooks);
- if (num_ops == 0) {
- ret = -EINVAL;
- goto out_free;
- }
-
- ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
- if (!ops) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- for (i = 0; i < num_ops; i++)
- ops[i].priv = new_table;
-
- new_table->ops = ops;
-
- ret = nf_register_net_hooks(net, ops, num_ops);
- if (ret != 0)
- goto out_free;
-
return ret;
-
-out_free:
- __arpt_unregister_table(net, new_table);
- return ret;
-}
-
-void arpt_unregister_table_pre_exit(struct net *net, const char *name)
-{
- struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
-
- if (table)
- nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
-EXPORT_SYMBOL(arpt_unregister_table_pre_exit);
void arpt_unregister_table(struct net *net, const char *name)
{
- struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
+ struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_ARP, name);
if (table)
__arpt_unregister_table(net, table);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 78cd5ee24448..370b635e3523 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -43,7 +43,7 @@ static int arptable_filter_table_init(struct net *net)
static void __net_exit arptable_filter_net_pre_exit(struct net *net)
{
- arpt_unregister_table_pre_exit(net, "filter");
+ xt_unregister_table_pre_exit(net, NFPROTO_ARP, "filter");
}
static void __net_exit arptable_filter_net_exit(struct net *net)
@@ -58,32 +58,33 @@ static struct pernet_operations arptable_filter_net_ops = {
static int __init arptable_filter_init(void)
{
- int ret = xt_register_template(&packet_filter,
- arptable_filter_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table);
- if (IS_ERR(arpfilter_ops)) {
- xt_unregister_template(&packet_filter);
+ if (IS_ERR(arpfilter_ops))
return PTR_ERR(arpfilter_ops);
- }
ret = register_pernet_subsys(&arptable_filter_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_filter,
+ arptable_filter_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_filter);
- kfree(arpfilter_ops);
- return ret;
+ unregister_pernet_subsys(&arptable_filter_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(arpfilter_ops);
return ret;
}
static void __exit arptable_filter_fini(void)
{
- unregister_pernet_subsys(&arptable_filter_net_ops);
xt_unregister_template(&packet_filter);
+ unregister_pernet_subsys(&arptable_filter_net_ops);
kfree(arpfilter_ops);
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 23c8deff8095..5cbdb0815857 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1704,12 +1704,10 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
static void __ipt_unregister_table(struct net *net, struct xt_table *table)
{
- struct xt_table_info *private;
- void *loc_cpu_entry;
+ struct xt_table_info *private = table->private;
struct module *table_owner = table->me;
struct ipt_entry *iter;
-
- private = xt_unregister_table(table);
+ void *loc_cpu_entry;
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
@@ -1718,19 +1716,18 @@ static void __ipt_unregister_table(struct net *net, struct xt_table *table)
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
+ kfree(table);
}
int ipt_register_table(struct net *net, const struct xt_table *table,
const struct ipt_replace *repl,
const struct nf_hook_ops *template_ops)
{
- struct nf_hook_ops *ops;
- unsigned int num_ops;
- int ret, i;
- struct xt_table_info *newinfo;
struct xt_table_info bootstrap = {0};
- void *loc_cpu_entry;
+ struct xt_table_info *newinfo;
struct xt_table *new_table;
+ void *loc_cpu_entry;
+ int ret;
newinfo = xt_alloc_table_info(repl->size);
if (!newinfo)
@@ -1745,7 +1742,7 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
return ret;
}
- new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
struct ipt_entry *iter;
@@ -1755,51 +1752,12 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
return PTR_ERR(new_table);
}
- /* No template? No need to do anything. This is used by 'nat' table, it registers
- * with the nat core instead of the netfilter core.
- */
- if (!template_ops)
- return 0;
-
- num_ops = hweight32(table->valid_hooks);
- if (num_ops == 0) {
- ret = -EINVAL;
- goto out_free;
- }
-
- ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
- if (!ops) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- for (i = 0; i < num_ops; i++)
- ops[i].priv = new_table;
-
- new_table->ops = ops;
-
- ret = nf_register_net_hooks(net, ops, num_ops);
- if (ret != 0)
- goto out_free;
-
return ret;
-
-out_free:
- __ipt_unregister_table(net, new_table);
- return ret;
-}
-
-void ipt_unregister_table_pre_exit(struct net *net, const char *name)
-{
- struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);
-
- if (table)
- nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
void ipt_unregister_table_exit(struct net *net, const char *name)
{
- struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);
+ struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV4, name);
if (table)
__ipt_unregister_table(net, table);
@@ -1887,7 +1845,6 @@ static void __exit ip_tables_fini(void)
}
EXPORT_SYMBOL(ipt_register_table);
-EXPORT_SYMBOL(ipt_unregister_table_pre_exit);
EXPORT_SYMBOL(ipt_unregister_table_exit);
EXPORT_SYMBOL(ipt_do_table);
module_init(ip_tables_init);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 3ab908b74795..672d7da1071d 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -61,7 +61,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
static void __net_exit iptable_filter_net_pre_exit(struct net *net)
{
- ipt_unregister_table_pre_exit(net, "filter");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "filter");
}
static void __net_exit iptable_filter_net_exit(struct net *net)
@@ -77,32 +77,33 @@ static struct pernet_operations iptable_filter_net_ops = {
static int __init iptable_filter_init(void)
{
- int ret = xt_register_template(&packet_filter,
- iptable_filter_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table);
- if (IS_ERR(filter_ops)) {
- xt_unregister_template(&packet_filter);
+ if (IS_ERR(filter_ops))
return PTR_ERR(filter_ops);
- }
ret = register_pernet_subsys(&iptable_filter_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_filter,
+ iptable_filter_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_filter);
- kfree(filter_ops);
- return ret;
+ unregister_pernet_subsys(&iptable_filter_net_ops);
+ goto err_free;
}
return 0;
+err_free:
+ kfree(filter_ops);
+ return ret;
}
static void __exit iptable_filter_fini(void)
{
- unregister_pernet_subsys(&iptable_filter_net_ops);
xt_unregister_template(&packet_filter);
+ unregister_pernet_subsys(&iptable_filter_net_ops);
kfree(filter_ops);
}
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 385d945d8ebe..13d25d9a4610 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -96,7 +96,7 @@ static int iptable_mangle_table_init(struct net *net)
static void __net_exit iptable_mangle_net_pre_exit(struct net *net)
{
- ipt_unregister_table_pre_exit(net, "mangle");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "mangle");
}
static void __net_exit iptable_mangle_net_exit(struct net *net)
@@ -111,32 +111,33 @@ static struct pernet_operations iptable_mangle_net_ops = {
static int __init iptable_mangle_init(void)
{
- int ret = xt_register_template(&packet_mangler,
- iptable_mangle_table_init);
- if (ret < 0)
- return ret;
+ int ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
- if (IS_ERR(mangle_ops)) {
- xt_unregister_template(&packet_mangler);
- ret = PTR_ERR(mangle_ops);
- return ret;
- }
+ if (IS_ERR(mangle_ops))
+ return PTR_ERR(mangle_ops);
ret = register_pernet_subsys(&iptable_mangle_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_mangler,
+ iptable_mangle_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_mangler);
- kfree(mangle_ops);
- return ret;
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(mangle_ops);
return ret;
}
static void __exit iptable_mangle_fini(void)
{
- unregister_pernet_subsys(&iptable_mangle_net_ops);
xt_unregister_template(&packet_mangler);
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
kfree(mangle_ops);
}
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 625a1ca13b1b..a0df72554025 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -119,8 +119,11 @@ static int iptable_nat_table_init(struct net *net)
}
ret = ipt_nat_register_lookups(net);
- if (ret < 0)
+ if (ret < 0) {
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat");
+ synchronize_rcu();
ipt_unregister_table_exit(net, "nat");
+ }
kfree(repl);
return ret;
@@ -129,6 +132,7 @@ static int iptable_nat_table_init(struct net *net)
static void __net_exit iptable_nat_net_pre_exit(struct net *net)
{
ipt_nat_unregister_lookups(net);
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "nat");
}
static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 0e7f53964d0a..2745c22f4034 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -53,7 +53,7 @@ static int iptable_raw_table_init(struct net *net)
static void __net_exit iptable_raw_net_pre_exit(struct net *net)
{
- ipt_unregister_table_pre_exit(net, "raw");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "raw");
}
static void __net_exit iptable_raw_net_exit(struct net *net)
@@ -77,32 +77,32 @@ static int __init iptable_raw_init(void)
pr_info("Enabling raw table before defrag\n");
}
- ret = xt_register_template(table,
- iptable_raw_table_init);
- if (ret < 0)
- return ret;
-
rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table);
- if (IS_ERR(rawtable_ops)) {
- xt_unregister_template(table);
+ if (IS_ERR(rawtable_ops))
return PTR_ERR(rawtable_ops);
- }
ret = register_pernet_subsys(&iptable_raw_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(table,
+ iptable_raw_table_init);
if (ret < 0) {
- xt_unregister_template(table);
- kfree(rawtable_ops);
- return ret;
+ unregister_pernet_subsys(&iptable_raw_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(rawtable_ops);
return ret;
}
static void __exit iptable_raw_fini(void)
{
+ xt_unregister_template(&packet_raw);
unregister_pernet_subsys(&iptable_raw_net_ops);
kfree(rawtable_ops);
- xt_unregister_template(&packet_raw);
}
module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index d885443cb267..491894511c54 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -50,7 +50,7 @@ static int iptable_security_table_init(struct net *net)
static void __net_exit iptable_security_net_pre_exit(struct net *net)
{
- ipt_unregister_table_pre_exit(net, "security");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV4, "security");
}
static void __net_exit iptable_security_net_exit(struct net *net)
@@ -65,33 +65,34 @@ static struct pernet_operations iptable_security_net_ops = {
static int __init iptable_security_init(void)
{
- int ret = xt_register_template(&security_table,
- iptable_security_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table);
- if (IS_ERR(sectbl_ops)) {
- xt_unregister_template(&security_table);
+ if (IS_ERR(sectbl_ops))
return PTR_ERR(sectbl_ops);
- }
ret = register_pernet_subsys(&iptable_security_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&security_table,
+ iptable_security_table_init);
if (ret < 0) {
- xt_unregister_template(&security_table);
- kfree(sectbl_ops);
- return ret;
+ unregister_pernet_subsys(&iptable_security_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(sectbl_ops);
return ret;
}
static void __exit iptable_security_fini(void)
{
+ xt_unregister_template(&security_table);
unregister_pernet_subsys(&iptable_security_net_ops);
kfree(sectbl_ops);
- xt_unregister_template(&security_table);
}
module_init(iptable_security_init);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index a97cdf3e6af4..0a4b38b315fe 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -116,7 +116,8 @@ struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk,
{
struct tcp_ao_key *key;
- hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) {
+ hlist_for_each_entry_rcu(key, &ao->head, node,
+ sk_fullsock(sk) && lockdep_sock_is_held(sk)) {
if ((sndid >= 0 && key->sndid != sndid) ||
(rcvid >= 0 && key->rcvid != rcvid))
continue;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index c92f98c6f6ec..b1ccdf0dc646 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -36,11 +36,11 @@
/* FL hash table */
#define FL_MAX_PER_SOCK 32
-#define FL_MAX_SIZE 4096
+#define FL_MAX_SIZE 8192
#define FL_HASH_MASK 255
#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK)
-static atomic_t fl_size = ATOMIC_INIT(0);
+static int fl_size;
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
static void ip6_fl_gc(struct timer_list *unused);
@@ -162,8 +162,9 @@ static void ip6_fl_gc(struct timer_list *unused)
ttd = fl->expires;
if (time_after_eq(now, ttd)) {
*flp = fl->next;
+ fl_size--;
+ fl->fl_net->ipv6.flowlabel_count--;
fl_free(fl);
- atomic_dec(&fl_size);
continue;
}
if (!sched || time_before(ttd, sched))
@@ -172,7 +173,7 @@ static void ip6_fl_gc(struct timer_list *unused)
flp = &fl->next;
}
}
- if (!sched && atomic_read(&fl_size))
+ if (!sched && fl_size)
sched = now + FL_MAX_LINGER;
if (sched) {
mod_timer(&ip6_fl_gc_timer, sched);
@@ -196,7 +197,8 @@ static void __net_exit ip6_fl_purge(struct net *net)
atomic_read(&fl->users) == 0) {
*flp = fl->next;
fl_free(fl);
- atomic_dec(&fl_size);
+ fl_size--;
+ net->ipv6.flowlabel_count--;
continue;
}
flp = &fl->next;
@@ -210,10 +212,10 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
{
struct ip6_flowlabel *lfl;
+ lockdep_assert_held(&ip6_fl_lock);
+
fl->label = label & IPV6_FLOWLABEL_MASK;
- rcu_read_lock();
- spin_lock_bh(&ip6_fl_lock);
if (label == 0) {
for (;;) {
fl->label = htonl(get_random_u32())&IPV6_FLOWLABEL_MASK;
@@ -235,8 +237,6 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
lfl = __fl_lookup(net, fl->label);
if (lfl) {
atomic_inc(&lfl->users);
- spin_unlock_bh(&ip6_fl_lock);
- rcu_read_unlock();
return lfl;
}
}
@@ -244,9 +244,8 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
fl->lastuse = jiffies;
fl->next = fl_ht[FL_HASH(fl->label)];
rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
- atomic_inc(&fl_size);
- spin_unlock_bh(&ip6_fl_lock);
- rcu_read_unlock();
+ fl_size++;
+ net->ipv6.flowlabel_count++;
return NULL;
}
@@ -464,10 +463,17 @@ done:
static int mem_check(struct sock *sk)
{
- int room = FL_MAX_SIZE - atomic_read(&fl_size);
+ const int unpriv_total_limit = FL_MAX_SIZE - (FL_MAX_SIZE / 4);
+ const int unpriv_user_limit = unpriv_total_limit / 2;
+ struct net *net = sock_net(sk);
+ int room;
struct ipv6_fl_socklist *sfl;
int count = 0;
+ lockdep_assert_held(&ip6_fl_lock);
+
+ room = FL_MAX_SIZE - fl_size;
+
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
return 0;
@@ -478,7 +484,9 @@ static int mem_check(struct sock *sk)
if (room <= 0 ||
((count >= FL_MAX_PER_SOCK ||
- (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
+ (count > 0 && room < FL_MAX_SIZE / 2) ||
+ room < FL_MAX_SIZE / 4 ||
+ net->ipv6.flowlabel_count >= unpriv_user_limit) &&
!capable(CAP_NET_ADMIN)))
return -ENOBUFS;
@@ -692,11 +700,19 @@ release:
if (!sfl1)
goto done;
+ rcu_read_lock();
+ spin_lock_bh(&ip6_fl_lock);
err = mem_check(sk);
+ if (err == 0)
+ fl1 = fl_intern(net, fl, freq->flr_label);
+ else
+ fl1 = NULL;
+ spin_unlock_bh(&ip6_fl_lock);
+ rcu_read_unlock();
+
if (err != 0)
goto done;
- fl1 = fl_intern(net, fl, freq->flr_label);
if (fl1)
goto recheck;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index d585ac3c1113..9d9c3763f2f5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1713,12 +1713,10 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
static void __ip6t_unregister_table(struct net *net, struct xt_table *table)
{
- struct xt_table_info *private;
- void *loc_cpu_entry;
+ struct xt_table_info *private = table->private;
struct module *table_owner = table->me;
struct ip6t_entry *iter;
-
- private = xt_unregister_table(table);
+ void *loc_cpu_entry;
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
@@ -1727,19 +1725,18 @@ static void __ip6t_unregister_table(struct net *net, struct xt_table *table)
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
+ kfree(table);
}
int ip6t_register_table(struct net *net, const struct xt_table *table,
const struct ip6t_replace *repl,
const struct nf_hook_ops *template_ops)
{
- struct nf_hook_ops *ops;
- unsigned int num_ops;
- int ret, i;
- struct xt_table_info *newinfo;
struct xt_table_info bootstrap = {0};
- void *loc_cpu_entry;
+ struct xt_table_info *newinfo;
struct xt_table *new_table;
+ void *loc_cpu_entry;
+ int ret;
newinfo = xt_alloc_table_info(repl->size);
if (!newinfo)
@@ -1754,7 +1751,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
return ret;
}
- new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ new_table = xt_register_table(net, table, template_ops, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
struct ip6t_entry *iter;
@@ -1764,48 +1761,12 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
return PTR_ERR(new_table);
}
- if (!template_ops)
- return 0;
-
- num_ops = hweight32(table->valid_hooks);
- if (num_ops == 0) {
- ret = -EINVAL;
- goto out_free;
- }
-
- ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
- if (!ops) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- for (i = 0; i < num_ops; i++)
- ops[i].priv = new_table;
-
- new_table->ops = ops;
-
- ret = nf_register_net_hooks(net, ops, num_ops);
- if (ret != 0)
- goto out_free;
-
return ret;
-
-out_free:
- __ip6t_unregister_table(net, new_table);
- return ret;
-}
-
-void ip6t_unregister_table_pre_exit(struct net *net, const char *name)
-{
- struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name);
-
- if (table)
- nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
void ip6t_unregister_table_exit(struct net *net, const char *name)
{
- struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name);
+ struct xt_table *table = xt_unregister_table_exit(net, NFPROTO_IPV6, name);
if (table)
__ip6t_unregister_table(net, table);
@@ -1894,7 +1855,6 @@ static void __exit ip6_tables_fini(void)
}
EXPORT_SYMBOL(ip6t_register_table);
-EXPORT_SYMBOL(ip6t_unregister_table_pre_exit);
EXPORT_SYMBOL(ip6t_unregister_table_exit);
EXPORT_SYMBOL(ip6t_do_table);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index e8992693e14a..b074fc477676 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -60,7 +60,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
static void __net_exit ip6table_filter_net_pre_exit(struct net *net)
{
- ip6t_unregister_table_pre_exit(net, "filter");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "filter");
}
static void __net_exit ip6table_filter_net_exit(struct net *net)
@@ -76,32 +76,32 @@ static struct pernet_operations ip6table_filter_net_ops = {
static int __init ip6table_filter_init(void)
{
- int ret = xt_register_template(&packet_filter,
- ip6table_filter_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table);
- if (IS_ERR(filter_ops)) {
- xt_unregister_template(&packet_filter);
+ if (IS_ERR(filter_ops))
return PTR_ERR(filter_ops);
- }
ret = register_pernet_subsys(&ip6table_filter_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_filter, ip6table_filter_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_filter);
- kfree(filter_ops);
- return ret;
+ unregister_pernet_subsys(&ip6table_filter_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(filter_ops);
return ret;
}
static void __exit ip6table_filter_fini(void)
{
- unregister_pernet_subsys(&ip6table_filter_net_ops);
xt_unregister_template(&packet_filter);
+ unregister_pernet_subsys(&ip6table_filter_net_ops);
kfree(filter_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 8dd4cd0c47bd..e6ee036a9b2c 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -89,7 +89,7 @@ static int ip6table_mangle_table_init(struct net *net)
static void __net_exit ip6table_mangle_net_pre_exit(struct net *net)
{
- ip6t_unregister_table_pre_exit(net, "mangle");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "mangle");
}
static void __net_exit ip6table_mangle_net_exit(struct net *net)
@@ -104,32 +104,33 @@ static struct pernet_operations ip6table_mangle_net_ops = {
static int __init ip6table_mangle_init(void)
{
- int ret = xt_register_template(&packet_mangler,
- ip6table_mangle_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
- if (IS_ERR(mangle_ops)) {
- xt_unregister_template(&packet_mangler);
+ if (IS_ERR(mangle_ops))
return PTR_ERR(mangle_ops);
- }
ret = register_pernet_subsys(&ip6table_mangle_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&packet_mangler,
+ ip6table_mangle_table_init);
if (ret < 0) {
- xt_unregister_template(&packet_mangler);
- kfree(mangle_ops);
- return ret;
+ unregister_pernet_subsys(&ip6table_mangle_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(mangle_ops);
return ret;
}
static void __exit ip6table_mangle_fini(void)
{
- unregister_pernet_subsys(&ip6table_mangle_net_ops);
xt_unregister_template(&packet_mangler);
+ unregister_pernet_subsys(&ip6table_mangle_net_ops);
kfree(mangle_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 5be723232df8..c2394e2c94b5 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -121,8 +121,11 @@ static int ip6table_nat_table_init(struct net *net)
}
ret = ip6t_nat_register_lookups(net);
- if (ret < 0)
+ if (ret < 0) {
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat");
+ synchronize_rcu();
ip6t_unregister_table_exit(net, "nat");
+ }
kfree(repl);
return ret;
@@ -131,6 +134,7 @@ static int ip6table_nat_table_init(struct net *net)
static void __net_exit ip6table_nat_net_pre_exit(struct net *net)
{
ip6t_nat_unregister_lookups(net);
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "nat");
}
static void __net_exit ip6table_nat_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index fc9f6754028f..3b161ee875bc 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -52,7 +52,7 @@ static int ip6table_raw_table_init(struct net *net)
static void __net_exit ip6table_raw_net_pre_exit(struct net *net)
{
- ip6t_unregister_table_pre_exit(net, "raw");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "raw");
}
static void __net_exit ip6table_raw_net_exit(struct net *net)
@@ -75,31 +75,31 @@ static int __init ip6table_raw_init(void)
pr_info("Enabling raw table before defrag\n");
}
- ret = xt_register_template(table, ip6table_raw_table_init);
- if (ret < 0)
- return ret;
-
/* Register hooks */
rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table);
- if (IS_ERR(rawtable_ops)) {
- xt_unregister_template(table);
+ if (IS_ERR(rawtable_ops))
return PTR_ERR(rawtable_ops);
- }
ret = register_pernet_subsys(&ip6table_raw_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(table, ip6table_raw_table_init);
if (ret < 0) {
- kfree(rawtable_ops);
- xt_unregister_template(table);
- return ret;
+ unregister_pernet_subsys(&ip6table_raw_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(rawtable_ops);
return ret;
}
static void __exit ip6table_raw_fini(void)
{
- unregister_pernet_subsys(&ip6table_raw_net_ops);
xt_unregister_template(&packet_raw);
+ unregister_pernet_subsys(&ip6table_raw_net_ops);
kfree(rawtable_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 4df14a9bae78..4bd5d97b8ab6 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -49,7 +49,7 @@ static int ip6table_security_table_init(struct net *net)
static void __net_exit ip6table_security_net_pre_exit(struct net *net)
{
- ip6t_unregister_table_pre_exit(net, "security");
+ xt_unregister_table_pre_exit(net, NFPROTO_IPV6, "security");
}
static void __net_exit ip6table_security_net_exit(struct net *net)
@@ -64,32 +64,33 @@ static struct pernet_operations ip6table_security_net_ops = {
static int __init ip6table_security_init(void)
{
- int ret = xt_register_template(&security_table,
- ip6table_security_table_init);
-
- if (ret < 0)
- return ret;
+ int ret;
sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table);
- if (IS_ERR(sectbl_ops)) {
- xt_unregister_template(&security_table);
+ if (IS_ERR(sectbl_ops))
return PTR_ERR(sectbl_ops);
- }
ret = register_pernet_subsys(&ip6table_security_net_ops);
+ if (ret < 0)
+ goto err_free;
+
+ ret = xt_register_template(&security_table,
+ ip6table_security_table_init);
if (ret < 0) {
- kfree(sectbl_ops);
- xt_unregister_template(&security_table);
- return ret;
+ unregister_pernet_subsys(&ip6table_security_net_ops);
+ goto err_free;
}
+ return 0;
+err_free:
+ kfree(sectbl_ops);
return ret;
}
static void __exit ip6table_security_fini(void)
{
- unregister_pernet_subsys(&ip6table_security_net_ops);
xt_unregister_template(&security_table);
+ unregister_pernet_subsys(&ip6table_security_net_ops);
kfree(sectbl_ops);
}
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 4f39bf7c843f..75e53fde6b29 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -72,6 +72,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
rcu_assign_pointer(exp->helper, helper);
+ rcu_assign_pointer(exp->assign_helper, NULL);
write_pnet(&exp->net, net);
#ifdef CONFIG_NF_CONNTRACK_ZONES
exp->zone = ct->zone;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b08189226320..8ba5b22a1eef 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1811,14 +1811,17 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
if (exp) {
+ struct nf_conntrack_helper *assign_helper;
+
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
ct->master = exp->master;
- if (exp->helper) {
+ assign_helper = rcu_dereference(exp->assign_helper);
+ if (assign_helper) {
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help)
- rcu_assign_pointer(help->helper, exp->helper);
+ rcu_assign_pointer(help->helper, assign_helper);
}
#ifdef CONFIG_NF_CONNTRACK_MARK
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 24d0576d84b7..8e943efbdf0a 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -344,6 +344,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
helper = rcu_dereference(help->helper);
rcu_assign_pointer(exp->helper, helper);
+ rcu_assign_pointer(exp->assign_helper, NULL);
write_pnet(&exp->net, net);
#ifdef CONFIG_NF_CONNTRACK_ZONES
exp->zone = ct->zone;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 3f5c50455b71..b2fe6554b9cf 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -643,7 +643,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- rcu_assign_pointer(exp->helper, &nf_conntrack_helper_h245);
+ rcu_assign_pointer(exp->assign_helper, &nf_conntrack_helper_h245);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -767,7 +767,7 @@ static int expect_callforwarding(struct sk_buff *skb,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931);
nathook = rcu_dereference(nfct_h323_nat_hook);
if (memcmp(&ct->tuplehash[dir].tuple.src.u3,
@@ -1234,7 +1234,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3 : NULL,
&ct->tuplehash[!dir].tuple.dst.u3,
IPPROTO_TCP, NULL, &port);
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931);
exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
nathook = rcu_dereference(nfct_h323_nat_hook);
@@ -1306,7 +1306,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_UDP, NULL, &port);
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_ras);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_ras);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect RAS ");
@@ -1523,7 +1523,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1577,7 +1577,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.src.u3, &addr,
IPPROTO_TCP, NULL, &port);
exp->flags = NF_CT_EXPECT_PERMANENT;
- rcu_assign_pointer(exp->helper, nf_conntrack_helper_q931);
+ rcu_assign_pointer(exp->assign_helper, nf_conntrack_helper_q931);
if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index a715304a53d8..b594cd244fe1 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -400,6 +400,11 @@ static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
this = rcu_dereference_protected(exp->helper,
lockdep_is_held(&nf_conntrack_expect_lock));
+ if (this == me)
+ return true;
+
+ this = rcu_dereference_protected(exp->assign_helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
return this == me;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index eda5fe4a75c8..befa7e83ee49 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2634,6 +2634,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
static struct nf_conntrack_expect *
ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
+ const struct nf_conntrack_helper *assign_helper,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask);
@@ -2860,6 +2861,7 @@ static int
ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
u32 portid, u32 report)
{
+ struct nf_conntrack_helper *assign_helper = NULL;
struct nlattr *cda[CTA_EXPECT_MAX+1];
struct nf_conntrack_tuple tuple, mask;
struct nf_conntrack_expect *exp;
@@ -2870,13 +2872,26 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
if (err < 0)
return err;
+ if (!cda[CTA_EXPECT_TUPLE] || !cda[CTA_EXPECT_MASK])
+ return -EINVAL;
+
err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda,
ct, &tuple, &mask);
if (err < 0)
return err;
+ if (cda[CTA_EXPECT_HELP_NAME]) {
+ const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+ assign_helper = __nf_conntrack_helper_find(helpname,
+ nf_ct_l3num(ct),
+ tuple.dst.protonum);
+ if (!assign_helper)
+ return -EOPNOTSUPP;
+ }
+
exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
- &tuple, &mask);
+ assign_helper, &tuple, &mask);
if (IS_ERR(exp))
return PTR_ERR(exp);
@@ -3515,6 +3530,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
static struct nf_conntrack_expect *
ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
+ const struct nf_conntrack_helper *assign_helper,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask)
{
@@ -3568,6 +3584,7 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
exp->zone = ct->zone;
#endif
rcu_assign_pointer(exp->helper, helper);
+ rcu_assign_pointer(exp->assign_helper, assign_helper);
exp->tuple = *tuple;
exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask->src.u.all;
@@ -3623,7 +3640,7 @@ ctnetlink_create_expect(struct net *net,
ct = nf_ct_tuplehash_to_ctrack(h);
rcu_read_lock();
- exp = ctnetlink_alloc_expect(cda, ct, &tuple, &mask);
+ exp = ctnetlink_alloc_expect(cda, ct, NULL, &tuple, &mask);
if (IS_ERR(exp)) {
err = PTR_ERR(exp);
goto err_rcu;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1eb55907d470..e69941f1a101 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1366,6 +1366,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
goto store_cseq;
}
+ helper = rcu_dereference(nfct_help(ct)->helper);
+ if (!helper)
+ return NF_DROP;
+
exp = nf_ct_expect_alloc(ct);
if (!exp) {
nf_ct_helper_log(skb, ct, "cannot alloc expectation");
@@ -1376,14 +1380,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
if (sip_direct_signalling)
saddr = &ct->tuplehash[!dir].tuple.src.u3;
- helper = rcu_dereference(nfct_help(ct)->helper);
- if (!helper)
- return NF_DROP;
-
nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
saddr, &daddr, proto, NULL, &port);
exp->timeout.expires = sip_timeout * HZ;
- rcu_assign_pointer(exp->helper, helper);
+ rcu_assign_pointer(exp->assign_helper, helper);
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
hooks = rcu_dereference(nf_nat_sip_hooks);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 60ee8d932fcb..fa2cc556331c 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1334,6 +1334,8 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
if (nf_ct_expect_related(exp, 0) != 0)
regs->verdict.code = NF_DROP;
+
+ nf_ct_expect_put(exp);
}
static const struct nla_policy nft_ct_expect_policy[NFTA_CT_EXPECT_MAX + 1] = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 2c67c2e6b132..4e6708c23922 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -55,6 +55,9 @@ static struct list_head xt_templates[NFPROTO_NUMPROTO];
struct xt_pernet {
struct list_head tables[NFPROTO_NUMPROTO];
+
+ /* stash area used during netns exit */
+ struct list_head dead_tables[NFPROTO_NUMPROTO];
};
struct compat_delta {
@@ -1472,11 +1475,9 @@ struct xt_counters *xt_counters_alloc(unsigned int counters)
}
EXPORT_SYMBOL(xt_counters_alloc);
-struct xt_table_info *
-xt_replace_table(struct xt_table *table,
- unsigned int num_counters,
- struct xt_table_info *newinfo,
- int *error)
+static struct xt_table_info *
+do_replace_table(struct xt_table *table, unsigned int num_counters,
+ struct xt_table_info *newinfo, int *error)
{
struct xt_table_info *private;
unsigned int cpu;
@@ -1531,30 +1532,54 @@ xt_replace_table(struct xt_table *table,
}
}
- audit_log_nfcfg(table->name, table->af, private->number,
- !private->number ? AUDIT_XT_OP_REGISTER :
- AUDIT_XT_OP_REPLACE,
- GFP_KERNEL);
+ return private;
+}
+
+struct xt_table_info *
+xt_replace_table(struct xt_table *table, unsigned int num_counters,
+ struct xt_table_info *newinfo,
+ int *error)
+{
+ struct xt_table_info *private;
+
+ private = do_replace_table(table, num_counters, newinfo, error);
+ if (private)
+ audit_log_nfcfg(table->name, table->af, private->number,
+ AUDIT_XT_OP_REPLACE,
+ GFP_KERNEL);
+
return private;
}
EXPORT_SYMBOL_GPL(xt_replace_table);
struct xt_table *xt_register_table(struct net *net,
const struct xt_table *input_table,
+ const struct nf_hook_ops *template_ops,
struct xt_table_info *bootstrap,
struct xt_table_info *newinfo)
{
struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct xt_table *t, *table = NULL;
+ struct nf_hook_ops *ops = NULL;
struct xt_table_info *private;
- struct xt_table *t, *table;
- int ret;
+ unsigned int num_ops;
+ int ret = -EINVAL;
+
+ num_ops = hweight32(input_table->valid_hooks);
+ if (num_ops == 0)
+ goto out;
+
+ ret = -ENOMEM;
+ if (template_ops) {
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ goto out;
+ }
/* Don't add one object to multiple lists. */
table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
- if (!table) {
- ret = -ENOMEM;
+ if (!table)
goto out;
- }
mutex_lock(&xt[table->af].mutex);
/* Don't autoload: we'd eat our tail... */
@@ -1568,7 +1593,7 @@ struct xt_table *xt_register_table(struct net *net,
/* Simplifies replace_table code. */
table->private = bootstrap;
- if (!xt_replace_table(table, 0, newinfo, &ret))
+ if (!do_replace_table(table, 0, newinfo, &ret))
goto unlock;
private = table->private;
@@ -1577,34 +1602,122 @@ struct xt_table *xt_register_table(struct net *net,
/* save number of initial entries */
private->initial_entries = private->number;
+ if (ops) {
+ int i;
+
+ for (i = 0; i < num_ops; i++)
+ ops[i].priv = table;
+
+ ret = nf_register_net_hooks(net, ops, num_ops);
+ if (ret != 0) {
+ mutex_unlock(&xt[table->af].mutex);
+ /* nf_register_net_hooks() might have published a
+ * base chain before internal error unwind.
+ */
+ synchronize_rcu();
+ goto out;
+ }
+
+ table->ops = ops;
+ }
+
+ audit_log_nfcfg(table->name, table->af, private->number,
+ AUDIT_XT_OP_REGISTER, GFP_KERNEL);
+
list_add(&table->list, &xt_net->tables[table->af]);
mutex_unlock(&xt[table->af].mutex);
return table;
unlock:
mutex_unlock(&xt[table->af].mutex);
- kfree(table);
out:
+ kfree(table);
+ kfree(ops);
return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(xt_register_table);
-void *xt_unregister_table(struct xt_table *table)
+/**
+ * xt_unregister_table_pre_exit - pre-shutdown unregister of a table
+ * @net: network namespace
+ * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6)
+ * @name: name of the table to unregister
+ *
+ * Unregisters the specified netfilter table from the given network namespace
+ * and also unregisters the hooks from netfilter core: no new packets will be
+ * processed.
+ *
+ * This must be called prior to xt_unregister_table_exit() from the pernet
+ * .pre_exit callback. After this call, the table is no longer visible to
+ * the get/setsockopt path. In case of rmmod, module exit path must have
+ * called xt_unregister_template() prior to unregistering pernet ops to
+ * prevent re-instantiation of the table.
+ *
+ * See also: xt_unregister_table_exit()
+ */
+void xt_unregister_table_pre_exit(struct net *net, u8 af, const char *name)
{
- struct xt_table_info *private;
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct xt_table *t;
- mutex_lock(&xt[table->af].mutex);
- private = table->private;
- list_del(&table->list);
- mutex_unlock(&xt[table->af].mutex);
- audit_log_nfcfg(table->name, table->af, private->number,
- AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
- kfree(table->ops);
- kfree(table);
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt_net->tables[af], list) {
+ if (strcmp(t->name, name) == 0) {
+ list_move(&t->list, &xt_net->dead_tables[af]);
+ mutex_unlock(&xt[af].mutex);
- return private;
+ if (t->ops) /* nat table registers with nat core, t->ops is NULL. */
+ nf_unregister_net_hooks(net, t->ops, hweight32(t->valid_hooks));
+ return;
+ }
+ }
+ mutex_unlock(&xt[af].mutex);
+}
+EXPORT_SYMBOL(xt_unregister_table_pre_exit);
+
+/**
+ * xt_unregister_table_exit - remove a table during namespace teardown
+ * @net: the network namespace from which to unregister the table
+ * @af: address family (e.g., NFPROTO_IPV4, NFPROTO_IPV6)
+ * @name: name of the table to unregister
+ *
+ * Completes the unregister process for a table. This must be called from
+ * the pernet ops .exit callback. This is the second stage after
+ * xt_unregister_table_pre_exit().
+ *
+ * pair with xt_unregister_table_pre_exit() during namespace shutdown.
+ *
+ * Return: the unregistered table or NULL if the table was never
+ * instantiated. The caller needs to kfree() the table after it
+ * has removed the family specific matches/targets.
+ */
+struct xt_table *xt_unregister_table_exit(struct net *net, u8 af, const char *name)
+{
+ struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+ struct xt_table *table;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(table, &xt_net->dead_tables[af], list) {
+ struct nf_hook_ops *ops = NULL;
+
+ if (strcmp(table->name, name) != 0)
+ continue;
+
+ list_del(&table->list);
+
+ audit_log_nfcfg(table->name, table->af, table->private->number,
+ AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
+ swap(table->ops, ops);
+ mutex_unlock(&xt[af].mutex);
+
+ kfree(ops);
+ return table;
+ }
+ mutex_unlock(&xt[af].mutex);
+
+ return NULL;
}
-EXPORT_SYMBOL_GPL(xt_unregister_table);
+EXPORT_SYMBOL_GPL(xt_unregister_table_exit);
#endif
#ifdef CONFIG_PROC_FS
@@ -2051,8 +2164,10 @@ static int __net_init xt_net_init(struct net *net)
struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
int i;
- for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
INIT_LIST_HEAD(&xt_net->tables[i]);
+ INIT_LIST_HEAD(&xt_net->dead_tables[i]);
+ }
return 0;
}
@@ -2061,8 +2176,10 @@ static void __net_exit xt_net_exit(struct net *net)
struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
int i;
- for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
WARN_ON_ONCE(!list_empty(&xt_net->tables[i]));
+ WARN_ON_ONCE(!list_empty(&xt_net->dead_tables[i]));
+ }
}
static struct pernet_operations xt_net_ops = {
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d251d894afd4..0da39eaed255 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1972,8 +1972,10 @@ int genlmsg_multicast_allns(const struct genl_family *family,
struct sk_buff *skb, u32 portid,
unsigned int group)
{
- if (WARN_ON_ONCE(group >= family->n_mcgrps))
+ if (WARN_ON_ONCE(group >= family->n_mcgrps)) {
+ kfree_skb(skb);
return -EINVAL;
+ }
group = family->mcgrp_offset + group;
return genlmsg_mcast(skb, portid, group);
@@ -1986,8 +1988,10 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb,
struct net *net = genl_info_net(info);
struct sock *sk = net->genl_sock;
- if (WARN_ON_ONCE(group >= family->n_mcgrps))
+ if (WARN_ON_ONCE(group >= family->n_mcgrps)) {
+ kfree_skb(skb);
return;
+ }
group = family->mcgrp_offset + group;
nlmsg_notify(sk, skb, info->snd_portid, group,
diff --git a/net/rds/message.c b/net/rds/message.c
index 25fedcb3cd00..7feb0eb6537d 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -448,6 +448,7 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *
for (i = 0; i < rm->data.op_nents; i++)
put_page(sg_page(&rm->data.op_sg[i]));
+ rm->data.op_nents = 0;
mmp = &rm->data.op_mmp_znotifier->z_mmp;
mm_unaccount_pinned_pages(mmp);
ret = -EFAULT;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 8c9a0400c862..0f953bd46b58 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -243,6 +243,20 @@ static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
return q->dequeue(sch);
}
+static void cbs_reset(struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ /* Nothing to do if we couldn't create the underlying qdisc */
+ if (!q->qdisc)
+ return;
+
+ qdisc_reset(q->qdisc);
+ qdisc_watchdog_cancel(&q->watchdog);
+ q->credits = 0;
+ q->last = 0;
+}
+
static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
[TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
};
@@ -540,7 +554,7 @@ static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
.dequeue = cbs_dequeue,
.peek = qdisc_peek_dequeued,
.init = cbs_init,
- .reset = qdisc_reset_queue,
+ .reset = cbs_reset,
.destroy = cbs_destroy,
.change = cbs_change,
.dump = cbs_dump,
diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c
index 241e6a46bd00..a22489c14458 100644
--- a/net/sched/sch_dualpi2.c
+++ b/net/sched/sch_dualpi2.c
@@ -938,6 +938,8 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt,
int err;
sch->flags |= TCQ_F_DEQUEUE_DROPS;
+ hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS_PINNED_SOFT);
q->l_queue = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
TC_H_MAKE(sch->handle, 1), extack);
@@ -950,8 +952,6 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt,
q->sch = sch;
dualpi2_reset_default(sch);
- hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED_SOFT);
if (opt && nla_len(opt)) {
err = dualpi2_change(sch, opt, extack);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 58d0d9747f0b..1d2568bb6bc2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1986,6 +1986,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_unlock;
iov_iter_revert(&msg->msg_iter, err);
+
+ /* sctp_sendmsg_to_asoc() may have released the socket
+ * lock (sctp_wait_for_sndbuf), during which other
+ * associations on ep->asocs could have been peeled
+ * off or freed. @asoc itself is revalidated by the
+ * base.dead and base.sk checks in sctp_wait_for_sndbuf,
+ * so re-derive the cached cursor from it.
+ */
+ tmp = list_next_entry(asoc, asocs);
}
goto out_unlock;
diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c
index 94bc9c7382ea..b1c65110f04d 100644
--- a/net/shaper/shaper.c
+++ b/net/shaper/shaper.c
@@ -21,6 +21,8 @@
#define NET_SHAPER_ID_UNSPEC NET_SHAPER_ID_MASK
+static_assert(NET_SHAPER_ID_UNSPEC == NET_SHAPER_MAX_HANDLE_ID + 1);
+
struct net_shaper_hierarchy {
struct xarray shapers;
};
@@ -90,6 +92,12 @@ static int net_shaper_handle_size(void)
nla_total_size(sizeof(u32)));
}
+static int net_shaper_group_reply_size(void)
+{
+ return nla_total_size(sizeof(u32)) + /* NET_SHAPER_A_IFINDEX */
+ net_shaper_handle_size(); /* NET_SHAPER_A_HANDLE */
+}
+
static int net_shaper_fill_binding(struct sk_buff *msg,
const struct net_shaper_binding *binding,
u32 type)
@@ -275,11 +283,13 @@ static void net_shaper_default_parent(const struct net_shaper_handle *handle,
parent->id = 0;
}
-/*
- * MARK_0 is already in use due to XA_FLAGS_ALLOC, can't reuse such flag as
- * it's cleared by xa_store().
+/* MARK_0 is already in use due to XA_FLAGS_ALLOC. The VALID mark is set on
+ * an entry only after the device-side configuration has completed
+ * successfully (see net_shaper_commit()). Lookups and dumps must filter on
+ * this mark to avoid exposing tentative entries inserted by
+ * net_shaper_pre_insert() while the driver call is still in flight.
*/
-#define NET_SHAPER_NOT_VALID XA_MARK_1
+#define NET_SHAPER_VALID XA_MARK_1
static struct net_shaper *
net_shaper_lookup(struct net_shaper_binding *binding,
@@ -289,10 +299,14 @@ net_shaper_lookup(struct net_shaper_binding *binding,
struct net_shaper_hierarchy *hierarchy;
hierarchy = net_shaper_hierarchy_rcu(binding);
- if (!hierarchy || xa_get_mark(&hierarchy->shapers, index,
- NET_SHAPER_NOT_VALID))
+ if (!hierarchy || !xa_get_mark(&hierarchy->shapers, index,
+ NET_SHAPER_VALID))
return NULL;
+ /* Pairs with smp_wmb() in net_shaper_commit(): if the entry is
+ * valid, its contents must be visible too.
+ */
+ smp_rmb();
return xa_load(&hierarchy->shapers, index);
}
@@ -348,7 +362,7 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding,
handle->id == NET_SHAPER_ID_UNSPEC) {
u32 min, max;
- handle->id = NET_SHAPER_ID_MASK - 1;
+ handle->id = NET_SHAPER_MAX_HANDLE_ID;
max = net_shaper_handle_to_index(handle);
handle->id = 0;
min = net_shaper_handle_to_index(handle);
@@ -370,13 +384,10 @@ static int net_shaper_pre_insert(struct net_shaper_binding *binding,
goto free_id;
}
- /* Mark 'tentative' shaper inside the hierarchy container.
- * xa_set_mark is a no-op if the previous store fails.
+ /* Insert as 'tentative' (no VALID mark). The mark will be set by
+ * net_shaper_commit() once the driver-side configuration succeeds.
*/
- xa_lock(&hierarchy->shapers);
- prev = __xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL);
- __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_NOT_VALID);
- xa_unlock(&hierarchy->shapers);
+ prev = xa_store(&hierarchy->shapers, index, cur, GFP_KERNEL);
if (xa_err(prev)) {
NL_SET_ERR_MSG(extack, "Can't insert shaper into device store");
kfree_rcu(cur, rcu);
@@ -413,9 +424,9 @@ static void net_shaper_commit(struct net_shaper_binding *binding,
/* Successful update: drop the tentative mark
* and update the hierarchy container.
*/
- __xa_clear_mark(&hierarchy->shapers, index,
- NET_SHAPER_NOT_VALID);
*cur = shapers[i];
+ smp_wmb();
+ __xa_set_mark(&hierarchy->shapers, index, NET_SHAPER_VALID);
}
xa_unlock(&hierarchy->shapers);
}
@@ -431,8 +442,9 @@ static void net_shaper_rollback(struct net_shaper_binding *binding)
return;
xa_lock(&hierarchy->shapers);
- xa_for_each_marked(&hierarchy->shapers, index, cur,
- NET_SHAPER_NOT_VALID) {
+ xa_for_each(&hierarchy->shapers, index, cur) {
+ if (xa_get_mark(&hierarchy->shapers, index, NET_SHAPER_VALID))
+ continue;
__xa_erase(&hierarchy->shapers, index);
kfree(cur);
}
@@ -465,10 +477,21 @@ static int net_shaper_parse_handle(const struct nlattr *attr,
* shaper (any other value).
*/
id_attr = tb[NET_SHAPER_A_HANDLE_ID];
- if (id_attr)
+ if (id_attr) {
id = nla_get_u32(id_attr);
- else if (handle->scope == NET_SHAPER_SCOPE_NODE)
+ } else if (handle->scope == NET_SHAPER_SCOPE_NODE) {
id = NET_SHAPER_ID_UNSPEC;
+ } else if (handle->scope == NET_SHAPER_SCOPE_QUEUE) {
+ NL_SET_ERR_ATTR_MISS(info->extack, attr,
+ NET_SHAPER_A_HANDLE_ID);
+ return -EINVAL;
+ }
+
+ if (id && handle->scope == NET_SHAPER_SCOPE_NETDEV) {
+ NL_SET_ERR_MSG_ATTR(info->extack, id_attr,
+ "Netdev scope is a singleton, must use ID 0");
+ return -EINVAL;
+ }
handle->id = id;
return 0;
@@ -836,7 +859,12 @@ int net_shaper_nl_get_dumpit(struct sk_buff *skb,
goto out_unlock;
for (; (shaper = xa_find(&hierarchy->shapers, &ctx->start_index,
- U32_MAX, XA_PRESENT)); ctx->start_index++) {
+ U32_MAX, NET_SHAPER_VALID));
+ ctx->start_index++) {
+ /* Pairs with smp_wmb() in net_shaper_commit(): the entry
+ * is marked VALID, so its contents must be visible too.
+ */
+ smp_rmb();
ret = net_shaper_fill_one(skb, binding, shaper, info);
if (ret)
break;
@@ -932,6 +960,46 @@ static int net_shaper_handle_cmp(const struct net_shaper_handle *a,
return memcmp(a, b, sizeof(*a));
}
+static int net_shaper_parse_leaves(struct net_shaper_binding *binding,
+ struct genl_info *info,
+ const struct net_shaper *node,
+ struct net_shaper *leaves,
+ int leaves_count)
+{
+ struct nlattr *attr;
+ int i, j, ret, rem;
+
+ i = 0;
+ nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES,
+ genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ if (WARN_ON_ONCE(i >= leaves_count))
+ return -EINVAL;
+
+ ret = net_shaper_parse_leaf(binding, attr, info,
+ node, &leaves[i]);
+ if (ret)
+ return ret;
+
+ /* Reject duplicates */
+ for (j = 0; j < i; j++) {
+ if (net_shaper_handle_cmp(&leaves[i].handle,
+ &leaves[j].handle))
+ continue;
+
+ NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr,
+ "Duplicate leaf shaper %d:%d",
+ leaves[i].handle.scope,
+ leaves[i].handle.id);
+ return -EINVAL;
+ }
+
+ i++;
+ }
+
+ return 0;
+}
+
static int net_shaper_parent_from_leaves(int leaves_count,
const struct net_shaper *leaves,
struct net_shaper *node,
@@ -964,15 +1032,22 @@ static int __net_shaper_group(struct net_shaper_binding *binding,
int i, ret;
if (node->handle.scope == NET_SHAPER_SCOPE_NODE) {
+ struct net_shaper *cur = NULL;
+
new_node = node->handle.id == NET_SHAPER_ID_UNSPEC;
- if (!new_node && !net_shaper_lookup(binding, &node->handle)) {
- /* The related attribute is not available when
- * reaching here from the delete() op.
- */
- NL_SET_ERR_MSG_FMT(extack, "Node shaper %d:%d does not exists",
- node->handle.scope, node->handle.id);
- return -ENOENT;
+ if (!new_node) {
+ cur = net_shaper_lookup(binding, &node->handle);
+ if (!cur) {
+ /* The related attribute is not available
+ * when reaching here from the delete() op.
+ */
+ NL_SET_ERR_MSG_FMT(extack,
+ "Node shaper %d:%d does not exist",
+ node->handle.scope,
+ node->handle.id);
+ return -ENOENT;
+ }
}
/* When unspecified, the node parent scope is inherited from
@@ -986,6 +1061,15 @@ static int __net_shaper_group(struct net_shaper_binding *binding,
return ret;
}
+ if (cur && net_shaper_handle_cmp(&cur->parent,
+ &node->parent)) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "Cannot reparent node shaper %d:%d",
+ node->handle.scope,
+ node->handle.id);
+ return -EOPNOTSUPP;
+ }
+
} else {
net_shaper_default_parent(&node->handle, &node->parent);
}
@@ -1162,7 +1246,7 @@ static int net_shaper_group_send_reply(struct net_shaper_binding *binding,
free_msg:
/* Should never happen as msg is pre-allocated with enough space. */
WARN_ONCE(true, "calculated message payload length (%d)",
- net_shaper_handle_size());
+ net_shaper_group_reply_size());
nlmsg_free(msg);
return -EMSGSIZE;
}
@@ -1172,10 +1256,9 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info)
struct net_shaper **old_nodes, *leaves, node = {};
struct net_shaper_hierarchy *hierarchy;
struct net_shaper_binding *binding;
- int i, ret, rem, leaves_count;
+ int i, ret, leaves_count;
int old_nodes_count = 0;
struct sk_buff *msg;
- struct nlattr *attr;
if (GENL_REQ_ATTR_CHECK(info, NET_SHAPER_A_LEAVES))
return -EINVAL;
@@ -1203,26 +1286,19 @@ int net_shaper_nl_group_doit(struct sk_buff *skb, struct genl_info *info)
if (ret)
goto free_leaves;
- i = 0;
- nla_for_each_attr_type(attr, NET_SHAPER_A_LEAVES,
- genlmsg_data(info->genlhdr),
- genlmsg_len(info->genlhdr), rem) {
- if (WARN_ON_ONCE(i >= leaves_count))
- goto free_leaves;
-
- ret = net_shaper_parse_leaf(binding, attr, info,
- &node, &leaves[i]);
- if (ret)
- goto free_leaves;
- i++;
- }
+ ret = net_shaper_parse_leaves(binding, info, &node,
+ leaves, leaves_count);
+ if (ret)
+ goto free_leaves;
/* Prepare the msg reply in advance, to avoid device operation
* rollback on allocation failure.
*/
- msg = genlmsg_new(net_shaper_handle_size(), GFP_KERNEL);
- if (!msg)
+ msg = genlmsg_new(net_shaper_group_reply_size(), GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
goto free_leaves;
+ }
hierarchy = net_shaper_hierarchy_setup(binding);
if (!hierarchy) {
diff --git a/net/shaper/shaper_nl_gen.c b/net/shaper/shaper_nl_gen.c
index 9b29be3ef19a..76eff85ec66d 100644
--- a/net/shaper/shaper_nl_gen.c
+++ b/net/shaper/shaper_nl_gen.c
@@ -11,10 +11,15 @@
#include <uapi/linux/net_shaper.h>
+/* Integer value ranges */
+static const struct netlink_range_validation net_shaper_a_handle_id_range = {
+ .max = NET_SHAPER_MAX_HANDLE_ID,
+};
+
/* Common nested types */
const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1] = {
[NET_SHAPER_A_HANDLE_SCOPE] = NLA_POLICY_MAX(NLA_U32, 3),
- [NET_SHAPER_A_HANDLE_ID] = { .type = NLA_U32, },
+ [NET_SHAPER_A_HANDLE_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &net_shaper_a_handle_id_range),
};
const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1] = {
diff --git a/net/shaper/shaper_nl_gen.h b/net/shaper/shaper_nl_gen.h
index 42c46c52c775..2406652a9014 100644
--- a/net/shaper/shaper_nl_gen.h
+++ b/net/shaper/shaper_nl_gen.h
@@ -12,6 +12,8 @@
#include <uapi/linux/net_shaper.h>
+#define NET_SHAPER_MAX_HANDLE_ID 67108862
+
/* Common nested types */
extern const struct nla_policy net_shaper_handle_nl_policy[NET_SHAPER_A_HANDLE_ID + 1];
extern const struct nla_policy net_shaper_leaf_info_nl_policy[NET_SHAPER_A_WEIGHT + 1];
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 185dbed7de5d..dffbd529762d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1400,7 +1400,8 @@ smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc,
int i;
for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
- if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
+ if (ini->ism_dev[i] &&
+ ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
ini->ism_selected = i;
return 0;
}
@@ -3054,18 +3055,17 @@ static int __smc_setsockopt(struct socket *sock, int level, int optname,
smc = smc_sk(sk);
+ /* pre-fetch user data outside the lock */
+ if (optname == SMC_LIMIT_HS) {
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (copy_from_sockptr(&val, optval, sizeof(int)))
+ return -EFAULT;
+ }
+
lock_sock(sk);
switch (optname) {
case SMC_LIMIT_HS:
- if (optlen < sizeof(int)) {
- rc = -EINVAL;
- break;
- }
- if (copy_from_sockptr(&val, optval, sizeof(int))) {
- rc = -EFAULT;
- break;
- }
-
smc->limit_smc_hs = !!val;
rc = 0;
break;
diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h
index a9a6e3c1113a..53da84f57fd6 100644
--- a/net/smc/smc_tracepoint.h
+++ b/net/smc/smc_tracepoint.h
@@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(smc_msg_event,
__field(const void *, smc)
__field(u64, net_cookie)
__field(size_t, len)
- __string(name, smc->conn.lnk->ibname)
+ __string(name, smc->conn.lnk ? smc->conn.lnk->ibname : "")
),
TP_fast_assign(
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 7081c1214e6c..b5474ce534fb 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -403,7 +403,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
INIT_LIST_HEAD(&cd->readers);
spin_lock_init(&cd->queue_lock);
init_waitqueue_head(&cd->queue_wait);
- cd->next_seqno = 0;
+ cd->next_seqno = 1;
spin_lock(&cache_list_lock);
cd->nextcheck = 0;
cd->entries = 0;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 2590e855f6a5..3bfdaf5e64f5 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -789,23 +789,33 @@ static int tls_push_record(struct sock *sk, int flags,
i = msg_pl->sg.end;
sk_msg_iter_var_prev(i);
+ /* msg_pl->sg.data is a ring; data[MAX+1] is reserved for the wrap
+ * link (frags won't use it). 'i' is now the last filled entry:
+ *
+ * i end start
+ * v v v [ rsv ]
+ * [ d ][ d ][ ][ ]...[ ][ d ][ d ][ d ][chain]
+ * ^ END v
+ * `-----------------------------------------'
+ *
+ * Note that SGL does not allow chain-after-chain, so for TLS 1.3,
+ * we must make sure we don't create the wrap entry and then chain
+ * link to content_type immediately at index 0.
+ */
+ if (i < msg_pl->sg.start)
+ sg_chain(msg_pl->sg.data, ARRAY_SIZE(msg_pl->sg.data),
+ msg_pl->sg.data);
+
rec->content_type = record_type;
if (prot->version == TLS_1_3_VERSION) {
/* Add content type to end of message. No padding added */
sg_set_buf(&rec->sg_content_type, &rec->content_type, 1);
sg_mark_end(&rec->sg_content_type);
- sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1,
- &rec->sg_content_type);
+ sg_chain(msg_pl->sg.data, i + 2, &rec->sg_content_type);
} else {
sg_mark_end(sk_msg_elem(msg_pl, i));
}
- if (msg_pl->sg.end < msg_pl->sg.start) {
- sg_chain(&msg_pl->sg.data[msg_pl->sg.start],
- MAX_SKB_FRAGS - msg_pl->sg.start + 1,
- msg_pl->sg.data);
- }
-
i = msg_pl->sg.start;
sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 9b8014516f4f..989cc252d3d3 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -136,27 +136,6 @@ static void virtio_transport_init_hdr(struct sk_buff *skb,
hdr->fwd_cnt = cpu_to_le32(0);
}
-static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb,
- void *dst,
- size_t len)
-{
- struct iov_iter iov_iter = { 0 };
- struct kvec kvec;
- size_t to_copy;
-
- kvec.iov_base = dst;
- kvec.iov_len = len;
-
- iov_iter.iter_type = ITER_KVEC;
- iov_iter.kvec = &kvec;
- iov_iter.nr_segs = 1;
-
- to_copy = min_t(size_t, len, skb->len);
-
- skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset,
- &iov_iter, to_copy);
-}
-
/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
@@ -166,12 +145,12 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
struct sk_buff *skb;
size_t payload_len;
- /* A packet could be split to fit the RX buffer, so we can retrieve
- * the payload length from the header and the buffer pointer taking
- * care of the offset in the original packet.
+ /* A packet could be split to fit the RX buffer, so we use
+ * the payload length from the header, which has been updated
+ * by the sender to reflect the fragment size.
*/
pkt_hdr = virtio_vsock_hdr(pkt);
- payload_len = pkt->len;
+ payload_len = le32_to_cpu(pkt_hdr->len);
skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
GFP_ATOMIC);
@@ -214,12 +193,18 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));
if (payload_len) {
- if (skb_is_nonlinear(pkt)) {
- void *data = skb_put(skb, payload_len);
-
- virtio_transport_copy_nonlinear_skb(pkt, data, payload_len);
- } else {
- skb_put_data(skb, pkt->data, payload_len);
+ struct iov_iter iov_iter;
+ struct kvec kvec;
+ void *data = skb_put(skb, payload_len);
+
+ kvec.iov_base = data;
+ kvec.iov_len = payload_len;
+ iov_iter_kvec(&iov_iter, ITER_DEST, &kvec, 1, payload_len);
+
+ if (skb_copy_datagram_iter(pkt, VIRTIO_VSOCK_SKB_CB(pkt)->offset,
+ &iov_iter, payload_len)) {
+ kfree_skb(skb);
+ return NULL;
}
}
diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h
index 8f1b3500f8e2..abb1964c44d4 100644
--- a/scripts/gcc-plugins/gcc-common.h
+++ b/scripts/gcc-plugins/gcc-common.h
@@ -309,7 +309,9 @@ typedef const gimple *const_gimple_ptr;
#define gimple gimple_ptr
#define const_gimple const_gimple_ptr
#undef CONST_CAST_GIMPLE
-#define CONST_CAST_GIMPLE(X) CONST_CAST(gimple, (X))
+#define CONST_CAST_GIMPLE(X) const_cast<gimple>((X))
+#undef CONST_CAST_TREE
+#define CONST_CAST_TREE(X) const_cast<tree>((X))
/* gimple related */
static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree lhs, tree op1, tree op2 MEM_STAT_DECL)
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 6673601246b3..eff29645719b 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -793,9 +793,10 @@
#define MSR_AMD64_LBR_SELECT 0xc000010e
/* Zen4 */
-#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG 0xc001102e
#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4
#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
+#define MSR_ZEN2_BP_CFG_BUG_FIX_BIT 33
/* Fam 19h MSRs */
#define MSR_F19H_UMC_PERF_CTL 0xc0010800
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index 08205f9fc525..cc53b2f21c44 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -15,9 +15,11 @@ UAPI_PATH:=../../../../include/uapi/
get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2)
get_hdr_inc2=-D$(1) -D$(2) -include $(UAPI_PATH)/linux/$(3)
+CFLAGS_dev-energymodel:=$(call get_hdr_inc,_LINUX_DEV_ENERGYMODEL_H,dev_energymodel.h)
CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h)
-CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
+CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_TYPELIMITS_H,typelimits.h) \
+ $(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index 0e1e486c1185..cdc3646f2642 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -3212,6 +3212,8 @@ def render_uapi(family, cw):
for const in family['definitions']:
if const.get('header'):
continue
+ if const.get('scope', 'uapi') != 'uapi':
+ continue
if const['type'] != 'const':
cw.writes_defines(defines)
@@ -3339,6 +3341,25 @@ def render_uapi(family, cw):
cw.p(f'#endif /* {hdr_prot} */')
+def render_scoped_consts(family, cw, scope):
+ defines = []
+ for const in family['definitions']:
+ if const['type'] != 'const':
+ continue
+ if const.get('header'):
+ continue
+ if const.get('scope') != scope:
+ continue
+ name_pfx = const.get('name-prefix', f"{family.ident_name}-")
+ defines.append([
+ c_upper(family.get('c-define-name',
+ f"{name_pfx}{const['name']}")),
+ const['value']])
+ if defines:
+ cw.writes_defines(defines)
+ cw.nl()
+
+
def _render_user_ntf_entry(ri, op):
if not ri.family.is_classic():
ri.cw.block_start(line=f"[{op.enum_name}] = ")
@@ -3504,8 +3525,12 @@ def main():
cw.p('#include "ynl.h"')
headers = []
for definition in parsed['definitions'] + parsed['attribute-sets']:
- if 'header' in definition:
- headers.append(definition['header'])
+ if 'header' not in definition:
+ continue
+ scope = definition.get('scope', 'uapi')
+ if scope != 'uapi' and scope != args.mode:
+ continue
+ headers.append(definition['header'])
if args.mode == 'user':
headers.append(parsed.uapi_header)
seen_header = []
@@ -3522,6 +3547,7 @@ def main():
for one in args.user_header:
cw.p(f'#include "{one}"')
else:
+ render_scoped_consts(parsed, cw, 'user')
cw.p('struct ynl_sock;')
cw.nl()
render_user_family(parsed, cw, True)
@@ -3529,6 +3555,7 @@ def main():
if args.mode == "kernel":
if args.header:
+ render_scoped_consts(parsed, cw, 'kernel')
for _, struct in sorted(parsed.pure_nested_structs.items()):
if struct.request:
cw.p('/* Common nested types */')
diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index 6a7295347e90..42f54936f4bb 100644
--- a/tools/testing/selftests/cgroup/lib/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -106,8 +106,9 @@ int cg_read_strcmp(const char *cgroup, const char *control,
/* Handle the case of comparing against empty string */
if (!expected)
return -1;
- else
- size = strlen(expected) + 1;
+
+ /* needs size > 1, otherwise cg_read() reads 0 bytes */
+ size = (expected[0] == '\0') ? 2 : strlen(expected) + 1;
buf = malloc(size);
if (!buf)
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh
index 42a6628fb8bc..1c0444729e70 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh
@@ -18,7 +18,7 @@ write_test() {
echo "testing $interface $value"
echo $value > $dir/$interface
new=$(cat $dir/$interface)
- [[ $value -ne $(cat $dir/$interface) ]] && {
+ [[ "$value" != "$new" ]] && {
echo "$interface write $value failed: new:$new"
exit 1
}
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index eeabd34bf083..12f59925500b 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -368,11 +368,15 @@ static int test_percpu_basic(const char *root)
for (i = 0; i < 1000; i++) {
child = cg_name_indexed(parent, "child", i);
- if (!child)
- return -1;
+ if (!child) {
+ ret = -1;
+ goto cleanup_children;
+ }
- if (cg_create(child))
+ if (cg_create(child)) {
+ free(child);
goto cleanup_children;
+ }
free(child);
}
diff --git a/tools/testing/selftests/drivers/net/shaper.py b/tools/testing/selftests/drivers/net/shaper.py
index 11310f19bfa0..e39d270e688d 100755
--- a/tools/testing/selftests/drivers/net/shaper.py
+++ b/tools/testing/selftests/drivers/net/shaper.py
@@ -1,7 +1,10 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
-from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true, KsftSkipEx
+import errno
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, ksft_raises, ksft_true, KsftSkipEx
from lib.py import EthtoolFamily, NetshaperFamily
from lib.py import NetDrvEnv
from lib.py import NlError
@@ -438,6 +441,21 @@ def queue_update(cfg, nl_shaper) -> None:
nl_shaper.delete({'ifindex': cfg.ifindex,
'handle': {'scope': 'queue', 'id': i}})
+def dup_leaves(cfg, nl_shaper) -> None:
+ """ Ensure that the kernel rejects duplicate leaves. """
+ if not cfg.groups:
+ raise KsftSkipEx("device does not support node scope")
+
+ with ksft_raises(NlError) as cm:
+ nl_shaper.group({
+ 'ifindex': cfg.ifindex,
+ 'leaves':[{'handle': {'scope': 'queue', 'id': 0}},
+ {'handle': {'scope': 'queue', 'id': 0}}],
+ 'handle': {'scope':'node'},
+ 'metric': 'bps',
+ 'bw-max': 10000})
+ ksft_eq(cm.exception.error, errno.EINVAL)
+
def main() -> None:
with NetDrvEnv(__file__, queue_count=4) as cfg:
cfg.queues = False
@@ -453,7 +471,9 @@ def main() -> None:
basic_groups,
qgroups,
delegation,
- queue_update], args=(cfg, NetshaperFamily()))
+ dup_leaves,
+ queue_update],
+ args=(cfg, NetshaperFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index d6528c6f5e03..253e748c1d4a 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -510,7 +510,12 @@ static void test_guest_memfd_guest(void)
"Default VM type should support INIT_SHARED, supported flags = 0x%x",
vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
- size = vm->page_size;
+ /*
+ * Use the max of the host or guest page size for all operations, as
+ * KVM requires guest_memfd files and memslots to be sized to multiples
+ * of the host page size.
+ */
+ size = max_t(size_t, vm->page_size, page_size);
fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
GUEST_MEMFD_FLAG_INIT_SHARED);
vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
@@ -519,7 +524,7 @@ static void test_guest_memfd_guest(void)
memset(mem, 0xaa, size);
kvm_munmap(mem, size);
- virt_pg_map(vm, gpa, gpa);
+ virt_map(vm, gpa, gpa, size / vm->page_size);
vcpu_args_set(vcpu, 2, gpa, size);
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 7df2bc8eec02..76fcdd1fd3cb 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -220,6 +220,8 @@ static void check_steal_time_uapi(void)
};
vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, 1, 0);
+ virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, 1);
st_ipa = (ulong)ST_GPA_BASE | 1;
ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
diff --git a/tools/testing/selftests/sched_ext/dequeue.c b/tools/testing/selftests/sched_ext/dequeue.c
index 4e93262703ca..383d06e972a4 100644
--- a/tools/testing/selftests/sched_ext/dequeue.c
+++ b/tools/testing/selftests/sched_ext/dequeue.c
@@ -33,6 +33,7 @@ static void worker_fn(int id)
/* Do some work to trigger scheduling events */
for (j = 0; j < 10000; j++)
sum += j;
+ asm volatile("" : : "r"(sum));
/* Sleep to trigger dequeue */
usleep(1000 + (id * 100));
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index b1f856cf62c1..848696c373fc 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -1284,5 +1284,46 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "3a62",
+ "name": "Try to create a qlen underflow with QFQ/CBS",
+ "category": [
+ "qdisc",
+ "qfq",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: qfq",
+ "$TC class add dev $DUMMY classid 1:1 parent 1: qfq",
+ "$TC class add dev $DUMMY classid 1:2 parent 1: qfq",
+ "$TC qdisc add dev $DUMMY handle 2: parent 1:1 cbs",
+ "$TC qdisc add dev $DUMMY handle 3: parent 2: netem delay 5000000000",
+ "$TC filter add dev $DUMMY parent 1: prio 1 u32 match ip dst 10.10.10.1 classid 1:1 action ok",
+ "$TC filter add dev $DUMMY parent 1: prio 2 u32 match ip dst 10.10.10.2 classid 1:2 action ok",
+ "ping -c 1 10.10.10.1 -W0.01 -I$DUMMY || true",
+ "$IP l set $DUMMY down",
+ "$IP l set $DUMMY up",
+ "$TC qdisc replace dev $DUMMY handle 4: parent 2: pfifo"
+ ],
+ "cmdUnderTest": "ping -c 1 10.10.10.2 -W0.01 -I$DUMMY",
+ "expExitCode": "1",
+ "verifyCmd": "$TC -s -j qdisc ls dev $DUMMY parent 1:1",
+ "matchJSON": [
+ {
+ "kind": "cbs",
+ "handle": "2:",
+ "bytes": 0,
+ "packets": 0
+ }
+ ],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
}
]
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index fbd9b1e7342a..0b23c09daea5 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -1735,6 +1735,17 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
goto fail;
}
+ /*
+ * The kernel may reduce nr_hw_queues (e.g. capped to nr_cpu_ids).
+ * Cap nthreads to the actual queue count to avoid creating extra
+ * handler threads that will hang during device removal.
+ *
+ * per_io_tasks mode is excluded: threads interleave across all
+ * queues so nthreads > nr_hw_queues is valid and intentional.
+ */
+ if (!ctx->per_io_tasks && dev->nthreads > info->nr_hw_queues)
+ dev->nthreads = info->nr_hw_queues;
+
ret = ublk_start_daemon(ctx, dev);
ublk_dbg(UBLK_DBG_DEV, "%s: daemon exit %d\n", __func__, ret);
if (ret < 0)
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 02bc6b00d76c..572b854edf74 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -63,7 +63,8 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
- if (!memslot || (offset + __fls(mask)) >= memslot->npages)
+ if (!memslot || offset >= memslot->npages ||
+ offset + __fls(mask) >= memslot->npages)
return;
KVM_MMU_LOCK(kvm);