summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c45
-rw-r--r--net/8021q/vlan.h1
-rw-r--r--net/8021q/vlan_dev.c31
-rw-r--r--net/9p/client.c30
-rw-r--r--net/9p/trans_fd.c17
-rw-r--r--net/appletalk/aarp.c24
-rw-r--r--net/appletalk/ddp.c1
-rw-r--r--net/atm/clip.c75
-rw-r--r--net/atm/common.c1
-rw-r--r--net/atm/lec.c15
-rw-r--r--net/atm/mpc.c2
-rw-r--r--net/atm/raw.c2
-rw-r--r--net/atm/resources.c3
-rw-r--r--net/ax25/af_ax25.c23
-rw-r--r--net/ax25/ax25_dev.c4
-rw-r--r--net/ax25/ax25_ip.c3
-rw-r--r--net/ax25/ax25_out.c22
-rw-r--r--net/ax25/ax25_route.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.c3
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/bat_v_elp.c122
-rw-r--r--net/batman-adv/bat_v_elp.h2
-rw-r--r--net/batman-adv/bat_v_ogm.c3
-rw-r--r--net/batman-adv/types.h3
-rw-r--r--net/bluetooth/6lowpan.c7
-rw-r--r--net/bluetooth/eir.c17
-rw-r--r--net/bluetooth/eir.h2
-rw-r--r--net/bluetooth/hci_conn.c191
-rw-r--r--net/bluetooth/hci_core.c73
-rw-r--r--net/bluetooth/hci_event.c123
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/hci_sync.c447
-rw-r--r--net/bluetooth/iso.c41
-rw-r--r--net/bluetooth/l2cap_core.c255
-rw-r--r--net/bluetooth/l2cap_sock.c25
-rw-r--r--net/bluetooth/mgmt.c179
-rw-r--r--net/bluetooth/mgmt_util.c49
-rw-r--r--net/bluetooth/mgmt_util.h8
-rw-r--r--net/bluetooth/rfcomm/core.c6
-rw-r--r--net/bluetooth/sco.c12
-rw-r--r--net/bluetooth/smp.c21
-rw-r--r--net/bluetooth/smp.h1
-rw-r--r--net/bpf/test_run.c5
-rw-r--r--net/bridge/br_mdb.c2
-rw-r--r--net/bridge/br_mst.c4
-rw-r--r--net/bridge/br_multicast.c103
-rw-r--r--net/bridge/br_netfilter_hooks.c8
-rw-r--r--net/bridge/br_nf_core.c7
-rw-r--r--net/bridge/br_private.h12
-rw-r--r--net/bridge/br_switchdev.c3
-rw-r--r--net/bridge/br_vlan.c4
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c12
-rw-r--r--net/caif/cfctrl.c294
-rw-r--r--net/can/af_can.c12
-rw-r--r--net/can/af_can.h12
-rw-r--r--net/can/bcm.c79
-rw-r--r--net/can/gw.c149
-rw-r--r--net/can/j1939/socket.c4
-rw-r--r--net/can/j1939/transport.c5
-rw-r--r--net/can/proc.c46
-rw-r--r--net/core/dev.c75
-rw-r--r--net/core/dev.h12
-rw-r--r--net/core/devmem.c4
-rw-r--r--net/core/drop_monitor.c29
-rw-r--r--net/core/dst.c12
-rw-r--r--net/core/fib_rules.c24
-rw-r--r--net/core/filter.c140
-rw-r--r--net/core/flow_dissector.c70
-rw-r--r--net/core/gro.c4
-rw-r--r--net/core/ieee8021q_helpers.c44
-rw-r--r--net/core/lwtunnel.c91
-rw-r--r--net/core/neighbour.c21
-rw-r--r--net/core/net_namespace.c8
-rw-r--r--net/core/netdev-genl-gen.c14
-rw-r--r--net/core/netdev-genl.c69
-rw-r--r--net/core/netmem_priv.h33
-rw-r--r--net/core/netpoll.c16
-rw-r--r--net/core/page_pool.c127
-rw-r--r--net/core/page_pool_user.c2
-rw-r--r--net/core/pktgen.c19
-rw-r--r--net/core/rtnetlink.c3
-rw-r--r--net/core/scm.c10
-rw-r--r--net/core/selftests.c23
-rw-r--r--net/core/skbuff.c31
-rw-r--r--net/core/skmsg.c70
-rw-r--r--net/core/sock.c44
-rw-r--r--net/core/sock_map.c8
-rw-r--r--net/core/sysctl_net_core.c8
-rw-r--r--net/core/utils.c4
-rw-r--r--net/core/xdp.c4
-rw-r--r--net/devlink/core.c2
-rw-r--r--net/dsa/dsa.c59
-rw-r--r--net/dsa/tag_8021q.c2
-rw-r--r--net/dsa/tag_brcm.c2
-rw-r--r--net/dsa/tag_ksz.c19
-rw-r--r--net/ethtool/cabletest.c8
-rw-r--r--net/ethtool/cmis_cdb.c2
-rw-r--r--net/ethtool/ioctl.c8
-rw-r--r--net/ethtool/linkstate.c26
-rw-r--r--net/ethtool/netlink.c16
-rw-r--r--net/ethtool/netlink.h5
-rw-r--r--net/ethtool/phy.c2
-rw-r--r--net/ethtool/plca.c6
-rw-r--r--net/ethtool/pse-pd.c4
-rw-r--r--net/ethtool/rss.c3
-rw-r--r--net/ethtool/stats.c18
-rw-r--r--net/ethtool/strset.c2
-rw-r--r--net/hsr/hsr_device.c2
-rw-r--r--net/hsr/hsr_forward.c11
-rw-r--r--net/hsr/hsr_framereg.c95
-rw-r--r--net/hsr/hsr_framereg.h8
-rw-r--r--net/hsr/hsr_main.h2
-rw-r--r--net/ipv4/arp.c6
-rw-r--r--net/ipv4/devinet.c3
-rw-r--r--net/ipv4/esp4.c53
-rw-r--r--net/ipv4/esp4_offload.c6
-rw-r--r--net/ipv4/fib_frontend.c18
-rw-r--r--net/ipv4/fib_rules.c4
-rw-r--r--net/ipv4/fib_trie.c22
-rw-r--r--net/ipv4/icmp.c59
-rw-r--r--net/ipv4/inet_hashtables.c37
-rw-r--r--net/ipv4/inetpeer.c31
-rw-r--r--net/ipv4/ip_fragment.c15
-rw-r--r--net/ipv4/ip_gre.c16
-rw-r--r--net/ipv4/ip_options.c3
-rw-r--r--net/ipv4/ip_tunnel_core.c4
-rw-r--r--net/ipv4/ipmr.c28
-rw-r--r--net/ipv4/ipmr_base.c9
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c20
-rw-r--r--net/ipv4/route.c65
-rw-r--r--net/ipv4/tcp.c57
-rw-r--r--net/ipv4/tcp_bpf.c36
-rw-r--r--net/ipv4/tcp_cubic.c8
-rw-r--r--net/ipv4/tcp_fastopen.c7
-rw-r--r--net/ipv4/tcp_input.c159
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_minisocks.c10
-rw-r--r--net/ipv4/tcp_offload.c14
-rw-r--r--net/ipv4/tcp_output.c9
-rw-r--r--net/ipv4/udp.c102
-rw-r--r--net/ipv4/udp_offload.c77
-rw-r--r--net/ipv4/xfrm4_input.c21
-rw-r--r--net/ipv6/addrconf.c68
-rw-r--r--net/ipv6/calipso.c29
-rw-r--r--net/ipv6/esp6.c53
-rw-r--r--net/ipv6/esp6_offload.c6
-rw-r--r--net/ipv6/fib6_rules.c4
-rw-r--r--net/ipv6/icmp.c48
-rw-r--r--net/ipv6/ila/ila_common.c6
-rw-r--r--net/ipv6/ila/ila_lwt.c4
-rw-r--r--net/ipv6/ioam6_iptunnel.c79
-rw-r--r--net/ipv6/ip6_fib.c24
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/ip6_offload.c4
-rw-r--r--net/ipv6/ip6_output.c17
-rw-r--r--net/ipv6/ip6_tunnel.c7
-rw-r--r--net/ipv6/ip6_vti.c3
-rw-r--r--net/ipv6/ip6mr.c31
-rw-r--r--net/ipv6/mcast.c58
-rw-r--r--net/ipv6/ndisc.c32
-rw-r--r--net/ipv6/netfilter.c12
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c23
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c36
-rw-r--r--net/ipv6/raw.c8
-rw-r--r--net/ipv6/route.c111
-rw-r--r--net/ipv6/rpl_iptunnel.c79
-rw-r--r--net/ipv6/seg6_iptunnel.c110
-rw-r--r--net/ipv6/seg6_local.c6
-rw-r--r--net/ipv6/sit.c8
-rw-r--r--net/ipv6/tcpv6_offload.c23
-rw-r--r--net/ipv6/udp.c61
-rw-r--r--net/ipv6/xfrm6_input.c21
-rw-r--r--net/kcm/kcmsock.c10
-rw-r--r--net/key/af_key.c7
-rw-r--r--net/l2tp/l2tp_ip6.c8
-rw-r--r--net/llc/af_llc.c8
-rw-r--r--net/llc/llc_s_ac.c49
-rw-r--r--net/mac80211/cfg.c16
-rw-r--r--net/mac80211/chan.c4
-rw-r--r--net/mac80211/debugfs.c44
-rw-r--r--net/mac80211/debugfs_netdev.c2
-rw-r--r--net/mac80211/debugfs_sta.c6
-rw-r--r--net/mac80211/driver-ops.c10
-rw-r--r--net/mac80211/driver-ops.h6
-rw-r--r--net/mac80211/ieee80211_i.h14
-rw-r--r--net/mac80211/iface.c31
-rw-r--r--net/mac80211/link.c103
-rw-r--r--net/mac80211/main.c6
-rw-r--r--net/mac80211/mesh_hwmp.c20
-rw-r--r--net/mac80211/mlme.c94
-rw-r--r--net/mac80211/parse.c170
-rw-r--r--net/mac80211/rate.c2
-rw-r--r--net/mac80211/rx.c27
-rw-r--r--net/mac80211/scan.c11
-rw-r--r--net/mac80211/sta_info.c48
-rw-r--r--net/mac80211/sta_info.h11
-rw-r--r--net/mac80211/tdls.c2
-rw-r--r--net/mac80211/tx.c29
-rw-r--r--net/mac80211/util.c15
-rw-r--r--net/mac802154/iface.c4
-rw-r--r--net/mctp/af_mctp.c29
-rw-r--r--net/mctp/device.c65
-rw-r--r--net/mctp/route.c14
-rw-r--r--net/mctp/test/route-test.c109
-rw-r--r--net/mpls/af_mpls.c4
-rw-r--r--net/mptcp/ctrl.c4
-rw-r--r--net/mptcp/options.c28
-rw-r--r--net/mptcp/pm.c8
-rw-r--r--net/mptcp/pm_netlink.c26
-rw-r--r--net/mptcp/pm_userspace.c8
-rw-r--r--net/mptcp/protocol.c61
-rw-r--r--net/mptcp/protocol.h66
-rw-r--r--net/mptcp/sockopt.c28
-rw-r--r--net/mptcp/subflow.c67
-rw-r--r--net/ncsi/internal.h25
-rw-r--r--net/ncsi/ncsi-manage.c29
-rw-r--r--net/ncsi/ncsi-pkt.h23
-rw-r--r--net/ncsi/ncsi-rsp.c59
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h2
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c27
-rw-r--r--net/netfilter/nf_bpf_link.c5
-rw-r--r--net/netfilter/nf_conncount.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c26
-rw-r--r--net/netfilter/nf_conntrack_netlink.c24
-rw-r--r--net/netfilter/nf_conntrack_standalone.c12
-rw-r--r--net/netfilter/nf_nat_core.c12
-rw-r--r--net/netfilter/nf_tables_api.c114
-rw-r--r--net/netfilter/nft_compat.c8
-rw-r--r--net/netfilter/nft_ct.c6
-rw-r--r--net/netfilter/nft_exthdr.c10
-rw-r--r--net/netfilter/nft_flow_offload.c16
-rw-r--r--net/netfilter/nft_quota.c20
-rw-r--r--net/netfilter/nft_set_hash.c3
-rw-r--r--net/netfilter/nft_set_pipapo.c73
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c24
-rw-r--r--net/netfilter/nft_set_rbtree.c43
-rw-r--r--net/netfilter/nft_tunnel.c14
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c4
-rw-r--r--net/netfilter/xt_mark.c2
-rw-r--r--net/netfilter/xt_nfacct.c4
-rw-r--r--net/netlabel/netlabel_kapi.c5
-rw-r--r--net/netlink/af_netlink.c92
-rw-r--r--net/nfc/nci/hci.c2
-rw-r--r--net/nfc/nci/uart.c8
-rw-r--r--net/openvswitch/actions.c13
-rw-r--r--net/openvswitch/conntrack.c30
-rw-r--r--net/openvswitch/datapath.c12
-rw-r--r--net/openvswitch/datapath.h3
-rw-r--r--net/openvswitch/flow.c2
-rw-r--r--net/openvswitch/flow_netlink.c18
-rw-r--r--net/packet/af_packet.c39
-rw-r--r--net/phonet/pep.c2
-rw-r--r--net/rds/tcp.c8
-rw-r--r--net/rose/af_rose.c40
-rw-r--r--net/rose/rose_route.c15
-rw-r--r--net/rose/rose_timer.c15
-rw-r--r--net/rxrpc/ar-internal.h2
-rw-r--r--net/rxrpc/call_accept.c5
-rw-r--r--net/rxrpc/call_object.c6
-rw-r--r--net/rxrpc/conn_event.c33
-rw-r--r--net/rxrpc/conn_object.c1
-rw-r--r--net/rxrpc/input.c2
-rw-r--r--net/rxrpc/output.c3
-rw-r--r--net/rxrpc/peer_event.c16
-rw-r--r--net/rxrpc/peer_object.c12
-rw-r--r--net/rxrpc/recvmsg.c19
-rw-r--r--net/rxrpc/rxperf.c12
-rw-r--r--net/rxrpc/sendmsg.c2
-rw-r--r--net/sched/act_ctinfo.c19
-rw-r--r--net/sched/act_tunnel_key.c2
-rw-r--r--net/sched/cls_api.c125
-rw-r--r--net/sched/cls_bpf.c2
-rw-r--r--net/sched/cls_flower.c4
-rw-r--r--net/sched/cls_matchall.c2
-rw-r--r--net/sched/cls_u32.c4
-rw-r--r--net/sched/sch_api.c52
-rw-r--r--net/sched/sch_codel.c7
-rw-r--r--net/sched/sch_drr.c16
-rw-r--r--net/sched/sch_ets.c28
-rw-r--r--net/sched/sch_fifo.c3
-rw-r--r--net/sched/sch_fq.c2
-rw-r--r--net/sched/sch_fq_codel.c8
-rw-r--r--net/sched/sch_fq_pie.c2
-rw-r--r--net/sched/sch_generic.c4
-rw-r--r--net/sched/sch_gred.c3
-rw-r--r--net/sched/sch_hfsc.c46
-rw-r--r--net/sched/sch_hhf.c2
-rw-r--r--net/sched/sch_htb.c17
-rw-r--r--net/sched/sch_mqprio.c2
-rw-r--r--net/sched/sch_netem.c42
-rw-r--r--net/sched/sch_pie.c2
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c51
-rw-r--r--net/sched/sch_red.c2
-rw-r--r--net/sched/sch_sfq.c108
-rw-r--r--net/sched/sch_skbprio.c3
-rw-r--r--net/sched/sch_taprio.c27
-rw-r--r--net/sched/sch_tbf.c2
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/socket.c25
-rw-r--r--net/sctp/stream.c2
-rw-r--r--net/sctp/sysctl.c4
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/smc/af_smc.c26
-rw-r--r--net/smc/smc.h8
-rw-r--r--net/smc/smc_pnet.c8
-rw-r--r--net/smc/smc_rx.c37
-rw-r--r--net/smc/smc_rx.h8
-rw-r--r--net/strparser/strparser.c11
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/clnt.c12
-rw-r--r--net/sunrpc/rpc_pipe.c14
-rw-r--r--net/sunrpc/rpcb_clnt.c5
-rw-r--r--net/sunrpc/sched.c4
-rw-r--r--net/sunrpc/svc.c11
-rw-r--r--net/sunrpc/svcsock.c60
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c18
-rw-r--r--net/sunrpc/xprtsock.c63
-rw-r--r--net/switchdev/switchdev.c25
-rw-r--r--net/tipc/crypto.c15
-rw-r--r--net/tipc/link.c1
-rw-r--r--net/tipc/monitor.c3
-rw-r--r--net/tipc/topsrv.c2
-rw-r--r--net/tipc/udp_media.c4
-rw-r--r--net/tls/tls.h2
-rw-r--r--net/tls/tls_main.c6
-rw-r--r--net/tls/tls_strp.c17
-rw-r--r--net/tls/tls_sw.c31
-rw-r--r--net/unix/af_unix.c31
-rw-r--r--net/vmw_vsock/af_vsock.c104
-rw-r--r--net/vmw_vsock/virtio_transport.c12
-rw-r--r--net/vmw_vsock/virtio_transport_common.c62
-rw-r--r--net/vmw_vsock/vmci_transport.c4
-rw-r--r--net/vmw_vsock/vsock_bpf.c9
-rw-r--r--net/wireless/chan.c8
-rw-r--r--net/wireless/core.c13
-rw-r--r--net/wireless/mlme.c3
-rw-r--r--net/wireless/nl80211.c17
-rw-r--r--net/wireless/reg.c7
-rw-r--r--net/wireless/scan.c27
-rw-r--r--net/wireless/tests/scan.c2
-rw-r--r--net/wireless/util.c52
-rw-r--r--net/xdp/xsk.c6
-rw-r--r--net/xdp/xsk_buff_pool.c3
-rw-r--r--net/xfrm/espintcp.c4
-rw-r--r--net/xfrm/xfrm_compat.c6
-rw-r--r--net/xfrm/xfrm_device.c2
-rw-r--r--net/xfrm/xfrm_input.c2
-rw-r--r--net/xfrm/xfrm_interface_core.c7
-rw-r--r--net/xfrm/xfrm_output.c43
-rw-r--r--net/xfrm/xfrm_policy.c15
-rw-r--r--net/xfrm/xfrm_replay.c10
-rw-r--r--net/xfrm/xfrm_state.c335
-rw-r--r--net/xfrm/xfrm_user.c103
355 files changed, 6548 insertions, 3505 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index e45187b88220..49a6d49c23dc 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -131,7 +131,8 @@ int vlan_check_real_dev(struct net_device *real_dev,
{
const char *name = real_dev->name;
- if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
+ if (real_dev->features & NETIF_F_VLAN_CHALLENGED ||
+ real_dev->type != ARPHRD_ETHER) {
pr_info("VLANs not supported on %s\n", name);
NL_SET_ERR_MSG_MOD(extack, "VLANs not supported on device");
return -EOPNOTSUPP;
@@ -357,6 +358,35 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event)
return err;
}
+static void vlan_vid0_add(struct net_device *dev)
+{
+ struct vlan_info *vlan_info;
+ int err;
+
+ if (!(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ return;
+
+ pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name);
+
+ err = vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
+ if (err)
+ return;
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ vlan_info->auto_vid0 = true;
+}
+
+static void vlan_vid0_del(struct net_device *dev)
+{
+ struct vlan_info *vlan_info = rtnl_dereference(dev->vlan_info);
+
+ if (!vlan_info || !vlan_info->auto_vid0)
+ return;
+
+ vlan_info->auto_vid0 = false;
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+}
+
static int vlan_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
@@ -378,15 +408,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
return notifier_from_errno(err);
}
- if ((event == NETDEV_UP) &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
- pr_info("adding VLAN 0 to HW filter on device %s\n",
- dev->name);
- vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
- }
- if (event == NETDEV_DOWN &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+ if (event == NETDEV_UP)
+ vlan_vid0_add(dev);
+ else if (event == NETDEV_DOWN)
+ vlan_vid0_del(dev);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5eaf38875554..c7ffe591d593 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -33,6 +33,7 @@ struct vlan_info {
struct vlan_group grp;
struct list_head vid_list;
unsigned int nr_vids;
+ bool auto_vid0;
struct rcu_head rcu;
};
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 458040e8a0e0..9184cf7eb128 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -273,17 +273,6 @@ static int vlan_dev_open(struct net_device *dev)
goto out;
}
- if (dev->flags & IFF_ALLMULTI) {
- err = dev_set_allmulti(real_dev, 1);
- if (err < 0)
- goto del_unicast;
- }
- if (dev->flags & IFF_PROMISC) {
- err = dev_set_promiscuity(real_dev, 1);
- if (err < 0)
- goto clear_allmulti;
- }
-
ether_addr_copy(vlan->real_dev_addr, real_dev->dev_addr);
if (vlan->flags & VLAN_FLAG_GVRP)
@@ -297,12 +286,6 @@ static int vlan_dev_open(struct net_device *dev)
netif_carrier_on(dev);
return 0;
-clear_allmulti:
- if (dev->flags & IFF_ALLMULTI)
- dev_set_allmulti(real_dev, -1);
-del_unicast:
- if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
- dev_uc_del(real_dev, dev->dev_addr);
out:
netif_carrier_off(dev);
return err;
@@ -315,10 +298,6 @@ static int vlan_dev_stop(struct net_device *dev)
dev_mc_unsync(real_dev, dev);
dev_uc_unsync(real_dev, dev);
- if (dev->flags & IFF_ALLMULTI)
- dev_set_allmulti(real_dev, -1);
- if (dev->flags & IFF_PROMISC)
- dev_set_promiscuity(real_dev, -1);
if (!ether_addr_equal(dev->dev_addr, real_dev->dev_addr))
dev_uc_del(real_dev, dev->dev_addr);
@@ -490,12 +469,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
{
struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
- if (dev->flags & IFF_UP) {
- if (change & IFF_ALLMULTI)
- dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
- if (change & IFF_PROMISC)
- dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
- }
+ if (change & IFF_ALLMULTI)
+ dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
+ if (change & IFF_PROMISC)
+ dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1);
}
static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
diff --git a/net/9p/client.c b/net/9p/client.c
index 09f8ced9f8bb..52a5497cfca7 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1548,7 +1548,8 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
struct p9_client *clnt = fid->clnt;
struct p9_req_t *req;
int count = iov_iter_count(to);
- int rsize, received, non_zc = 0;
+ u32 rsize, received;
+ bool non_zc = false;
char *dataptr;
*err = 0;
@@ -1571,7 +1572,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
0, 11, "dqd", fid->fid,
offset, rsize);
} else {
- non_zc = 1;
+ non_zc = true;
req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
rsize);
}
@@ -1592,11 +1593,11 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
return 0;
}
if (rsize < received) {
- pr_err("bogus RREAD count (%d > %d)\n", received, rsize);
+ pr_err("bogus RREAD count (%u > %u)\n", received, rsize);
received = rsize;
}
- p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received);
+ p9_debug(P9_DEBUG_9P, "<<< RREAD count %u\n", received);
if (non_zc) {
int n = copy_to_iter(dataptr, received, to);
@@ -1623,9 +1624,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
*err = 0;
while (iov_iter_count(from)) {
- int count = iov_iter_count(from);
- int rsize = fid->iounit;
- int written;
+ size_t count = iov_iter_count(from);
+ u32 rsize = fid->iounit;
+ u32 written;
if (!rsize || rsize > clnt->msize - P9_IOHDRSZ)
rsize = clnt->msize - P9_IOHDRSZ;
@@ -1633,7 +1634,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
if (count < rsize)
rsize = count;
- p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n",
+ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %u (/%zu)\n",
fid->fid, offset, rsize, count);
/* Don't bother zerocopy for small IO (< 1024) */
@@ -1659,11 +1660,11 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
break;
}
if (rsize < written) {
- pr_err("bogus RWRITE count (%d > %d)\n", written, rsize);
+ pr_err("bogus RWRITE count (%u > %u)\n", written, rsize);
written = rsize;
}
- p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);
+ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %u\n", written);
p9_req_put(clnt, req);
iov_iter_revert(from, count - written - iov_iter_count(from));
@@ -2098,7 +2099,8 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate);
int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
{
- int err, rsize, non_zc = 0;
+ int err, non_zc = 0;
+ u32 rsize;
struct p9_client *clnt;
struct p9_req_t *req;
char *dataptr;
@@ -2107,7 +2109,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
iov_iter_kvec(&to, ITER_DEST, &kv, 1, count);
- p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
+ p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %u\n",
fid->fid, offset, count);
clnt = fid->clnt;
@@ -2142,11 +2144,11 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
goto free_and_error;
}
if (rsize < count) {
- pr_err("bogus RREADDIR count (%d > %d)\n", count, rsize);
+ pr_err("bogus RREADDIR count (%u > %u)\n", count, rsize);
count = rsize;
}
- p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
+ p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %u\n", count);
if (non_zc)
memmove(data, dataptr, count);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 196060dc6138..791e4868f2d4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -191,12 +191,13 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
spin_lock(&m->req_lock);
- if (m->err) {
+ if (READ_ONCE(m->err)) {
spin_unlock(&m->req_lock);
return;
}
- m->err = err;
+ WRITE_ONCE(m->err, err);
+ ASSERT_EXCLUSIVE_WRITER(m->err);
list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
list_move(&req->req_list, &cancel_list);
@@ -283,7 +284,7 @@ static void p9_read_work(struct work_struct *work)
m = container_of(work, struct p9_conn, rq);
- if (m->err < 0)
+ if (READ_ONCE(m->err) < 0)
return;
p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
@@ -450,7 +451,7 @@ static void p9_write_work(struct work_struct *work)
m = container_of(work, struct p9_conn, wq);
- if (m->err < 0) {
+ if (READ_ONCE(m->err) < 0) {
clear_bit(Wworksched, &m->wsched);
return;
}
@@ -622,7 +623,7 @@ static void p9_poll_mux(struct p9_conn *m)
__poll_t n;
int err = -ECONNRESET;
- if (m->err < 0)
+ if (READ_ONCE(m->err) < 0)
return;
n = p9_fd_poll(m->client, NULL, &err);
@@ -665,6 +666,7 @@ static void p9_poll_mux(struct p9_conn *m)
static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
{
__poll_t n;
+ int err;
struct p9_trans_fd *ts = client->trans;
struct p9_conn *m = &ts->conn;
@@ -673,9 +675,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
spin_lock(&m->req_lock);
- if (m->err < 0) {
+ err = READ_ONCE(m->err);
+ if (err < 0) {
spin_unlock(&m->req_lock);
- return m->err;
+ return err;
}
WRITE_ONCE(req->status, REQ_STATUS_UNSENT);
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 9fa0b246902b..70c5a508ffac 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -35,6 +35,7 @@
#include <linux/seq_file.h>
#include <linux/export.h>
#include <linux/etherdevice.h>
+#include <linux/refcount.h>
int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME;
int sysctl_aarp_tick_time = AARP_TICK_TIME;
@@ -44,6 +45,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME;
/* Lists of aarp entries */
/**
* struct aarp_entry - AARP entry
+ * @refcnt: Reference count
* @last_sent: Last time we xmitted the aarp request
* @packet_queue: Queue of frames wait for resolution
* @status: Used for proxy AARP
@@ -55,6 +57,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME;
* @next: Next entry in chain
*/
struct aarp_entry {
+ refcount_t refcnt;
/* These first two are only used for unresolved entries */
unsigned long last_sent;
struct sk_buff_head packet_queue;
@@ -79,6 +82,17 @@ static DEFINE_RWLOCK(aarp_lock);
/* Used to walk the list and purge/kick entries. */
static struct timer_list aarp_timer;
+static inline void aarp_entry_get(struct aarp_entry *a)
+{
+ refcount_inc(&a->refcnt);
+}
+
+static inline void aarp_entry_put(struct aarp_entry *a)
+{
+ if (refcount_dec_and_test(&a->refcnt))
+ kfree(a);
+}
+
/*
* Delete an aarp queue
*
@@ -87,7 +101,7 @@ static struct timer_list aarp_timer;
static void __aarp_expire(struct aarp_entry *a)
{
skb_queue_purge(&a->packet_queue);
- kfree(a);
+ aarp_entry_put(a);
}
/*
@@ -380,9 +394,11 @@ static void aarp_purge(void)
static struct aarp_entry *aarp_alloc(void)
{
struct aarp_entry *a = kmalloc(sizeof(*a), GFP_ATOMIC);
+ if (!a)
+ return NULL;
- if (a)
- skb_queue_head_init(&a->packet_queue);
+ refcount_set(&a->refcnt, 1);
+ skb_queue_head_init(&a->packet_queue);
return a;
}
@@ -508,6 +524,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa)
entry->dev = atif->dev;
write_lock_bh(&aarp_lock);
+ aarp_entry_get(entry);
hash = sa->s_node % (AARP_HASH_SIZE - 1);
entry->next = proxies[hash];
@@ -533,6 +550,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa)
retval = 1;
}
+ aarp_entry_put(entry);
write_unlock_bh(&aarp_lock);
out:
return retval;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index b068651984fe..fa7f002b14fa 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -576,6 +576,7 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint)
/* Fill in the routing entry */
rt->target = ta->sat_addr;
+ dev_put(rt->dev); /* Release old device */
dev_hold(devhint);
rt->dev = devhint;
rt->flags = r->rt_flags;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 42b910cb4e8e..ebba0d6ae324 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -45,7 +45,8 @@
#include <net/atmclip.h>
static struct net_device *clip_devs;
-static struct atm_vcc *atmarpd;
+static struct atm_vcc __rcu *atmarpd;
+static DEFINE_MUTEX(atmarpd_lock);
static struct timer_list idle_timer;
static const struct neigh_ops clip_neigh_ops;
@@ -53,24 +54,35 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
{
struct sock *sk;
struct atmarp_ctrl *ctrl;
+ struct atm_vcc *vcc;
struct sk_buff *skb;
+ int err = 0;
pr_debug("(%d)\n", type);
- if (!atmarpd)
- return -EUNATCH;
+
+ rcu_read_lock();
+ vcc = rcu_dereference(atmarpd);
+ if (!vcc) {
+ err = -EUNATCH;
+ goto unlock;
+ }
skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC);
- if (!skb)
- return -ENOMEM;
+ if (!skb) {
+ err = -ENOMEM;
+ goto unlock;
+ }
ctrl = skb_put(skb, sizeof(struct atmarp_ctrl));
ctrl->type = type;
ctrl->itf_num = itf;
ctrl->ip = ip;
- atm_force_charge(atmarpd, skb->truesize);
+ atm_force_charge(vcc, skb->truesize);
- sk = sk_atm(atmarpd);
+ sk = sk_atm(vcc);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk);
- return 0;
+unlock:
+ rcu_read_unlock();
+ return err;
}
static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry)
@@ -193,12 +205,6 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
pr_debug("\n");
- if (!clip_devs) {
- atm_return(vcc, skb->truesize);
- kfree_skb(skb);
- return;
- }
-
if (!skb) {
pr_debug("removing VCC %p\n", clip_vcc);
if (clip_vcc->entry)
@@ -208,6 +214,11 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
return;
}
atm_return(vcc, skb->truesize);
+ if (!clip_devs) {
+ kfree_skb(skb);
+ return;
+ }
+
skb->dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : clip_devs;
/* clip_vcc->entry == NULL if we don't have an IP address yet */
if (!skb->dev) {
@@ -418,6 +429,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
if (!vcc->push)
return -EBADFD;
+ if (vcc->user_back)
+ return -EINVAL;
clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL);
if (!clip_vcc)
return -ENOMEM;
@@ -608,17 +621,27 @@ static void atmarpd_close(struct atm_vcc *vcc)
{
pr_debug("\n");
- rtnl_lock();
- atmarpd = NULL;
+ mutex_lock(&atmarpd_lock);
+ RCU_INIT_POINTER(atmarpd, NULL);
+ mutex_unlock(&atmarpd_lock);
+
+ synchronize_rcu();
skb_queue_purge(&sk_atm(vcc)->sk_receive_queue);
- rtnl_unlock();
pr_debug("(done)\n");
module_put(THIS_MODULE);
}
+static int atmarpd_send(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ atm_return_tx(vcc, skb);
+ dev_kfree_skb_any(skb);
+ return 0;
+}
+
static const struct atmdev_ops atmarpd_dev_ops = {
- .close = atmarpd_close
+ .close = atmarpd_close,
+ .send = atmarpd_send
};
@@ -632,15 +655,18 @@ static struct atm_dev atmarpd_dev = {
static int atm_init_atmarp(struct atm_vcc *vcc)
{
- rtnl_lock();
+ if (vcc->push == clip_push)
+ return -EINVAL;
+
+ mutex_lock(&atmarpd_lock);
if (atmarpd) {
- rtnl_unlock();
+ mutex_unlock(&atmarpd_lock);
return -EADDRINUSE;
}
mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ);
- atmarpd = vcc;
+ rcu_assign_pointer(atmarpd, vcc);
set_bit(ATM_VF_META, &vcc->flags);
set_bit(ATM_VF_READY, &vcc->flags);
/* allow replies and avoid getting closed if signaling dies */
@@ -649,13 +675,14 @@ static int atm_init_atmarp(struct atm_vcc *vcc)
vcc->push = NULL;
vcc->pop = NULL; /* crash */
vcc->push_oam = NULL; /* crash */
- rtnl_unlock();
+ mutex_unlock(&atmarpd_lock);
return 0;
}
static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct atm_vcc *vcc = ATM_SD(sock);
+ struct sock *sk = sock->sk;
int err = 0;
switch (cmd) {
@@ -676,14 +703,18 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
err = clip_create(arg);
break;
case ATMARPD_CTRL:
+ lock_sock(sk);
err = atm_init_atmarp(vcc);
if (!err) {
sock->state = SS_CONNECTED;
__module_get(THIS_MODULE);
}
+ release_sock(sk);
break;
case ATMARP_MKIP:
+ lock_sock(sk);
err = clip_mkip(vcc, arg);
+ release_sock(sk);
break;
case ATMARP_SETENTRY:
err = clip_setentry(vcc, (__force __be32)arg);
diff --git a/net/atm/common.c b/net/atm/common.c
index 9b75699992ff..d7f7976ea13a 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -635,6 +635,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
skb->dev = NULL; /* for paths shared with net_device interfaces */
if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
+ atm_return_tx(vcc, skb);
kfree_skb(skb);
error = -EFAULT;
goto out;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index ffef658862db..42e8047c6510 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -124,6 +124,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/* Device structures */
static struct net_device *dev_lec[MAX_LEC_ITF];
+static DEFINE_MUTEX(lec_mutex);
#if IS_ENABLED(CONFIG_BRIDGE)
static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
@@ -181,6 +182,7 @@ static void
lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
+ unsigned int len = skb->len;
ATM_SKB(skb)->vcc = vcc;
atm_account_tx(vcc, skb);
@@ -191,7 +193,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
}
dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ dev->stats.tx_bytes += len;
}
static void lec_tx_timeout(struct net_device *dev, unsigned int txqueue)
@@ -684,6 +686,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
int bytes_left;
struct atmlec_ioc ioc_data;
+ lockdep_assert_held(&lec_mutex);
/* Lecd must be up in this case */
bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc));
if (bytes_left != 0)
@@ -709,6 +712,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
{
+ lockdep_assert_held(&lec_mutex);
if (arg < 0 || arg >= MAX_LEC_ITF)
return -EINVAL;
arg = array_index_nospec(arg, MAX_LEC_ITF);
@@ -724,6 +728,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
int i;
struct lec_priv *priv;
+ lockdep_assert_held(&lec_mutex);
if (arg < 0)
arg = 0;
if (arg >= MAX_LEC_ITF)
@@ -741,6 +746,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
if (register_netdev(dev_lec[i])) {
free_netdev(dev_lec[i]);
+ dev_lec[i] = NULL;
return -EINVAL;
}
@@ -903,7 +909,6 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l)
v = (dev && netdev_priv(dev)) ?
lec_priv_walk(state, l, netdev_priv(dev)) : NULL;
if (!v && dev) {
- dev_put(dev);
/* Partial state reset for the next time we get called */
dev = NULL;
}
@@ -927,6 +932,7 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos)
{
struct lec_state *state = seq->private;
+ mutex_lock(&lec_mutex);
state->itf = 0;
state->dev = NULL;
state->locked = NULL;
@@ -944,8 +950,9 @@ static void lec_seq_stop(struct seq_file *seq, void *v)
if (state->dev) {
spin_unlock_irqrestore(&state->locked->lec_arp_lock,
state->flags);
- dev_put(state->dev);
+ state->dev = NULL;
}
+ mutex_unlock(&lec_mutex);
}
static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -1002,6 +1009,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return -ENOIOCTLCMD;
}
+ mutex_lock(&lec_mutex);
switch (cmd) {
case ATMLEC_CTRL:
err = lecd_attach(vcc, (int)arg);
@@ -1016,6 +1024,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
}
+ mutex_unlock(&lec_mutex);
return err;
}
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 324e3ab96bb3..12da0269275c 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1314,6 +1314,8 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg,
holding_time = msg->content.eg_info.holding_time;
dprintk("(%s) entry = %p, holding_time = %u\n",
mpc->dev->name, entry, holding_time);
+ if (entry == NULL && !holding_time)
+ return;
if (entry == NULL && holding_time) {
entry = mpc->eg_ops->add_entry(msg, mpc);
mpc->eg_ops->put(entry);
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 2b5f78a7ec3e..1e6511ec842c 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -36,7 +36,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
pr_debug("(%d) %d -= %d\n",
vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize);
- WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc));
+ atm_return_tx(vcc, skb);
dev_kfree_skb_any(skb);
sk->sk_write_space(sk);
}
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 995d29e7fb13..b19d851e1f44 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -146,11 +146,10 @@ void atm_dev_deregister(struct atm_dev *dev)
*/
mutex_lock(&atm_dev_mutex);
list_del(&dev->dev_list);
- mutex_unlock(&atm_dev_mutex);
-
atm_dev_release_vccs(dev);
atm_unregister_sysfs(dev);
atm_proc_dev_deregister(dev);
+ mutex_unlock(&atm_dev_mutex);
atm_dev_put(dev);
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index d6f9fae06a9d..9f3b8b682adb 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -467,7 +467,7 @@ einval_put:
goto out_put;
}
-static void ax25_fillin_cb_from_dev(ax25_cb *ax25, ax25_dev *ax25_dev)
+static void ax25_fillin_cb_from_dev(ax25_cb *ax25, const ax25_dev *ax25_dev)
{
ax25->rtt = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]) / 2;
ax25->t1 = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]);
@@ -677,22 +677,33 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
}
- rtnl_lock();
- dev = __dev_get_by_name(&init_net, devname);
+ rcu_read_lock();
+ dev = dev_get_by_name_rcu(&init_net, devname);
if (!dev) {
- rtnl_unlock();
+ rcu_read_unlock();
res = -ENODEV;
break;
}
+ if (ax25->ax25_dev) {
+ if (dev == ax25->ax25_dev->dev) {
+ rcu_read_unlock();
+ break;
+ }
+ netdev_put(ax25->ax25_dev->dev, &ax25->dev_tracker);
+ ax25_dev_put(ax25->ax25_dev);
+ }
+
ax25->ax25_dev = ax25_dev_ax25dev(dev);
if (!ax25->ax25_dev) {
- rtnl_unlock();
+ rcu_read_unlock();
res = -ENODEV;
break;
}
ax25_fillin_cb(ax25, ax25->ax25_dev);
- rtnl_unlock();
+ netdev_hold(dev, &ax25->dev_tracker, GFP_ATOMIC);
+ ax25_dev_hold(ax25->ax25_dev);
+ rcu_read_unlock();
break;
default:
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 9efd6690b344..3733c0254a50 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -90,7 +90,7 @@ void ax25_dev_device_up(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);
list_add(&ax25_dev->list, &ax25_dev_list);
- dev->ax25_ptr = ax25_dev;
+ rcu_assign_pointer(dev->ax25_ptr, ax25_dev);
spin_unlock_bh(&ax25_dev_lock);
ax25_register_dev_sysctl(ax25_dev);
@@ -125,7 +125,7 @@ void ax25_dev_device_down(struct net_device *dev)
}
}
- dev->ax25_ptr = NULL;
+ RCU_INIT_POINTER(dev->ax25_ptr, NULL);
spin_unlock_bh(&ax25_dev_lock);
netdev_put(dev, &ax25_dev->dev_tracker);
ax25_dev_put(ax25_dev);
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 36249776c021..215d4ccf12b9 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -122,6 +122,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
if (dev == NULL)
dev = skb->dev;
+ rcu_read_lock();
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
kfree_skb(skb);
goto put;
@@ -202,7 +203,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
ax25_queue_xmit(skb, dev);
put:
-
+ rcu_read_unlock();
ax25_route_lock_unuse();
return NETDEV_TX_OK;
}
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 3db76d2470e9..8bca2ace98e5 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -39,10 +39,14 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *sr
* specified.
*/
if (paclen == 0) {
- if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+ rcu_read_lock();
+ ax25_dev = ax25_dev_ax25dev(dev);
+ if (!ax25_dev) {
+ rcu_read_unlock();
return NULL;
-
+ }
paclen = ax25_dev->values[AX25_VALUES_PACLEN];
+ rcu_read_unlock();
}
/*
@@ -53,13 +57,19 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *sr
return ax25; /* It already existed */
}
- if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+ rcu_read_lock();
+ ax25_dev = ax25_dev_ax25dev(dev);
+ if (!ax25_dev) {
+ rcu_read_unlock();
return NULL;
+ }
- if ((ax25 = ax25_create_cb()) == NULL)
+ if ((ax25 = ax25_create_cb()) == NULL) {
+ rcu_read_unlock();
return NULL;
-
+ }
ax25_fillin_cb(ax25, ax25_dev);
+ rcu_read_unlock();
ax25->source_addr = *src;
ax25->dest_addr = *dest;
@@ -358,7 +368,9 @@ void ax25_queue_xmit(struct sk_buff *skb, struct net_device *dev)
{
unsigned char *ptr;
+ rcu_read_lock();
skb->protocol = ax25_type_trans(skb, ax25_fwd_dev(dev));
+ rcu_read_unlock();
ptr = skb_push(skb, 1);
*ptr = 0x00; /* KISS */
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index b7c4d656a94b..69de75db0c9c 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -406,6 +406,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
ax25_route_lock_unuse();
return -EHOSTUNREACH;
}
+ rcu_read_lock();
if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) {
err = -EHOSTUNREACH;
goto put;
@@ -442,6 +443,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
}
put:
+ rcu_read_unlock();
ax25_route_lock_unuse();
return err;
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 74b49c35ddc1..209180b4c268 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -324,8 +324,7 @@ batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len,
/* check if there is enough space for the optional TVLV */
next_buff_pos += ntohs(ogm_packet->tvlv_len);
- return (next_buff_pos <= packet_len) &&
- (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES);
+ return next_buff_pos <= packet_len;
}
/* send a batman ogm to a given interface */
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index ac11f1f08db0..d35479c465e2 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -113,8 +113,6 @@ static void
batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
{
ewma_throughput_init(&hardif_neigh->bat_v.throughput);
- INIT_WORK(&hardif_neigh->bat_v.metric_work,
- batadv_v_elp_throughput_metric_update);
}
/**
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 1d704574e6bf..b065578b4436 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -18,6 +18,7 @@
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
@@ -26,6 +27,7 @@
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -42,6 +44,18 @@
#include "send.h"
/**
+ * struct batadv_v_metric_queue_entry - list of hardif neighbors which require
+ * and metric update
+ */
+struct batadv_v_metric_queue_entry {
+ /** @hardif_neigh: hardif neighbor scheduled for metric update */
+ struct batadv_hardif_neigh_node *hardif_neigh;
+
+ /** @list: list node for metric_queue */
+ struct list_head list;
+};
+
+/**
* batadv_v_elp_start_timer() - restart timer for ELP periodic work
* @hard_iface: the interface for which the timer has to be reset
*/
@@ -59,25 +73,36 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
/**
* batadv_v_elp_get_throughput() - get the throughput towards a neighbour
* @neigh: the neighbour for which the throughput has to be obtained
+ * @pthroughput: calculated throughput towards the given neighbour in multiples
+ * of 100kpbs (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc).
*
- * Return: The throughput towards the given neighbour in multiples of 100kpbs
- * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc).
+ * Return: true when value behind @pthroughput was set
*/
-static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
+static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
+ u32 *pthroughput)
{
struct batadv_hard_iface *hard_iface = neigh->if_incoming;
+ struct net_device *soft_iface = hard_iface->soft_iface;
struct ethtool_link_ksettings link_settings;
struct net_device *real_netdev;
struct station_info sinfo;
u32 throughput;
int ret;
+ /* don't query throughput when no longer associated with any
+ * batman-adv interface
+ */
+ if (!soft_iface)
+ return false;
+
/* if the user specified a customised value for this interface, then
* return it directly
*/
throughput = atomic_read(&hard_iface->bat_v.throughput_override);
- if (throughput != 0)
- return throughput;
+ if (throughput != 0) {
+ *pthroughput = throughput;
+ return true;
+ }
/* if this is a wireless device, then ask its throughput through
* cfg80211 API
@@ -104,27 +129,39 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
* possible to delete this neighbor. For now set
* the throughput metric to 0.
*/
- return 0;
+ *pthroughput = 0;
+ return true;
}
if (ret)
goto default_throughput;
- if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT))
- return sinfo.expected_throughput / 100;
+ if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) {
+ *pthroughput = sinfo.expected_throughput / 100;
+ return true;
+ }
/* try to estimate the expected throughput based on reported tx
* rates
*/
- if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))
- return cfg80211_calculate_bitrate(&sinfo.txrate) / 3;
+ if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) {
+ *pthroughput = cfg80211_calculate_bitrate(&sinfo.txrate) / 3;
+ return true;
+ }
goto default_throughput;
}
+ /* only use rtnl_trylock because the elp worker will be cancelled while
+ * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise
+ * wait forever when the elp work_item was started and it is then also
+ * trying to rtnl_lock
+ */
+ if (!rtnl_trylock())
+ return false;
+
/* if not a wifi interface, check if this device provides data via
* ethtool (e.g. an Ethernet adapter)
*/
- rtnl_lock();
ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
rtnl_unlock();
if (ret == 0) {
@@ -135,13 +172,15 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
throughput = link_settings.base.speed;
- if (throughput && throughput != SPEED_UNKNOWN)
- return throughput * 10;
+ if (throughput && throughput != SPEED_UNKNOWN) {
+ *pthroughput = throughput * 10;
+ return true;
+ }
}
default_throughput:
if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) {
- batadv_info(hard_iface->soft_iface,
+ batadv_info(soft_iface,
"WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. Consider overriding the throughput manually or checking your driver.\n",
hard_iface->net_dev->name,
BATADV_THROUGHPUT_DEFAULT_VALUE / 10,
@@ -150,31 +189,26 @@ default_throughput:
}
/* if none of the above cases apply, return the base_throughput */
- return BATADV_THROUGHPUT_DEFAULT_VALUE;
+ *pthroughput = BATADV_THROUGHPUT_DEFAULT_VALUE;
+ return true;
}
/**
* batadv_v_elp_throughput_metric_update() - worker updating the throughput
* metric of a single hop neighbour
- * @work: the work queue item
+ * @neigh: the neighbour to probe
*/
-void batadv_v_elp_throughput_metric_update(struct work_struct *work)
+static void
+batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh)
{
- struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
- struct batadv_hardif_neigh_node *neigh;
-
- neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
- metric_work);
- neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
- bat_v);
+ u32 throughput;
+ bool valid;
- ewma_throughput_add(&neigh->bat_v.throughput,
- batadv_v_elp_get_throughput(neigh));
+ valid = batadv_v_elp_get_throughput(neigh, &throughput);
+ if (!valid)
+ return;
- /* decrement refcounter to balance increment performed before scheduling
- * this task
- */
- batadv_hardif_neigh_put(neigh);
+ ewma_throughput_add(&neigh->bat_v.throughput, throughput);
}
/**
@@ -248,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
*/
static void batadv_v_elp_periodic_work(struct work_struct *work)
{
+ struct batadv_v_metric_queue_entry *metric_entry;
+ struct batadv_v_metric_queue_entry *metric_safe;
struct batadv_hardif_neigh_node *hardif_neigh;
struct batadv_hard_iface *hard_iface;
struct batadv_hard_iface_bat_v *bat_v;
struct batadv_elp_packet *elp_packet;
+ struct list_head metric_queue;
struct batadv_priv *bat_priv;
struct sk_buff *skb;
u32 elp_interval;
- bool ret;
bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
@@ -291,6 +327,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
atomic_inc(&hard_iface->bat_v.elp_seqno);
+ INIT_LIST_HEAD(&metric_queue);
+
/* The throughput metric is updated on each sent packet. This way, if a
* node is dead and no longer sends packets, batman-adv is still able to
* react timely to its death.
@@ -315,16 +353,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
/* Reading the estimated throughput from cfg80211 is a task that
* may sleep and that is not allowed in an rcu protected
- * context. Therefore schedule a task for that.
+ * context. Therefore add it to metric_queue and process it
+ * outside rcu protected context.
*/
- ret = queue_work(batadv_event_workqueue,
- &hardif_neigh->bat_v.metric_work);
-
- if (!ret)
+ metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC);
+ if (!metric_entry) {
batadv_hardif_neigh_put(hardif_neigh);
+ continue;
+ }
+
+ metric_entry->hardif_neigh = hardif_neigh;
+ list_add(&metric_entry->list, &metric_queue);
}
rcu_read_unlock();
+ list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) {
+ batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh);
+
+ batadv_hardif_neigh_put(metric_entry->hardif_neigh);
+ list_del(&metric_entry->list);
+ kfree(metric_entry);
+ }
+
restart_timer:
batadv_v_elp_start_timer(hard_iface);
out:
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 9e2740195fa2..c9cb0a307100 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -10,7 +10,6 @@
#include "main.h"
#include <linux/skbuff.h>
-#include <linux/workqueue.h>
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
@@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface,
void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_hard_iface *if_incoming);
-void batadv_v_elp_throughput_metric_update(struct work_struct *work);
#endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index e503ee0d896b..8f89ffe6020c 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -839,8 +839,7 @@ batadv_v_ogm_aggr_packet(int buff_pos, int packet_len,
/* check if there is enough space for the optional TVLV */
next_buff_pos += ntohs(ogm2_packet->tvlv_len);
- return (next_buff_pos <= packet_len) &&
- (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES);
+ return next_buff_pos <= packet_len;
}
/**
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 04f6398b3a40..85a50096f5b2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v {
* neighbor
*/
unsigned long last_unicast_tx;
-
- /** @metric_work: work queue callback item for metric update */
- struct work_struct metric_work;
};
/**
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 50cfec8ccac4..3c29778171c5 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -825,11 +825,16 @@ static struct sk_buff *chan_alloc_skb_cb(struct l2cap_chan *chan,
unsigned long hdr_len,
unsigned long len, int nb)
{
+ struct sk_buff *skb;
+
/* Note that we must allocate using GFP_ATOMIC here as
* this function is called originally from netdev hard xmit
* function in atomic context.
*/
- return bt_skb_alloc(hdr_len + len, GFP_ATOMIC);
+ skb = bt_skb_alloc(hdr_len + len, GFP_ATOMIC);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+ return skb;
}
static void chan_suspend_cb(struct l2cap_chan *chan)
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 1bc51e2b05a3..3f72111ba651 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -242,7 +242,7 @@ u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
return ad_len;
}
-u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
+u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size)
{
struct adv_info *adv = NULL;
u8 ad_len = 0, flags = 0;
@@ -286,7 +286,7 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
/* If flags would still be empty, then there is no need to
* include the "Flags" AD field".
*/
- if (flags) {
+ if (flags && (ad_len + eir_precalc_len(1) <= size)) {
ptr[0] = 0x02;
ptr[1] = EIR_FLAGS;
ptr[2] = flags;
@@ -316,7 +316,8 @@ skip_flags:
}
/* Provide Tx Power only if we can provide a valid value for it */
- if (adv_tx_power != HCI_TX_POWER_INVALID) {
+ if (adv_tx_power != HCI_TX_POWER_INVALID &&
+ (ad_len + eir_precalc_len(1) <= size)) {
ptr[0] = 0x02;
ptr[1] = EIR_TX_POWER;
ptr[2] = (u8)adv_tx_power;
@@ -366,17 +367,19 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr)
void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len)
{
- while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) {
+ size_t dlen;
+
+ while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, &dlen))) {
u16 value = get_unaligned_le16(eir);
if (uuid == value) {
if (len)
- *len -= 2;
+ *len = dlen - 2;
return &eir[2];
}
- eir += *len;
- eir_len -= *len;
+ eir += dlen;
+ eir_len -= dlen;
}
return NULL;
diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h
index 5c89a05e8b29..9372db83f912 100644
--- a/net/bluetooth/eir.h
+++ b/net/bluetooth/eir.h
@@ -9,7 +9,7 @@
void eir_create(struct hci_dev *hdev, u8 *data);
-u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
+u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size);
u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index e6591f487a51..c6c1232db4e2 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -2061,109 +2061,14 @@ static int create_big_sync(struct hci_dev *hdev, void *data)
return hci_le_create_big(conn, &conn->iso_qos);
}
-static void create_pa_complete(struct hci_dev *hdev, void *data, int err)
-{
- bt_dev_dbg(hdev, "");
-
- if (err)
- bt_dev_err(hdev, "Unable to create PA: %d", err);
-}
-
-static bool hci_conn_check_create_pa_sync(struct hci_conn *conn)
-{
- if (conn->type != ISO_LINK || conn->sid == HCI_SID_INVALID)
- return false;
-
- return true;
-}
-
-static int create_pa_sync(struct hci_dev *hdev, void *data)
-{
- struct hci_cp_le_pa_create_sync *cp = NULL;
- struct hci_conn *conn;
- int err = 0;
-
- hci_dev_lock(hdev);
-
- rcu_read_lock();
-
- /* The spec allows only one pending LE Periodic Advertising Create
- * Sync command at a time. If the command is pending now, don't do
- * anything. We check for pending connections after each PA Sync
- * Established event.
- *
- * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
- * page 2493:
- *
- * If the Host issues this command when another HCI_LE_Periodic_
- * Advertising_Create_Sync command is pending, the Controller shall
- * return the error code Command Disallowed (0x0C).
- */
- list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- if (test_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags))
- goto unlock;
- }
-
- list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- if (hci_conn_check_create_pa_sync(conn)) {
- struct bt_iso_qos *qos = &conn->iso_qos;
-
- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
- if (!cp) {
- err = -ENOMEM;
- goto unlock;
- }
-
- cp->options = qos->bcast.options;
- cp->sid = conn->sid;
- cp->addr_type = conn->dst_type;
- bacpy(&cp->addr, &conn->dst);
- cp->skip = cpu_to_le16(qos->bcast.skip);
- cp->sync_timeout = cpu_to_le16(qos->bcast.sync_timeout);
- cp->sync_cte_type = qos->bcast.sync_cte_type;
-
- break;
- }
- }
-
-unlock:
- rcu_read_unlock();
-
- hci_dev_unlock(hdev);
-
- if (cp) {
- hci_dev_set_flag(hdev, HCI_PA_SYNC);
- set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
-
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC,
- sizeof(*cp), cp, HCI_CMD_TIMEOUT);
- if (!err)
- err = hci_update_passive_scan_sync(hdev);
-
- kfree(cp);
-
- if (err) {
- hci_dev_clear_flag(hdev, HCI_PA_SYNC);
- clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
- }
- }
-
- return err;
-}
-
-int hci_pa_create_sync_pending(struct hci_dev *hdev)
-{
- /* Queue start pa_create_sync and scan */
- return hci_cmd_sync_queue(hdev, create_pa_sync,
- NULL, create_pa_complete);
-}
-
struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
__u8 dst_type, __u8 sid,
struct bt_iso_qos *qos)
{
struct hci_conn *conn;
+ bt_dev_dbg(hdev, "dst %pMR type %d sid %d", dst, dst_type, sid);
+
conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE);
if (IS_ERR(conn))
return conn;
@@ -2172,97 +2077,18 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
conn->dst_type = dst_type;
conn->sid = sid;
conn->state = BT_LISTEN;
+ conn->conn_timeout = msecs_to_jiffies(qos->bcast.sync_timeout * 10);
hci_conn_hold(conn);
- hci_pa_create_sync_pending(hdev);
+ hci_connect_pa_sync(hdev, conn);
return conn;
}
-static bool hci_conn_check_create_big_sync(struct hci_conn *conn)
-{
- if (!conn->num_bis)
- return false;
-
- return true;
-}
-
-static void big_create_sync_complete(struct hci_dev *hdev, void *data, int err)
-{
- bt_dev_dbg(hdev, "");
-
- if (err)
- bt_dev_err(hdev, "Unable to create BIG sync: %d", err);
-}
-
-static int big_create_sync(struct hci_dev *hdev, void *data)
-{
- DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11);
- struct hci_conn *conn;
-
- rcu_read_lock();
-
- pdu->num_bis = 0;
-
- /* The spec allows only one pending LE BIG Create Sync command at
- * a time. If the command is pending now, don't do anything. We
- * check for pending connections after each BIG Sync Established
- * event.
- *
- * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
- * page 2586:
- *
- * If the Host sends this command when the Controller is in the
- * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_
- * Established event has not been generated, the Controller shall
- * return the error code Command Disallowed (0x0C).
- */
- list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- if (test_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags))
- goto unlock;
- }
-
- list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) {
- if (hci_conn_check_create_big_sync(conn)) {
- struct bt_iso_qos *qos = &conn->iso_qos;
-
- set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags);
-
- pdu->handle = qos->bcast.big;
- pdu->sync_handle = cpu_to_le16(conn->sync_handle);
- pdu->encryption = qos->bcast.encryption;
- memcpy(pdu->bcode, qos->bcast.bcode,
- sizeof(pdu->bcode));
- pdu->mse = qos->bcast.mse;
- pdu->timeout = cpu_to_le16(qos->bcast.timeout);
- pdu->num_bis = conn->num_bis;
- memcpy(pdu->bis, conn->bis, conn->num_bis);
-
- break;
- }
- }
-
-unlock:
- rcu_read_unlock();
-
- if (!pdu->num_bis)
- return 0;
-
- return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
- struct_size(pdu, bis, pdu->num_bis), pdu);
-}
-
-int hci_le_big_create_sync_pending(struct hci_dev *hdev)
-{
- /* Queue big_create_sync */
- return hci_cmd_sync_queue_once(hdev, big_create_sync,
- NULL, big_create_sync_complete);
-}
-
-int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
- struct bt_iso_qos *qos,
- __u16 sync_handle, __u8 num_bis, __u8 bis[])
+int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
+ struct bt_iso_qos *qos, __u16 sync_handle,
+ __u8 num_bis, __u8 bis[])
{
int err;
@@ -2279,9 +2105,10 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
hcon->num_bis = num_bis;
memcpy(hcon->bis, bis, num_bis);
+ hcon->conn_timeout = msecs_to_jiffies(qos->bcast.timeout * 10);
}
- return hci_le_big_create_sync_pending(hdev);
+ return hci_connect_big_sync(hdev, hcon);
}
static void create_big_complete(struct hci_dev *hdev, void *data, int err)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b5553c08e731..b74ada809237 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -57,13 +57,14 @@ DEFINE_RWLOCK(hci_dev_list_lock);
/* HCI callback list */
LIST_HEAD(hci_cb_list);
+DEFINE_MUTEX(hci_cb_list_lock);
/* HCI ID Numbering */
static DEFINE_IDA(hci_index_ida);
/* Get HCI device by index.
* Device is held on return. */
-struct hci_dev *hci_dev_get(int index)
+static struct hci_dev *__hci_dev_get(int index, int *srcu_index)
{
struct hci_dev *hdev = NULL, *d;
@@ -76,6 +77,8 @@ struct hci_dev *hci_dev_get(int index)
list_for_each_entry(d, &hci_dev_list, list) {
if (d->id == index) {
hdev = hci_dev_hold(d);
+ if (srcu_index)
+ *srcu_index = srcu_read_lock(&d->srcu);
break;
}
}
@@ -83,6 +86,22 @@ struct hci_dev *hci_dev_get(int index)
return hdev;
}
+struct hci_dev *hci_dev_get(int index)
+{
+ return __hci_dev_get(index, NULL);
+}
+
+static struct hci_dev *hci_dev_get_srcu(int index, int *srcu_index)
+{
+ return __hci_dev_get(index, srcu_index);
+}
+
+static void hci_dev_put_srcu(struct hci_dev *hdev, int srcu_index)
+{
+ srcu_read_unlock(&hdev->srcu, srcu_index);
+ hci_dev_put(hdev);
+}
+
/* ---- Inquiry support ---- */
bool hci_discovery_active(struct hci_dev *hdev)
@@ -567,9 +586,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev)
int hci_dev_reset(__u16 dev)
{
struct hci_dev *hdev;
- int err;
+ int err, srcu_index;
- hdev = hci_dev_get(dev);
+ hdev = hci_dev_get_srcu(dev, &srcu_index);
if (!hdev)
return -ENODEV;
@@ -591,7 +610,7 @@ int hci_dev_reset(__u16 dev)
err = hci_dev_do_reset(hdev);
done:
- hci_dev_put(hdev);
+ hci_dev_put_srcu(hdev, srcu_index);
return err;
}
@@ -1876,10 +1895,8 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
if (monitor->handle)
idr_remove(&hdev->adv_monitors_idr, monitor->handle);
- if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) {
+ if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED)
hdev->adv_monitors_cnt--;
- mgmt_adv_monitor_removed(hdev, monitor->handle);
- }
kfree(monitor);
}
@@ -2440,6 +2457,11 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
if (!hdev)
return NULL;
+ if (init_srcu_struct(&hdev->srcu)) {
+ kfree(hdev);
+ return NULL;
+ }
+
hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1);
hdev->esco_type = (ESCO_HV1);
hdev->link_mode = (HCI_LM_ACCEPT);
@@ -2506,6 +2528,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
+ mutex_init(&hdev->mgmt_pending_lock);
ida_init(&hdev->unset_handle_ida);
@@ -2684,6 +2707,9 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
+ synchronize_srcu(&hdev->srcu);
+ cleanup_srcu_struct(&hdev->srcu);
+
disable_work_sync(&hdev->rx_work);
disable_work_sync(&hdev->cmd_work);
disable_work_sync(&hdev->tx_work);
@@ -2992,7 +3018,9 @@ int hci_register_cb(struct hci_cb *cb)
{
BT_DBG("%p name %s", cb, cb->name);
- list_add_tail_rcu(&cb->list, &hci_cb_list);
+ mutex_lock(&hci_cb_list_lock);
+ list_add_tail(&cb->list, &hci_cb_list);
+ mutex_unlock(&hci_cb_list_lock);
return 0;
}
@@ -3002,8 +3030,9 @@ int hci_unregister_cb(struct hci_cb *cb)
{
BT_DBG("%p name %s", cb, cb->name);
- list_del_rcu(&cb->list);
- synchronize_rcu();
+ mutex_lock(&hci_cb_list_lock);
+ list_del(&cb->list);
+ mutex_unlock(&hci_cb_list_lock);
return 0;
}
@@ -3412,23 +3441,18 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
bt_dev_err(hdev, "link tx timeout");
- rcu_read_lock();
+ hci_dev_lock(hdev);
/* Kill stalled connections */
- list_for_each_entry_rcu(c, &h->list, list) {
+ list_for_each_entry(c, &h->list, list) {
if (c->type == type && c->sent) {
bt_dev_err(hdev, "killing stalled connection %pMR",
&c->dst);
- /* hci_disconnect might sleep, so, we have to release
- * the RCU read lock before calling it.
- */
- rcu_read_unlock();
hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
- rcu_read_lock();
}
}
- rcu_read_unlock();
+ hci_dev_unlock(hdev);
}
static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
@@ -4067,10 +4091,13 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
- err = hci_send_frame(hdev, skb);
- if (err < 0) {
- hci_cmd_sync_cancel_sync(hdev, -err);
- return;
+ if (hci_skb_opcode(skb) != HCI_OP_NOP) {
+ err = hci_send_frame(hdev, skb);
+ if (err < 0) {
+ hci_cmd_sync_cancel_sync(hdev, -err);
+ return;
+ }
+ atomic_dec(&hdev->cmd_cnt);
}
if (hdev->req_status == HCI_REQ_PEND &&
@@ -4078,8 +4105,6 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
kfree_skb(hdev->req_skb);
hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
}
-
- atomic_dec(&hdev->cmd_cnt);
}
static void hci_cmd_work(struct work_struct *work)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 388d46c6a043..38643ffa65a9 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -932,6 +932,9 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data,
hdev->sco_pkts = 8;
}
+ if (!read_voice_setting_capable(hdev))
+ hdev->sco_pkts = 0;
+
hdev->acl_cnt = hdev->acl_pkts;
hdev->sco_cnt = hdev->sco_pkts;
@@ -2138,40 +2141,6 @@ static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_le_set_ext_adv_params *rp = data;
- struct hci_cp_le_set_ext_adv_params *cp;
- struct adv_info *adv_instance;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS);
- if (!cp)
- return rp->status;
-
- hci_dev_lock(hdev);
- hdev->adv_addr_type = cp->own_addr_type;
- if (!cp->handle) {
- /* Store in hdev for instance 0 */
- hdev->adv_tx_power = rp->tx_power;
- } else {
- adv_instance = hci_find_adv_instance(hdev, cp->handle);
- if (adv_instance)
- adv_instance->tx_power = rp->tx_power;
- }
- /* Update adv data as tx power is known now */
- hci_update_adv_data(hdev, cp->handle);
-
- hci_dev_unlock(hdev);
-
- return rp->status;
-}
-
static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -3393,23 +3362,30 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
hci_update_scan(hdev);
}
- params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
- if (params) {
- switch (params->auto_connect) {
- case HCI_AUTO_CONN_LINK_LOSS:
- if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT)
+ /* Re-enable passive scanning if disconnected device is marked
+ * as auto-connectable.
+ */
+ if (conn->type == LE_LINK) {
+ params = hci_conn_params_lookup(hdev, &conn->dst,
+ conn->dst_type);
+ if (params) {
+ switch (params->auto_connect) {
+ case HCI_AUTO_CONN_LINK_LOSS:
+ if (ev->reason != HCI_ERROR_CONNECTION_TIMEOUT)
+ break;
+ fallthrough;
+
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params,
+ &hdev->pend_le_conns);
+ hci_update_passive_scan(hdev);
break;
- fallthrough;
- case HCI_AUTO_CONN_DIRECT:
- case HCI_AUTO_CONN_ALWAYS:
- hci_pend_le_list_del_init(params);
- hci_pend_le_list_add(params, &hdev->pend_le_conns);
- hci_update_passive_scan(hdev);
- break;
-
- default:
- break;
+ default:
+ break;
+ }
}
}
@@ -4145,8 +4121,6 @@ static const struct hci_cc {
HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS,
hci_cc_le_read_num_adv_sets,
sizeof(struct hci_rp_le_read_num_supported_adv_sets)),
- HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param,
- sizeof(struct hci_rp_le_set_ext_adv_params)),
HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE,
hci_cc_le_set_ext_adv_enable),
HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR,
@@ -6143,11 +6117,12 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
* event or send an immediate device found event if the data
* should not be stored for later.
*/
- if (!ext_adv && !has_pending_adv_report(hdev)) {
+ if (!has_pending_adv_report(hdev)) {
/* If the report will trigger a SCAN_REQ store it for
* later merging.
*/
- if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+ if (!ext_adv && (type == LE_ADV_IND ||
+ type == LE_ADV_SCAN_IND)) {
store_pending_adv_report(hdev, bdaddr, bdaddr_type,
rssi, flags, data, len);
return;
@@ -6246,6 +6221,11 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data,
static u8 ext_evt_type_to_legacy(struct hci_dev *hdev, u16 evt_type)
{
+ u16 pdu_type = evt_type & ~LE_EXT_ADV_DATA_STATUS_MASK;
+
+ if (!pdu_type)
+ return LE_ADV_NONCONN_IND;
+
if (evt_type & LE_EXT_ADV_LEGACY_PDU) {
switch (evt_type) {
case LE_LEGACY_ADV_IND:
@@ -6277,8 +6257,7 @@ static u8 ext_evt_type_to_legacy(struct hci_dev *hdev, u16 evt_type)
if (evt_type & LE_EXT_ADV_SCAN_IND)
return LE_ADV_SCAN_IND;
- if (evt_type == LE_EXT_ADV_NON_CONN_IND ||
- evt_type & LE_EXT_ADV_DIRECT_IND)
+ if (evt_type & LE_EXT_ADV_DIRECT_IND)
return LE_ADV_NONCONN_IND;
invalid:
@@ -6322,6 +6301,17 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
info->secondary_phy &= 0x1f;
}
+ /* Check if PA Sync is pending and if the hci_conn SID has not
+ * been set update it.
+ */
+ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) {
+ struct hci_conn *conn;
+
+ conn = hci_conn_hash_lookup_create_pa_sync(hdev);
+ if (conn && conn->sid == HCI_SID_INVALID)
+ conn->sid = info->sid;
+ }
+
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &info->bdaddr,
info->bdaddr_type, NULL, 0,
@@ -6360,8 +6350,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
hci_dev_clear_flag(hdev, HCI_PA_SYNC);
- conn = hci_conn_hash_lookup_sid(hdev, ev->sid, &ev->bdaddr,
- ev->bdaddr_type);
+ conn = hci_conn_hash_lookup_create_pa_sync(hdev);
if (!conn) {
bt_dev_err(hdev,
"Unable to find connection for dst %pMR sid 0x%2.2x",
@@ -6400,9 +6389,6 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
}
unlock:
- /* Handle any other pending PA sync command */
- hci_pa_create_sync_pending(hdev);
-
hci_dev_unlock(hdev);
}
@@ -6914,7 +6900,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
- if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABILISHED,
+ if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABLISHED,
flex_array_size(ev, bis, ev->num_bis)))
return;
@@ -6963,7 +6949,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu);
if (!ev->status) {
+ bis->state = BT_CONNECTED;
set_bit(HCI_CONN_BIG_SYNC, &bis->flags);
+ hci_debugfs_create_conn(bis);
+ hci_conn_add_sysfs(bis);
hci_iso_setup_path(bis);
}
}
@@ -6984,9 +6973,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
}
unlock:
- /* Handle any other pending BIG sync command */
- hci_le_big_create_sync_pending(hdev);
-
hci_dev_unlock(hdev);
}
@@ -7108,8 +7094,8 @@ static const struct hci_le_ev {
hci_le_create_big_complete_evt,
sizeof(struct hci_evt_le_create_big_complete),
HCI_MAX_EVENT_SIZE),
- /* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABILISHED] */
- HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABILISHED,
+ /* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABLISHED] */
+ HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABLISHED,
hci_le_big_sync_established_evt,
sizeof(struct hci_evt_le_big_sync_estabilished),
HCI_MAX_EVENT_SIZE),
@@ -7132,7 +7118,8 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data,
/* Only match event if command OGF is for LE */
if (hdev->req_skb &&
- hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 &&
+ (hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 ||
+ hci_skb_opcode(hdev->req_skb) == HCI_OP_NOP) &&
hci_skb_event(hdev->req_skb) == ev->subevent) {
*opcode = hci_skb_opcode(hdev->req_skb);
hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete,
@@ -7488,8 +7475,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
goto done;
}
+ hci_dev_lock(hdev);
kfree_skb(hdev->recv_event);
hdev->recv_event = skb_clone(skb, GFP_KERNEL);
+ hci_dev_unlock(hdev);
event = hdr->evt;
if (!event) {
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 022b86797acd..4ad5296d7934 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -118,7 +118,7 @@ static void hci_sock_free_cookie(struct sock *sk)
int id = hci_pi(sk)->cookie;
if (id) {
- hci_pi(sk)->cookie = 0xffffffff;
+ hci_pi(sk)->cookie = 0;
ida_free(&sock_cookie_ida, id);
}
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 7b2b04d6b856..bbd809414b2f 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1205,9 +1205,126 @@ static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
+static int
+hci_set_ext_adv_params_sync(struct hci_dev *hdev, struct adv_info *adv,
+ const struct hci_cp_le_set_ext_adv_params *cp,
+ struct hci_rp_le_set_ext_adv_params *rp)
+{
+ struct sk_buff *skb;
+
+ skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(*cp),
+ cp, HCI_CMD_TIMEOUT);
+
+ /* If command return a status event, skb will be set to -ENODATA */
+ if (skb == ERR_PTR(-ENODATA))
+ return 0;
+
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld",
+ HCI_OP_LE_SET_EXT_ADV_PARAMS, PTR_ERR(skb));
+ return PTR_ERR(skb);
+ }
+
+ if (skb->len != sizeof(*rp)) {
+ bt_dev_err(hdev, "Invalid response length for 0x%4.4x: %u",
+ HCI_OP_LE_SET_EXT_ADV_PARAMS, skb->len);
+ kfree_skb(skb);
+ return -EIO;
+ }
+
+ memcpy(rp, skb->data, sizeof(*rp));
+ kfree_skb(skb);
+
+ if (!rp->status) {
+ hdev->adv_addr_type = cp->own_addr_type;
+ if (!cp->handle) {
+ /* Store in hdev for instance 0 */
+ hdev->adv_tx_power = rp->tx_power;
+ } else if (adv) {
+ adv->tx_power = rp->tx_power;
+ }
+ }
+
+ return rp->status;
+}
+
+static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
+ u8 len;
+ struct adv_info *adv = NULL;
+ int err;
+
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv || !adv->adv_data_changed)
+ return 0;
+ }
+
+ len = eir_create_adv_data(hdev, instance, pdu->data,
+ HCI_MAX_EXT_AD_LENGTH);
+
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+
+ err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
+ struct_size(pdu, data, len), pdu,
+ HCI_CMD_TIMEOUT);
+ if (err)
+ return err;
+
+ /* Update data if the command succeed */
+ if (adv) {
+ adv->adv_data_changed = false;
+ } else {
+ memcpy(hdev->adv_data, pdu->data, len);
+ hdev->adv_data_len = len;
+ }
+
+ return 0;
+}
+
+static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ struct hci_cp_le_set_adv_data cp;
+ u8 len;
+
+ memset(&cp, 0, sizeof(cp));
+
+ len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data));
+
+ /* There's nothing to do if the data hasn't changed */
+ if (hdev->adv_data_len == len &&
+ memcmp(cp.data, hdev->adv_data, len) == 0)
+ return 0;
+
+ memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
+ hdev->adv_data_len = len;
+
+ cp.length = len;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
+int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ return 0;
+
+ if (ext_adv_capable(hdev))
+ return hci_set_ext_adv_data_sync(hdev, instance);
+
+ return hci_set_adv_data_sync(hdev, instance);
+}
+
int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_ext_adv_params cp;
+ struct hci_rp_le_set_ext_adv_params rp;
bool connectable;
u32 flags;
bdaddr_t random_addr;
@@ -1228,7 +1345,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
* Command Disallowed error, so we must first disable the
* instance if it is active.
*/
- if (adv && !adv->pending) {
+ if (adv) {
err = hci_disable_ext_adv_instance_sync(hdev, instance);
if (err)
return err;
@@ -1314,8 +1431,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
cp.secondary_phy = HCI_ADV_PHY_1M;
}
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ err = hci_set_ext_adv_params_sync(hdev, adv, &cp, &rp);
+ if (err)
+ return err;
+
+ /* Update adv data as tx power is known now */
+ err = hci_set_ext_adv_data_sync(hdev, cp.handle);
if (err)
return err;
@@ -1559,7 +1680,8 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
{
u8 bid[3];
- u8 ad[4 + 3];
+ u8 ad[HCI_MAX_EXT_AD_LENGTH];
+ u8 len;
/* Skip if NULL adv as instance 0x00 is used for general purpose
* advertising so it cannot used for the likes of Broadcast Announcement
@@ -1585,8 +1707,10 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
/* Generate Broadcast ID */
get_random_bytes(bid, sizeof(bid));
- eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
- hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL);
+ len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
+ memcpy(ad + len, adv->adv_data, adv->adv_data_len);
+ hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len,
+ ad, 0, NULL);
return hci_update_adv_data_sync(hdev, adv->instance);
}
@@ -1603,8 +1727,15 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
- /* Create an instance if that could not be found */
- if (!adv) {
+ if (adv) {
+ /* Turn it into periodic advertising */
+ adv->periodic = true;
+ adv->per_adv_data_len = data_len;
+ if (data)
+ memcpy(adv->per_adv_data, data, data_len);
+ adv->flags = flags;
+ } else if (!adv) {
+ /* Create an instance if that could not be found */
adv = hci_add_per_instance(hdev, instance, flags,
data_len, data,
sync_interval,
@@ -1822,78 +1953,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason)
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
-static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
- HCI_MAX_EXT_AD_LENGTH);
- u8 len;
- struct adv_info *adv = NULL;
- int err;
-
- if (instance) {
- adv = hci_find_adv_instance(hdev, instance);
- if (!adv || !adv->adv_data_changed)
- return 0;
- }
-
- len = eir_create_adv_data(hdev, instance, pdu->data);
-
- pdu->length = len;
- pdu->handle = adv ? adv->handle : instance;
- pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
-
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
- struct_size(pdu, data, len), pdu,
- HCI_CMD_TIMEOUT);
- if (err)
- return err;
-
- /* Update data if the command succeed */
- if (adv) {
- adv->adv_data_changed = false;
- } else {
- memcpy(hdev->adv_data, pdu->data, len);
- hdev->adv_data_len = len;
- }
-
- return 0;
-}
-
-static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- struct hci_cp_le_set_adv_data cp;
- u8 len;
-
- memset(&cp, 0, sizeof(cp));
-
- len = eir_create_adv_data(hdev, instance, cp.data);
-
- /* There's nothing to do if the data hasn't changed */
- if (hdev->adv_data_len == len &&
- memcmp(cp.data, hdev->adv_data, len) == 0)
- return 0;
-
- memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
- hdev->adv_data_len = len;
-
- cp.length = len;
-
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
-}
-
-int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- return 0;
-
- if (ext_adv_capable(hdev))
- return hci_set_ext_adv_data_sync(hdev, instance);
-
- return hci_set_adv_data_sync(hdev, instance);
-}
-
int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance,
bool force)
{
@@ -1969,13 +2028,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk)
static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
{
struct adv_info *adv, *n;
- int err = 0;
if (ext_adv_capable(hdev))
/* Remove all existing sets */
- err = hci_clear_adv_sets_sync(hdev, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_clear_adv_sets_sync(hdev, sk);
/* This is safe as long as there is no command send while the lock is
* held.
@@ -2003,13 +2059,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance,
struct sock *sk)
{
- int err = 0;
+ int err;
/* If we use extended advertising, instance has to be removed first. */
if (ext_adv_capable(hdev))
- err = hci_remove_ext_adv_instance_sync(hdev, instance, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_remove_ext_adv_instance_sync(hdev, instance, sk);
/* This is safe as long as there is no command send while the lock is
* held.
@@ -2108,16 +2162,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type)
int hci_disable_advertising_sync(struct hci_dev *hdev)
{
u8 enable = 0x00;
- int err = 0;
/* If controller is not advertising we are done. */
if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
return 0;
if (ext_adv_capable(hdev))
- err = hci_disable_ext_adv_instance_sync(hdev, 0x00);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_disable_ext_adv_instance_sync(hdev, 0x00);
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE,
sizeof(enable), &enable, HCI_CMD_TIMEOUT);
@@ -2480,6 +2531,10 @@ static int hci_pause_advertising_sync(struct hci_dev *hdev)
int err;
int old_state;
+ /* If controller is not advertising we are done. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
+ return 0;
+
/* If already been paused there is nothing to do. */
if (hdev->advertising_paused)
return 0;
@@ -2717,16 +2772,16 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
/* Force address filtering if PA Sync is in progress */
if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) {
- struct hci_cp_le_pa_create_sync *sent;
+ struct hci_conn *conn;
- sent = hci_sent_cmd_data(hdev, HCI_OP_LE_PA_CREATE_SYNC);
- if (sent) {
+ conn = hci_conn_hash_lookup_create_pa_sync(hdev);
+ if (conn) {
struct conn_params pa;
memset(&pa, 0, sizeof(pa));
- bacpy(&pa.addr, &sent->addr);
- pa.addr_type = sent->addr_type;
+ bacpy(&pa.addr, &conn->dst);
+ pa.addr_type = conn->dst_type;
/* Clear first since there could be addresses left
* behind.
@@ -3720,6 +3775,9 @@ static int hci_read_local_name_sync(struct hci_dev *hdev)
/* Read Voice Setting */
static int hci_read_voice_setting_sync(struct hci_dev *hdev)
{
+ if (!read_voice_setting_capable(hdev))
+ return 0;
+
return __hci_cmd_sync_status(hdev, HCI_OP_READ_VOICE_SETTING,
0, NULL, HCI_CMD_TIMEOUT);
}
@@ -4153,7 +4211,8 @@ static int hci_read_page_scan_type_sync(struct hci_dev *hdev)
* support the Read Page Scan Type command. Check support for
* this command in the bit mask of supported commands.
*/
- if (!(hdev->commands[13] & 0x01))
+ if (!(hdev->commands[13] & 0x01) ||
+ test_bit(HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, &hdev->quirks))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_TYPE,
@@ -6236,6 +6295,7 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev,
struct hci_conn *conn)
{
struct hci_cp_le_set_ext_adv_params cp;
+ struct hci_rp_le_set_ext_adv_params rp;
int err;
bdaddr_t random_addr;
u8 own_addr_type;
@@ -6277,8 +6337,12 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev,
if (err)
return err;
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ err = hci_set_ext_adv_params_sync(hdev, NULL, &cp, &rp);
+ if (err)
+ return err;
+
+ /* Update adv data as tx power is known now */
+ err = hci_set_ext_adv_data_sync(hdev, cp.handle);
if (err)
return err;
@@ -6725,8 +6789,8 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
return 0;
}
- /* No privacy so use a public address. */
- *own_addr_type = ADDR_LE_DEV_PUBLIC;
+ /* No privacy, use the current address */
+ hci_copy_identity_address(hdev, rand_addr, own_addr_type);
return 0;
}
@@ -6883,3 +6947,182 @@ int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn,
return __hci_cmd_sync_status(hdev, HCI_OP_LE_CONN_UPDATE,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
+
+static void create_pa_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct hci_conn *conn = data;
+ struct hci_conn *pa_sync;
+
+ bt_dev_dbg(hdev, "err %d", err);
+
+ if (err == -ECANCELED)
+ return;
+
+ hci_dev_lock(hdev);
+
+ hci_dev_clear_flag(hdev, HCI_PA_SYNC);
+
+ if (!hci_conn_valid(hdev, conn))
+ clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
+
+ if (!err)
+ goto unlock;
+
+ /* Add connection to indicate PA sync error */
+ pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY,
+ HCI_ROLE_SLAVE);
+
+ if (IS_ERR(pa_sync))
+ goto unlock;
+
+ set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags);
+
+ /* Notify iso layer */
+ hci_connect_cfm(pa_sync, bt_status(err));
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data)
+{
+ struct hci_cp_le_pa_create_sync cp;
+ struct hci_conn *conn = data;
+ struct bt_iso_qos *qos = &conn->iso_qos;
+ int err;
+
+ if (!hci_conn_valid(hdev, conn))
+ return -ECANCELED;
+
+ if (conn->sync_handle != HCI_SYNC_HANDLE_INVALID)
+ return -EINVAL;
+
+ if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC))
+ return -EBUSY;
+
+ /* Stop scanning if SID has not been set and active scanning is enabled
+ * so we use passive scanning which will be scanning using the allow
+ * list programmed to contain only the connection address.
+ */
+ if (conn->sid == HCI_SID_INVALID &&
+ hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
+ hci_scan_disable_sync(hdev);
+ hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED);
+ hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
+ }
+
+ /* Mark HCI_CONN_CREATE_PA_SYNC so hci_update_passive_scan_sync can
+ * program the address in the allow list so PA advertisements can be
+ * received.
+ */
+ set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags);
+
+ hci_update_passive_scan_sync(hdev);
+
+ /* SID has not been set listen for HCI_EV_LE_EXT_ADV_REPORT to update
+ * it.
+ */
+ if (conn->sid == HCI_SID_INVALID)
+ __hci_cmd_sync_status_sk(hdev, HCI_OP_NOP, 0, NULL,
+ HCI_EV_LE_EXT_ADV_REPORT,
+ conn->conn_timeout, NULL);
+
+ memset(&cp, 0, sizeof(cp));
+ cp.options = qos->bcast.options;
+ cp.sid = conn->sid;
+ cp.addr_type = conn->dst_type;
+ bacpy(&cp.addr, &conn->dst);
+ cp.skip = cpu_to_le16(qos->bcast.skip);
+ cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout);
+ cp.sync_cte_type = qos->bcast.sync_cte_type;
+
+ /* The spec allows only one pending LE Periodic Advertising Create
+ * Sync command at a time so we forcefully wait for PA Sync Established
+ * event since cmd_work can only schedule one command at a time.
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2493:
+ *
+ * If the Host issues this command when another HCI_LE_Periodic_
+ * Advertising_Create_Sync command is pending, the Controller shall
+ * return the error code Command Disallowed (0x0C).
+ */
+ err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_PA_CREATE_SYNC,
+ sizeof(cp), &cp,
+ HCI_EV_LE_PA_SYNC_ESTABLISHED,
+ conn->conn_timeout, NULL);
+ if (err == -ETIMEDOUT)
+ __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC_CANCEL,
+ 0, NULL, HCI_CMD_TIMEOUT);
+
+ return err;
+}
+
+int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn)
+{
+ return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn,
+ create_pa_complete);
+}
+
+static void create_big_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct hci_conn *conn = data;
+
+ bt_dev_dbg(hdev, "err %d", err);
+
+ if (err == -ECANCELED)
+ return;
+
+ if (hci_conn_valid(hdev, conn))
+ clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags);
+}
+
+static int hci_le_big_create_sync(struct hci_dev *hdev, void *data)
+{
+ DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, 0x11);
+ struct hci_conn *conn = data;
+ struct bt_iso_qos *qos = &conn->iso_qos;
+ int err;
+
+ if (!hci_conn_valid(hdev, conn))
+ return -ECANCELED;
+
+ set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags);
+
+ memset(cp, 0, sizeof(*cp));
+ cp->handle = qos->bcast.big;
+ cp->sync_handle = cpu_to_le16(conn->sync_handle);
+ cp->encryption = qos->bcast.encryption;
+ memcpy(cp->bcode, qos->bcast.bcode, sizeof(cp->bcode));
+ cp->mse = qos->bcast.mse;
+ cp->timeout = cpu_to_le16(qos->bcast.timeout);
+ cp->num_bis = conn->num_bis;
+ memcpy(cp->bis, conn->bis, conn->num_bis);
+
+ /* The spec allows only one pending LE BIG Create Sync command at
+ * a time, so we forcefully wait for BIG Sync Established event since
+ * cmd_work can only schedule one command at a time.
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E
+ * page 2586:
+ *
+ * If the Host sends this command when the Controller is in the
+ * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_
+ * Established event has not been generated, the Controller shall
+ * return the error code Command Disallowed (0x0C).
+ */
+ err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_BIG_CREATE_SYNC,
+ struct_size(cp, bis, cp->num_bis), cp,
+ HCI_EVT_LE_BIG_SYNC_ESTABLISHED,
+ conn->conn_timeout, NULL);
+ if (err == -ETIMEDOUT)
+ hci_le_big_terminate_sync(hdev, cp->handle);
+
+ return err;
+}
+
+int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn)
+{
+ return hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn,
+ create_big_complete);
+}
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index bda2f2da7d73..a08a0f3d5003 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -938,7 +938,7 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr,
iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type;
- if (sa->iso_bc->bc_sid > 0x0f)
+ if (sa->iso_bc->bc_sid > 0x0f && sa->iso_bc->bc_sid != HCI_SID_INVALID)
return -EINVAL;
iso_pi(sk)->bc_sid = sa->iso_bc->bc_sid;
@@ -1414,14 +1414,13 @@ static void iso_conn_big_sync(struct sock *sk)
lock_sock(sk);
if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
- err = hci_le_big_create_sync(hdev, iso_pi(sk)->conn->hcon,
- &iso_pi(sk)->qos,
- iso_pi(sk)->sync_handle,
- iso_pi(sk)->bc_num_bis,
- iso_pi(sk)->bc_bis);
+ err = hci_conn_big_create_sync(hdev, iso_pi(sk)->conn->hcon,
+ &iso_pi(sk)->qos,
+ iso_pi(sk)->sync_handle,
+ iso_pi(sk)->bc_num_bis,
+ iso_pi(sk)->bc_bis);
if (err)
- bt_dev_err(hdev, "hci_le_big_create_sync: %d",
- err);
+ bt_dev_err(hdev, "hci_big_create_sync: %d", err);
}
release_sock(sk);
@@ -1855,7 +1854,7 @@ static void iso_conn_ready(struct iso_conn *conn)
if (test_bit(HCI_CONN_BIG_SYNC, &hcon->flags) ||
test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) {
ev = hci_recv_event_data(hcon->hdev,
- HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
+ HCI_EVT_LE_BIG_SYNC_ESTABLISHED);
/* Get reference to PA sync parent socket, if it exists */
parent = iso_get_sock(&hcon->src, &hcon->dst,
@@ -1964,6 +1963,9 @@ static bool iso_match_sid(struct sock *sk, void *data)
{
struct hci_ev_le_pa_sync_established *ev = data;
+ if (iso_pi(sk)->bc_sid == HCI_SID_INVALID)
+ return true;
+
return ev->sid == iso_pi(sk)->bc_sid;
}
@@ -2010,8 +2012,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (ev1) {
sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN,
iso_match_sid, ev1);
- if (sk && !ev1->status)
+ if (sk && !ev1->status) {
iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle);
+ iso_pi(sk)->bc_sid = ev1->sid;
+ }
goto done;
}
@@ -2047,12 +2051,11 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) &&
!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) {
- err = hci_le_big_create_sync(hdev,
- hcon,
- &iso_pi(sk)->qos,
- iso_pi(sk)->sync_handle,
- iso_pi(sk)->bc_num_bis,
- iso_pi(sk)->bc_bis);
+ err = hci_conn_big_create_sync(hdev, hcon,
+ &iso_pi(sk)->qos,
+ iso_pi(sk)->sync_handle,
+ iso_pi(sk)->bc_num_bis,
+ iso_pi(sk)->bc_bis);
if (err) {
bt_dev_err(hdev, "hci_le_big_create_sync: %d",
err);
@@ -2137,11 +2140,6 @@ done:
return HCI_LM_ACCEPT;
}
-static bool iso_match(struct hci_conn *hcon)
-{
- return hcon->type == ISO_LINK || hcon->type == LE_LINK;
-}
-
static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
{
if (hcon->type != ISO_LINK) {
@@ -2323,7 +2321,6 @@ drop:
static struct hci_cb iso_cb = {
.name = "ISO",
- .match = iso_match,
.connect_cfm = iso_connect_cfm,
.disconn_cfm = iso_disconn_cfm,
};
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 27b4c4a2ba1f..7dafc3e0a15a 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -119,7 +119,6 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
{
struct l2cap_chan *c;
- mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_scid(conn, cid);
if (c) {
/* Only lock if chan reference is not 0 */
@@ -127,7 +126,6 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
if (c)
l2cap_chan_lock(c);
}
- mutex_unlock(&conn->chan_lock);
return c;
}
@@ -140,7 +138,6 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
{
struct l2cap_chan *c;
- mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_dcid(conn, cid);
if (c) {
/* Only lock if chan reference is not 0 */
@@ -148,7 +145,6 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
if (c)
l2cap_chan_lock(c);
}
- mutex_unlock(&conn->chan_lock);
return c;
}
@@ -418,7 +414,7 @@ static void l2cap_chan_timeout(struct work_struct *work)
if (!conn)
return;
- mutex_lock(&conn->chan_lock);
+ mutex_lock(&conn->lock);
/* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling
* this work. No need to call l2cap_chan_hold(chan) here again.
*/
@@ -439,7 +435,7 @@ static void l2cap_chan_timeout(struct work_struct *work)
l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
- mutex_unlock(&conn->chan_lock);
+ mutex_unlock(&conn->lock);
}
struct l2cap_chan *l2cap_chan_create(void)
@@ -636,14 +632,15 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
test_bit(FLAG_HOLD_HCI_CONN, &chan->flags))
hci_conn_hold(conn->hcon);
- list_add(&chan->list, &conn->chan_l);
+ /* Append to the list since the order matters for ECRED */
+ list_add_tail(&chan->list, &conn->chan_l);
}
void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan)
{
- mutex_lock(&conn->chan_lock);
+ mutex_lock(&conn->lock);
__l2cap_chan_add(conn, chan);
- mutex_unlock(&conn->chan_lock);
+ mutex_unlock(&conn->lock);
}
void l2cap_chan_del(struct l2cap_chan *chan, int err)
@@ -731,9 +728,9 @@ void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func,
if (!conn)
return;
- mutex_lock(&conn->chan_lock);
+ mutex_lock(&conn->lock);
__l2cap_chan_list(conn, func, data);
- mutex_unlock(&conn->chan_lock);
+ mutex_unlock(&conn->lock);
}
EXPORT_SYMBOL_GPL(l2cap_chan_list);
@@ -745,7 +742,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work)
struct hci_conn *hcon = conn->hcon;
struct l2cap_chan *chan;
- mutex_lock(&conn->chan_lock);
+ mutex_lock(&conn->lock);
list_for_each_entry(chan, &conn->chan_l, list) {
l2cap_chan_lock(chan);
@@ -754,7 +751,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work)
l2cap_chan_unlock(chan);
}
- mutex_unlock(&conn->chan_lock);
+ mutex_unlock(&conn->lock);
}
static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan)
@@ -948,6 +945,16 @@ static u8 l2cap_get_ident(struct l2cap_conn *conn)
return id;
}
+static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb,
+ u8 flags)
+{
+ /* Check if the hcon still valid before attempting to send */
+ if (hci_conn_valid(conn->hcon->hdev, conn->hcon))
+ hci_send_acl(conn->hchan, skb, flags);
+ else
+ kfree_skb(skb);
+}
+
static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len,
void *data)
{
@@ -970,7 +977,7 @@ static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len,
bt_cb(skb)->force_active = BT_POWER_FORCE_ACTIVE_ON;
skb->priority = HCI_PRIO_MAX;
- hci_send_acl(conn->hchan, skb, flags);
+ l2cap_send_acl(conn, skb, flags);
}
static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb)
@@ -1404,7 +1411,8 @@ static void l2cap_request_info(struct l2cap_conn *conn)
sizeof(req), &req);
}
-static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
+static bool l2cap_check_enc_key_size(struct hci_conn *hcon,
+ struct l2cap_chan *chan)
{
/* The minimum encryption key size needs to be enforced by the
* host stack before establishing any L2CAP connections. The
@@ -1418,7 +1426,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
int min_key_size = hcon->hdev->min_enc_key_size;
/* On FIPS security level, key size must be 16 bytes */
- if (hcon->sec_level == BT_SECURITY_FIPS)
+ if (chan->sec_level == BT_SECURITY_FIPS)
min_key_size = 16;
return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
@@ -1446,7 +1454,7 @@ static void l2cap_do_start(struct l2cap_chan *chan)
!__l2cap_no_conn_pending(chan))
return;
- if (l2cap_check_enc_key_size(conn->hcon))
+ if (l2cap_check_enc_key_size(conn->hcon, chan))
l2cap_start_connection(chan);
else
__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
@@ -1497,8 +1505,6 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
BT_DBG("conn %p", conn);
- mutex_lock(&conn->chan_lock);
-
list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) {
l2cap_chan_lock(chan);
@@ -1523,7 +1529,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
continue;
}
- if (l2cap_check_enc_key_size(conn->hcon))
+ if (l2cap_check_enc_key_size(conn->hcon, chan))
l2cap_start_connection(chan);
else
l2cap_chan_close(chan, ECONNREFUSED);
@@ -1567,8 +1573,6 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
l2cap_chan_unlock(chan);
}
-
- mutex_unlock(&conn->chan_lock);
}
static void l2cap_le_conn_ready(struct l2cap_conn *conn)
@@ -1614,7 +1618,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
if (hcon->type == ACL_LINK)
l2cap_request_info(conn);
- mutex_lock(&conn->chan_lock);
+ mutex_lock(&conn->lock);
list_for_each_entry(chan, &conn->chan_l, list) {
@@ -1632,7 +1636,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
l2cap_chan_unlock(chan);
}
- mutex_unlock(&conn->chan_lock);
+ mutex_unlock(&conn->lock);
if (hcon->type == LE_LINK)
l2cap_le_conn_ready(conn);
@@ -1647,14 +1651,10 @@ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err)
BT_DBG("conn %p", conn);
- mutex_lock(&conn->chan_lock);
-
list_for_each_entry(chan, &conn->chan_l, list) {
if (test_bit(FLAG_FORCE_RELIABLE, &chan->flags))
l2cap_chan_set_err(chan, err);
}
-
- mutex_unlock(&conn->chan_lock);
}
static void l2cap_info_timeout(struct work_struct *work)
@@ -1665,7 +1665,9 @@ static void l2cap_info_timeout(struct work_struct *work)
conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
conn->info_ident = 0;
+ mutex_lock(&conn->lock);
l2cap_conn_start(conn);
+ mutex_unlock(&conn->lock);
}
/*
@@ -1757,6 +1759,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
+ mutex_lock(&conn->lock);
+
kfree_skb(conn->rx_skb);
skb_queue_purge(&conn->pending_rx);
@@ -1775,8 +1779,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
/* Force the connection to be immediately dropped */
hcon->disc_timeout = 0;
- mutex_lock(&conn->chan_lock);
-
/* Kill channels */
list_for_each_entry_safe(chan, l, &conn->chan_l, list) {
l2cap_chan_hold(chan);
@@ -1790,15 +1792,14 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
l2cap_chan_put(chan);
}
- mutex_unlock(&conn->chan_lock);
-
- hci_chan_del(conn->hchan);
-
if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT)
cancel_delayed_work_sync(&conn->info_timer);
- hcon->l2cap_data = NULL;
+ hci_chan_del(conn->hchan);
conn->hchan = NULL;
+
+ hcon->l2cap_data = NULL;
+ mutex_unlock(&conn->lock);
l2cap_conn_put(conn);
}
@@ -2916,8 +2917,6 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb)
BT_DBG("conn %p", conn);
- mutex_lock(&conn->chan_lock);
-
list_for_each_entry(chan, &conn->chan_l, list) {
if (chan->chan_type != L2CAP_CHAN_RAW)
continue;
@@ -2932,8 +2931,6 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb)
if (chan->ops->recv(chan, nskb))
kfree_skb(nskb);
}
-
- mutex_unlock(&conn->chan_lock);
}
/* ---- L2CAP signalling commands ---- */
@@ -3383,7 +3380,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
struct l2cap_conf_efs efs;
u8 remote_efs = 0;
- u16 mtu = L2CAP_DEFAULT_MTU;
+ u16 mtu = 0;
u16 result = L2CAP_CONF_SUCCESS;
u16 size;
@@ -3488,6 +3485,29 @@ done:
/* Configure output options and let the other side know
* which ones we don't like. */
+ /* If MTU is not provided in configure request, try adjusting it
+ * to the current output MTU if it has been set
+ *
+ * Bluetooth Core 6.1, Vol 3, Part A, Section 4.5
+ *
+ * Each configuration parameter value (if any is present) in an
+ * L2CAP_CONFIGURATION_RSP packet reflects an ‘adjustment’ to a
+ * configuration parameter value that has been sent (or, in case
+ * of default values, implied) in the corresponding
+ * L2CAP_CONFIGURATION_REQ packet.
+ */
+ if (!mtu) {
+ /* Only adjust for ERTM channels as for older modes the
+ * remote stack may not be able to detect that the
+ * adjustment causing it to silently drop packets.
+ */
+ if (chan->mode == L2CAP_MODE_ERTM &&
+ chan->omtu && chan->omtu != L2CAP_DEFAULT_MTU)
+ mtu = chan->omtu;
+ else
+ mtu = L2CAP_DEFAULT_MTU;
+ }
+
if (mtu < L2CAP_DEFAULT_MIN_MTU)
result = L2CAP_CONF_UNACCEPT;
else {
@@ -3776,7 +3796,11 @@ static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data)
struct l2cap_ecred_conn_rsp *rsp_flex =
container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr);
- if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags))
+ /* Check if channel for outgoing connection or if it wasn't deferred
+ * since in those cases it must be skipped.
+ */
+ if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags) ||
+ !test_and_clear_bit(FLAG_DEFER_SETUP, &chan->flags))
return;
/* Reset ident so only one response is sent */
@@ -3952,12 +3976,12 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd,
goto response;
}
- mutex_lock(&conn->chan_lock);
l2cap_chan_lock(pchan);
/* Check if the ACL is secure enough (if not SDP) */
if (psm != cpu_to_le16(L2CAP_PSM_SDP) &&
- !hci_conn_check_link_mode(conn->hcon)) {
+ (!hci_conn_check_link_mode(conn->hcon) ||
+ !l2cap_check_enc_key_size(conn->hcon, pchan))) {
conn->disc_reason = HCI_ERROR_AUTH_FAILURE;
result = L2CAP_CR_SEC_BLOCK;
goto response;
@@ -4059,7 +4083,6 @@ response:
}
l2cap_chan_unlock(pchan);
- mutex_unlock(&conn->chan_lock);
l2cap_chan_put(pchan);
}
@@ -4098,27 +4121,19 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x",
dcid, scid, result, status);
- mutex_lock(&conn->chan_lock);
-
if (scid) {
chan = __l2cap_get_chan_by_scid(conn, scid);
- if (!chan) {
- err = -EBADSLT;
- goto unlock;
- }
+ if (!chan)
+ return -EBADSLT;
} else {
chan = __l2cap_get_chan_by_ident(conn, cmd->ident);
- if (!chan) {
- err = -EBADSLT;
- goto unlock;
- }
+ if (!chan)
+ return -EBADSLT;
}
chan = l2cap_chan_hold_unless_zero(chan);
- if (!chan) {
- err = -EBADSLT;
- goto unlock;
- }
+ if (!chan)
+ return -EBADSLT;
err = 0;
@@ -4156,9 +4171,6 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
-unlock:
- mutex_unlock(&conn->chan_lock);
-
return err;
}
@@ -4446,11 +4458,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
chan->ops->set_shutdown(chan);
- l2cap_chan_unlock(chan);
- mutex_lock(&conn->chan_lock);
- l2cap_chan_lock(chan);
l2cap_chan_del(chan, ECONNRESET);
- mutex_unlock(&conn->chan_lock);
chan->ops->close(chan);
@@ -4487,11 +4495,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
return 0;
}
- l2cap_chan_unlock(chan);
- mutex_lock(&conn->chan_lock);
- l2cap_chan_lock(chan);
l2cap_chan_del(chan, 0);
- mutex_unlock(&conn->chan_lock);
chan->ops->close(chan);
@@ -4689,13 +4693,9 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x",
dcid, mtu, mps, credits, result);
- mutex_lock(&conn->chan_lock);
-
chan = __l2cap_get_chan_by_ident(conn, cmd->ident);
- if (!chan) {
- err = -EBADSLT;
- goto unlock;
- }
+ if (!chan)
+ return -EBADSLT;
err = 0;
@@ -4743,9 +4743,6 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn,
l2cap_chan_unlock(chan);
-unlock:
- mutex_unlock(&conn->chan_lock);
-
return err;
}
@@ -4857,12 +4854,12 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
goto response;
}
- mutex_lock(&conn->chan_lock);
l2cap_chan_lock(pchan);
if (!smp_sufficient_security(conn->hcon, pchan->sec_level,
SMP_ALLOW_STK)) {
- result = L2CAP_CR_LE_AUTHENTICATION;
+ result = pchan->sec_level == BT_SECURITY_MEDIUM ?
+ L2CAP_CR_LE_ENCRYPTION : L2CAP_CR_LE_AUTHENTICATION;
chan = NULL;
goto response_unlock;
}
@@ -4923,7 +4920,6 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
response_unlock:
l2cap_chan_unlock(pchan);
- mutex_unlock(&conn->chan_lock);
l2cap_chan_put(pchan);
if (result == L2CAP_CR_PEND)
@@ -5057,7 +5053,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
goto response;
}
- mutex_lock(&conn->chan_lock);
l2cap_chan_lock(pchan);
if (!smp_sufficient_security(conn->hcon, pchan->sec_level,
@@ -5132,7 +5127,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
unlock:
l2cap_chan_unlock(pchan);
- mutex_unlock(&conn->chan_lock);
l2cap_chan_put(pchan);
response:
@@ -5169,8 +5163,6 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn,
BT_DBG("mtu %u mps %u credits %u result 0x%4.4x", mtu, mps, credits,
result);
- mutex_lock(&conn->chan_lock);
-
cmd_len -= sizeof(*rsp);
list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) {
@@ -5256,8 +5248,6 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn,
l2cap_chan_unlock(chan);
}
- mutex_unlock(&conn->chan_lock);
-
return err;
}
@@ -5370,8 +5360,6 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn,
if (cmd_len < sizeof(*rej))
return -EPROTO;
- mutex_lock(&conn->chan_lock);
-
chan = __l2cap_get_chan_by_ident(conn, cmd->ident);
if (!chan)
goto done;
@@ -5386,7 +5374,6 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn,
l2cap_chan_put(chan);
done:
- mutex_unlock(&conn->chan_lock);
return 0;
}
@@ -6841,8 +6828,12 @@ static void process_pending_rx(struct work_struct *work)
BT_DBG("");
+ mutex_lock(&conn->lock);
+
while ((skb = skb_dequeue(&conn->pending_rx)))
l2cap_recv_frame(conn, skb);
+
+ mutex_unlock(&conn->lock);
}
static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
@@ -6881,7 +6872,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR;
mutex_init(&conn->ident_lock);
- mutex_init(&conn->chan_lock);
+ mutex_init(&conn->lock);
INIT_LIST_HEAD(&conn->chan_l);
INIT_LIST_HEAD(&conn->users);
@@ -7072,7 +7063,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
}
}
- mutex_lock(&conn->chan_lock);
+ mutex_lock(&conn->lock);
l2cap_chan_lock(chan);
if (cid && __l2cap_get_chan_by_dcid(conn, cid)) {
@@ -7113,7 +7104,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
chan_unlock:
l2cap_chan_unlock(chan);
- mutex_unlock(&conn->chan_lock);
+ mutex_unlock(&conn->lock);
done:
hci_dev_unlock(hdev);
hci_dev_put(hdev);
@@ -7217,11 +7208,6 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
return NULL;
}
-static bool l2cap_match(struct hci_conn *hcon)
-{
- return hcon->type == ACL_LINK || hcon->type == LE_LINK;
-}
-
static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
{
struct hci_dev *hdev = hcon->hdev;
@@ -7229,6 +7215,9 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
struct l2cap_chan *pchan;
u8 dst_type;
+ if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
+ return;
+
BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
if (status) {
@@ -7293,6 +7282,9 @@ int l2cap_disconn_ind(struct hci_conn *hcon)
static void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
{
+ if (hcon->type != ACL_LINK && hcon->type != LE_LINK)
+ return;
+
BT_DBG("hcon %p reason %d", hcon, reason);
l2cap_conn_del(hcon, bt_to_errno(reason));
@@ -7325,7 +7317,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt);
- mutex_lock(&conn->chan_lock);
+ mutex_lock(&conn->lock);
list_for_each_entry(chan, &conn->chan_l, list) {
l2cap_chan_lock(chan);
@@ -7350,7 +7342,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
}
if (chan->state == BT_CONNECT) {
- if (!status && l2cap_check_enc_key_size(hcon))
+ if (!status && l2cap_check_enc_key_size(hcon, chan))
l2cap_start_connection(chan);
else
__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
@@ -7360,7 +7352,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
struct l2cap_conn_rsp rsp;
__u16 res, stat;
- if (!status && l2cap_check_enc_key_size(hcon)) {
+ if (!status && l2cap_check_enc_key_size(hcon, chan)) {
if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
res = L2CAP_CR_PEND;
stat = L2CAP_CS_AUTHOR_PEND;
@@ -7399,7 +7391,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
l2cap_chan_unlock(chan);
}
- mutex_unlock(&conn->chan_lock);
+ mutex_unlock(&conn->lock);
}
/* Append fragment into frame respecting the maximum len of rx_skb */
@@ -7413,6 +7405,9 @@ static int l2cap_recv_frag(struct l2cap_conn *conn, struct sk_buff *skb,
return -ENOMEM;
/* Init rx_len */
conn->rx_len = len;
+
+ skb_set_delivery_time(conn->rx_skb, skb->tstamp,
+ skb->tstamp_type);
}
/* Copy as much as the rx_skb can hold */
@@ -7466,19 +7461,45 @@ static void l2cap_recv_reset(struct l2cap_conn *conn)
conn->rx_len = 0;
}
+struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *c)
+{
+ if (!c)
+ return NULL;
+
+ BT_DBG("conn %p orig refcnt %u", c, kref_read(&c->ref));
+
+ if (!kref_get_unless_zero(&c->ref))
+ return NULL;
+
+ return c;
+}
+
void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
{
- struct l2cap_conn *conn = hcon->l2cap_data;
+ struct l2cap_conn *conn;
int len;
+ /* Lock hdev to access l2cap_data to avoid race with l2cap_conn_del */
+ hci_dev_lock(hcon->hdev);
+
+ conn = hcon->l2cap_data;
+
if (!conn)
conn = l2cap_conn_add(hcon);
- if (!conn)
- goto drop;
+ conn = l2cap_conn_hold_unless_zero(conn);
+
+ hci_dev_unlock(hcon->hdev);
+
+ if (!conn) {
+ kfree_skb(skb);
+ return;
+ }
BT_DBG("conn %p len %u flags 0x%x", conn, skb->len, flags);
+ mutex_lock(&conn->lock);
+
switch (flags) {
case ACL_START:
case ACL_START_NO_FLUSH:
@@ -7503,7 +7524,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (len == skb->len) {
/* Complete frame received */
l2cap_recv_frame(conn, skb);
- return;
+ goto unlock;
}
BT_DBG("Start: total len %d, frag len %u", len, skb->len);
@@ -7511,8 +7532,24 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (skb->len > len) {
BT_ERR("Frame is too long (len %u, expected len %d)",
skb->len, len);
+ /* PTS test cases L2CAP/COS/CED/BI-14-C and BI-15-C
+ * (Multiple Signaling Command in one PDU, Data
+ * Truncated, BR/EDR) send a C-frame to the IUT with
+ * PDU Length set to 8 and Channel ID set to the
+ * correct signaling channel for the logical link.
+ * The Information payload contains one L2CAP_ECHO_REQ
+ * packet with Data Length set to 0 with 0 octets of
+ * echo data and one invalid command packet due to
+ * data truncated in PDU but present in HCI packet.
+ *
+ * Shorter the socket buffer to the PDU length to
+ * allow to process valid commands from the PDU before
+ * setting the socket unreliable.
+ */
+ skb->len = len;
+ l2cap_recv_frame(conn, skb);
l2cap_conn_unreliable(conn, ECOMM);
- goto drop;
+ goto unlock;
}
/* Append fragment into frame (with header) */
@@ -7567,11 +7604,13 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
drop:
kfree_skb(skb);
+unlock:
+ mutex_unlock(&conn->lock);
+ l2cap_conn_put(conn);
}
static struct hci_cb l2cap_cb = {
.name = "L2CAP",
- .match = l2cap_match,
.connect_cfm = l2cap_connect_cfm,
.disconn_cfm = l2cap_disconn_cfm,
.security_cfm = l2cap_security_cfm,
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 3d2553dcdb1b..615c18e290ab 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -710,12 +710,12 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu)
{
switch (chan->scid) {
case L2CAP_CID_ATT:
- if (mtu < L2CAP_LE_MIN_MTU)
+ if (mtu && mtu < L2CAP_LE_MIN_MTU)
return false;
break;
default:
- if (mtu < L2CAP_DEFAULT_MIN_MTU)
+ if (mtu && mtu < L2CAP_DEFAULT_MIN_MTU)
return false;
}
@@ -1326,9 +1326,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
/* prevent sk structure from being freed whilst unlocked */
sock_hold(sk);
- chan = l2cap_pi(sk)->chan;
/* prevent chan structure from being freed whilst unlocked */
- l2cap_chan_hold(chan);
+ chan = l2cap_chan_hold_unless_zero(l2cap_pi(sk)->chan);
+ if (!chan)
+ goto shutdown_already;
BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
@@ -1358,22 +1359,20 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
release_sock(sk);
l2cap_chan_lock(chan);
- conn = chan->conn;
- if (conn)
- /* prevent conn structure from being freed */
- l2cap_conn_get(conn);
+ /* prevent conn structure from being freed */
+ conn = l2cap_conn_hold_unless_zero(chan->conn);
l2cap_chan_unlock(chan);
if (conn)
/* mutex lock must be taken before l2cap_chan_lock() */
- mutex_lock(&conn->chan_lock);
+ mutex_lock(&conn->lock);
l2cap_chan_lock(chan);
l2cap_chan_close(chan, 0);
l2cap_chan_unlock(chan);
if (conn) {
- mutex_unlock(&conn->chan_lock);
+ mutex_unlock(&conn->lock);
l2cap_conn_put(conn);
}
@@ -1691,6 +1690,9 @@ static void l2cap_sock_resume_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
+ if (!sk)
+ return;
+
if (test_and_clear_bit(FLAG_PENDING_SECURITY, &chan->flags)) {
sk->sk_state = BT_CONNECTED;
chan->state = BT_CONNECTED;
@@ -1888,7 +1890,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
chan = l2cap_chan_create();
if (!chan) {
sk_free(sk);
- sock->sk = NULL;
+ if (sock)
+ sock->sk = NULL;
return NULL;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7dc315c1658e..ade93532db34 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1073,7 +1073,8 @@ static int mesh_send_done_sync(struct hci_dev *hdev, void *data)
struct mgmt_mesh_tx *mesh_tx;
hci_dev_clear_flag(hdev, HCI_MESH_SENDING);
- hci_disable_advertising_sync(hdev);
+ if (list_empty(&hdev->adv_instances))
+ hci_disable_advertising_sync(hdev);
mesh_tx = mgmt_mesh_next(hdev, NULL);
if (mesh_tx)
@@ -1440,22 +1441,17 @@ static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data)
send_settings_rsp(cmd->sk, cmd->opcode, match->hdev);
- list_del(&cmd->list);
-
if (match->sk == NULL) {
match->sk = cmd->sk;
sock_hold(match->sk);
}
-
- mgmt_pending_free(cmd);
}
static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data)
{
u8 *status = data;
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status);
- mgmt_pending_remove(cmd);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, *status);
}
static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
@@ -1469,8 +1465,6 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
if (cmd->cmd_complete) {
cmd->cmd_complete(cmd, match->mgmt_status);
- mgmt_pending_remove(cmd);
-
return;
}
@@ -1479,13 +1473,13 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status)
{
- return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status,
+ return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status,
cmd->param, cmd->param_len);
}
static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status)
{
- return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status,
+ return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status,
cmd->param, sizeof(struct mgmt_addr_info));
}
@@ -1525,7 +1519,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
goto done;
}
@@ -1700,7 +1694,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
goto done;
}
@@ -1936,8 +1930,8 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
new_settings(hdev, NULL);
}
- mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp,
- &mgmt_err);
+ mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true,
+ cmd_status_rsp, &mgmt_err);
return;
}
@@ -1947,7 +1941,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED);
}
- mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, settings_rsp, &match);
if (changed)
new_settings(hdev, match.sk);
@@ -2067,12 +2061,12 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp,
- &status);
+ mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, cmd_status_rsp,
+ &status);
return;
}
- mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, settings_rsp, &match);
new_settings(hdev, match.sk);
@@ -2131,7 +2125,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err)
struct sock *sk = cmd->sk;
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true,
cmd_status_rsp, &status);
return;
}
@@ -2153,6 +2147,9 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data)
else
hci_dev_clear_flag(hdev, HCI_MESH);
+ hdev->le_scan_interval = __le16_to_cpu(cp->period);
+ hdev->le_scan_window = __le16_to_cpu(cp->window);
+
len -= sizeof(*cp);
/* If filters don't fit, forward all adv pkts */
@@ -2167,6 +2164,7 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
{
struct mgmt_cp_set_mesh *cp = data;
struct mgmt_pending_cmd *cmd;
+ __u16 period, window;
int err = 0;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -2180,6 +2178,23 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
MGMT_STATUS_INVALID_PARAMS);
+ /* Keep allowed ranges in sync with set_scan_params() */
+ period = __le16_to_cpu(cp->period);
+
+ if (period < 0x0004 || period > 0x4000)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ window = __le16_to_cpu(cp->window);
+
+ if (window < 0x0004 || window > 0x4000)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ if (window > period)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
hci_dev_lock(hdev);
cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len);
@@ -2572,7 +2587,7 @@ static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), hdev->dev_class, 3);
mgmt_pending_free(cmd);
@@ -3360,7 +3375,7 @@ static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status)
bacpy(&rp.addr.bdaddr, &conn->dst);
rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type);
- err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE,
+ err = mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_PAIR_DEVICE,
status, &rp, sizeof(rp));
/* So we don't get further callbacks for this connection */
@@ -5172,24 +5187,14 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk);
}
-void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle)
+static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev,
+ __le16 handle)
{
struct mgmt_ev_adv_monitor_removed ev;
- struct mgmt_pending_cmd *cmd;
- struct sock *sk_skip = NULL;
- struct mgmt_cp_remove_adv_monitor *cp;
-
- cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev);
- if (cmd) {
- cp = cmd->param;
- if (cp->monitor_handle)
- sk_skip = cmd->sk;
- }
+ ev.monitor_handle = handle;
- ev.monitor_handle = cpu_to_le16(handle);
-
- mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip);
+ mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk);
}
static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
@@ -5260,7 +5265,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev,
hci_update_passive_scan(hdev);
}
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(status), &rp, sizeof(rp));
mgmt_pending_remove(cmd);
@@ -5291,8 +5296,7 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
if (pending_find(MGMT_OP_SET_LE, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
- pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) {
+ pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) {
status = MGMT_STATUS_BUSY;
goto unlock;
}
@@ -5460,18 +5464,25 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
{
struct mgmt_rp_remove_adv_monitor rp;
struct mgmt_pending_cmd *cmd = data;
- struct mgmt_cp_remove_adv_monitor *cp = cmd->param;
+ struct mgmt_cp_remove_adv_monitor *cp;
+
+ if (status == -ECANCELED)
+ return;
hci_dev_lock(hdev);
+ cp = cmd->param;
+
rp.monitor_handle = cp->monitor_handle;
- if (!status)
+ if (!status) {
+ mgmt_adv_monitor_removed(cmd->sk, hdev, cp->monitor_handle);
hci_update_passive_scan(hdev);
+ }
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(status), &rp, sizeof(rp));
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
hci_dev_unlock(hdev);
bt_dev_dbg(hdev, "remove monitor %d complete, status %d",
@@ -5499,14 +5510,13 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
if (pending_find(MGMT_OP_SET_LE, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) {
status = MGMT_STATUS_BUSY;
goto unlock;
}
- cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len);
+ cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len);
if (!cmd) {
status = MGMT_STATUS_NO_RESOURCES;
goto unlock;
@@ -5516,7 +5526,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
mgmt_remove_adv_monitor_complete);
if (err) {
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
if (err == -ENOMEM)
status = MGMT_STATUS_NO_RESOURCES;
@@ -5869,7 +5879,7 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
return;
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6107,7 +6117,7 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6332,7 +6342,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err)
u8 status = mgmt_status(err);
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true,
cmd_status_rsp, &status);
return;
}
@@ -6342,7 +6352,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err)
else
hci_dev_clear_flag(hdev, HCI_ADVERTISING);
- mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp,
+ mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, settings_rsp,
&match);
new_settings(hdev, match.sk);
@@ -6548,6 +6558,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS,
MGMT_STATUS_NOT_SUPPORTED);
+ /* Keep allowed ranges in sync with set_mesh() */
interval = __le16_to_cpu(cp->interval);
if (interval < 0x0004 || interval > 0x4000)
@@ -6686,7 +6697,7 @@ static void set_bredr_complete(struct hci_dev *hdev, void *data, int err)
*/
hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
} else {
send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev);
new_settings(hdev, cmd->sk);
@@ -6823,7 +6834,7 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err)
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
goto done;
}
@@ -7270,7 +7281,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err)
rp.max_tx_power = HCI_TX_POWER_INVALID;
}
- mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_GET_CONN_INFO, status,
&rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -7430,7 +7441,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err)
}
complete:
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, &rp,
sizeof(rp));
mgmt_pending_free(cmd);
@@ -7602,11 +7613,16 @@ static void add_device_complete(struct hci_dev *hdev, void *data, int err)
struct mgmt_cp_add_device *cp = cmd->param;
if (!err) {
+ struct hci_conn_params *params;
+
+ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+
device_added(cmd->sk, hdev, &cp->addr.bdaddr, cp->addr.type,
cp->action);
device_flags_changed(NULL, hdev, &cp->addr.bdaddr,
cp->addr.type, hdev->conn_flags,
- PTR_UINT(cmd->user_data));
+ params ? params->flags : 0);
}
mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_ADD_DEVICE,
@@ -7709,8 +7725,6 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- cmd->user_data = UINT_PTR(current_flags);
-
err = hci_cmd_sync_queue(hdev, add_device_sync, cmd,
add_device_complete);
if (err < 0) {
@@ -8677,10 +8691,10 @@ static void add_advertising_complete(struct hci_dev *hdev, void *data, int err)
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
add_adv_complete(hdev, cmd->sk, cp->instance, err);
@@ -8868,10 +8882,10 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data,
hci_remove_adv_instance(hdev, cp->instance);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
} else {
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
}
@@ -9018,10 +9032,10 @@ static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err)
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -9180,10 +9194,10 @@ static void remove_advertising_complete(struct hci_dev *hdev, void *data,
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -9454,7 +9468,7 @@ void mgmt_index_removed(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
+ mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match);
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0,
@@ -9492,7 +9506,8 @@ void mgmt_power_on(struct hci_dev *hdev, int err)
hci_update_passive_scan(hdev);
}
- mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp,
+ &match);
new_settings(hdev, match.sk);
@@ -9507,7 +9522,8 @@ void __mgmt_power_off(struct hci_dev *hdev)
struct cmd_lookup match = { NULL, hdev };
u8 zero_cod[] = { 0, 0, 0 };
- mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp,
+ &match);
/* If the power off is because of hdev unregistration let
* use the appropriate INVALID_INDEX status. Otherwise use
@@ -9521,7 +9537,7 @@ void __mgmt_power_off(struct hci_dev *hdev)
else
match.mgmt_status = MGMT_STATUS_NOT_POWERED;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
+ mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match);
if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) {
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
@@ -9721,6 +9737,9 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) +
eir_precalc_len(sizeof(conn->dev_class)));
+ if (!skb)
+ return;
+
ev = skb_put(skb, sizeof(*ev));
bacpy(&ev->addr.bdaddr, &conn->dst);
ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type);
@@ -9759,7 +9778,6 @@ static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data)
device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk);
cmd->cmd_complete(cmd, 0);
- mgmt_pending_remove(cmd);
}
bool mgmt_powering_down(struct hci_dev *hdev)
@@ -9815,8 +9833,8 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
struct mgmt_cp_disconnect *cp;
struct mgmt_pending_cmd *cmd;
- mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp,
- hdev);
+ mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, true,
+ unpair_device_rsp, hdev);
cmd = pending_find(MGMT_OP_DISCONNECT, hdev);
if (!cmd)
@@ -10009,7 +10027,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
if (status) {
u8 mgmt_err = mgmt_status(status);
- mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true,
cmd_status_rsp, &mgmt_err);
return;
}
@@ -10019,8 +10037,8 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
else
changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY);
- mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp,
- &match);
+ mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true,
+ settings_rsp, &match);
if (changed)
new_settings(hdev, match.sk);
@@ -10044,9 +10062,12 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
{
struct cmd_lookup match = { NULL, hdev, mgmt_status(status) };
- mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match);
- mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match);
- mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, false, sk_lookup,
+ &match);
+ mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, false, sk_lookup,
+ &match);
+ mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, false, sk_lookup,
+ &match);
if (!status) {
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class,
@@ -10474,6 +10495,8 @@ void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND,
sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0));
+ if (!skb)
+ return;
ev = skb_put(skb, sizeof(*ev));
bacpy(&ev->addr.bdaddr, bdaddr);
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index 17ab909a7c07..a88a07da3947 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -217,47 +217,47 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode,
struct hci_dev *hdev)
{
- struct mgmt_pending_cmd *cmd;
+ struct mgmt_pending_cmd *cmd, *tmp;
+
+ mutex_lock(&hdev->mgmt_pending_lock);
- list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
+ list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) {
if (hci_sock_get_channel(cmd->sk) != channel)
continue;
- if (cmd->opcode == opcode)
- return cmd;
- }
- return NULL;
-}
-
-struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel,
- u16 opcode,
- struct hci_dev *hdev,
- const void *data)
-{
- struct mgmt_pending_cmd *cmd;
-
- list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
- if (cmd->user_data != data)
- continue;
- if (cmd->opcode == opcode)
+ if (cmd->opcode == opcode) {
+ mutex_unlock(&hdev->mgmt_pending_lock);
return cmd;
+ }
}
+ mutex_unlock(&hdev->mgmt_pending_lock);
+
return NULL;
}
-void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev,
+void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove,
void (*cb)(struct mgmt_pending_cmd *cmd, void *data),
void *data)
{
struct mgmt_pending_cmd *cmd, *tmp;
+ mutex_lock(&hdev->mgmt_pending_lock);
+
list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) {
if (opcode > 0 && cmd->opcode != opcode)
continue;
+ if (remove)
+ list_del(&cmd->list);
+
cb(cmd, data);
+
+ if (remove)
+ mgmt_pending_free(cmd);
}
+
+ mutex_unlock(&hdev->mgmt_pending_lock);
}
struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode,
@@ -271,7 +271,7 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode,
return NULL;
cmd->opcode = opcode;
- cmd->index = hdev->id;
+ cmd->hdev = hdev;
cmd->param = kmemdup(data, len, GFP_KERNEL);
if (!cmd->param) {
@@ -297,7 +297,9 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
if (!cmd)
return NULL;
+ mutex_lock(&hdev->mgmt_pending_lock);
list_add_tail(&cmd->list, &hdev->mgmt_pending);
+ mutex_unlock(&hdev->mgmt_pending_lock);
return cmd;
}
@@ -311,7 +313,10 @@ void mgmt_pending_free(struct mgmt_pending_cmd *cmd)
void mgmt_pending_remove(struct mgmt_pending_cmd *cmd)
{
+ mutex_lock(&cmd->hdev->mgmt_pending_lock);
list_del(&cmd->list);
+ mutex_unlock(&cmd->hdev->mgmt_pending_lock);
+
mgmt_pending_free(cmd);
}
@@ -321,7 +326,7 @@ void mgmt_mesh_foreach(struct hci_dev *hdev,
{
struct mgmt_mesh_tx *mesh_tx, *tmp;
- list_for_each_entry_safe(mesh_tx, tmp, &hdev->mgmt_pending, list) {
+ list_for_each_entry_safe(mesh_tx, tmp, &hdev->mesh_pending, list) {
if (!sk || mesh_tx->sk == sk)
cb(mesh_tx, data);
}
diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h
index bdf978605d5a..024e51dd6937 100644
--- a/net/bluetooth/mgmt_util.h
+++ b/net/bluetooth/mgmt_util.h
@@ -33,7 +33,7 @@ struct mgmt_mesh_tx {
struct mgmt_pending_cmd {
struct list_head list;
u16 opcode;
- int index;
+ struct hci_dev *hdev;
void *param;
size_t param_len;
struct sock *sk;
@@ -54,11 +54,7 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode,
struct hci_dev *hdev);
-struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel,
- u16 opcode,
- struct hci_dev *hdev,
- const void *data);
-void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev,
+void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove,
void (*cb)(struct mgmt_pending_cmd *cmd, void *data),
void *data);
struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 4c56ca5a216c..ad5177e3a69b 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2134,11 +2134,6 @@ static int rfcomm_run(void *unused)
return 0;
}
-static bool rfcomm_match(struct hci_conn *hcon)
-{
- return hcon->type == ACL_LINK;
-}
-
static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
{
struct rfcomm_session *s;
@@ -2185,7 +2180,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
static struct hci_cb rfcomm_cb = {
.name = "RFCOMM",
- .match = rfcomm_match,
.security_cfm = rfcomm_security_cfm
};
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 071c404c790a..b872a2ca3ff3 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1355,13 +1355,11 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
return lm;
}
-static bool sco_match(struct hci_conn *hcon)
-{
- return hcon->type == SCO_LINK || hcon->type == ESCO_LINK;
-}
-
static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
{
+ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
+ return;
+
BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status);
if (!status) {
@@ -1376,6 +1374,9 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
static void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
{
+ if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
+ return;
+
BT_DBG("hcon %p reason %d", hcon, reason);
sco_conn_del(hcon, bt_to_errno(reason));
@@ -1401,7 +1402,6 @@ drop:
static struct hci_cb sco_cb = {
.name = "SCO",
- .match = sco_match,
.connect_cfm = sco_connect_cfm,
.disconn_cfm = sco_disconn_cfm,
};
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 8b9724fd752a..a31971fe2fd7 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1379,7 +1379,7 @@ static void smp_timeout(struct work_struct *work)
bt_dev_dbg(conn->hcon->hdev, "conn %p", conn);
- hci_disconnect(conn->hcon, HCI_ERROR_REMOTE_USER_TERM);
+ hci_disconnect(conn->hcon, HCI_ERROR_AUTH_FAILURE);
}
static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
@@ -2977,8 +2977,25 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb)
if (code > SMP_CMD_MAX)
goto drop;
- if (smp && !test_and_clear_bit(code, &smp->allow_cmd))
+ if (smp && !test_and_clear_bit(code, &smp->allow_cmd)) {
+ /* If there is a context and the command is not allowed consider
+ * it a failure so the session is cleanup properly.
+ */
+ switch (code) {
+ case SMP_CMD_IDENT_INFO:
+ case SMP_CMD_IDENT_ADDR_INFO:
+ case SMP_CMD_SIGN_INFO:
+ /* 3.6.1. Key distribution and generation
+ *
+ * A device may reject a distributed key by sending the
+ * Pairing Failed command with the reason set to
+ * "Key Rejected".
+ */
+ smp_failure(conn, SMP_KEY_REJECTED);
+ break;
+ }
goto drop;
+ }
/* If we don't have a context the only allowed commands are
* pairing request and security request.
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index 87a59ec2c9f0..c5da53dfab04 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -138,6 +138,7 @@ struct smp_cmd_keypress_notify {
#define SMP_NUMERIC_COMP_FAILED 0x0c
#define SMP_BREDR_PAIRING_IN_PROGRESS 0x0d
#define SMP_CROSS_TRANSP_NOT_ALLOWED 0x0e
+#define SMP_KEY_REJECTED 0x0f
#define SMP_MIN_ENC_KEY_SIZE 7
#define SMP_MAX_ENC_KEY_SIZE 16
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 501ec4249fed..8612023bec60 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -660,12 +660,9 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
void *data;
- if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
+ if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom)
return ERR_PTR(-EINVAL);
- if (user_size > size)
- return ERR_PTR(-EMSGSIZE);
-
size = SKB_DATA_ALIGN(size);
data = kzalloc(size + headroom + tailroom, GFP_USER);
if (!data)
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 1a52a0bca086..7e1ad229e133 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1040,7 +1040,7 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg,
/* host join */
if (!port) {
- if (mp->host_joined) {
+ if (mp->host_joined && !(cfg->nlflags & NLM_F_REPLACE)) {
NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host");
return -EEXIST;
}
diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c
index 1820f09ff59c..3f24b4ee49c2 100644
--- a/net/bridge/br_mst.c
+++ b/net/bridge/br_mst.c
@@ -80,10 +80,10 @@ static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg,
if (br_vlan_get_state(v) == state)
return;
- br_vlan_set_state(v, state);
-
if (v->vid == vg->pvid)
br_vlan_set_pvid_state(vg, state);
+
+ br_vlan_set_state(v, state);
}
int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state,
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b2ae0d2434d2..733ff6b758f6 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2105,12 +2105,17 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
}
}
-void br_multicast_enable_port(struct net_bridge_port *port)
+static void br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
{
- struct net_bridge *br = port->br;
+ struct net_bridge *br = pmctx->port->br;
spin_lock_bh(&br->multicast_lock);
- __br_multicast_enable_port_ctx(&port->multicast_ctx);
+ if (br_multicast_port_ctx_is_vlan(pmctx) &&
+ !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) {
+ spin_unlock_bh(&br->multicast_lock);
+ return;
+ }
+ __br_multicast_enable_port_ctx(pmctx);
spin_unlock_bh(&br->multicast_lock);
}
@@ -2137,11 +2142,67 @@ static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx)
br_multicast_rport_del_notify(pmctx, del);
}
+static void br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx)
+{
+ struct net_bridge *br = pmctx->port->br;
+
+ spin_lock_bh(&br->multicast_lock);
+ if (br_multicast_port_ctx_is_vlan(pmctx) &&
+ !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) {
+ spin_unlock_bh(&br->multicast_lock);
+ return;
+ }
+
+ __br_multicast_disable_port_ctx(pmctx);
+ spin_unlock_bh(&br->multicast_lock);
+}
+
+static void br_multicast_toggle_port(struct net_bridge_port *port, bool on)
+{
+#if IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
+ if (br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+
+ rcu_read_lock();
+ vg = nbp_vlan_group_rcu(port);
+ if (!vg) {
+ rcu_read_unlock();
+ return;
+ }
+
+ /* iterate each vlan, toggle vlan multicast context */
+ list_for_each_entry_rcu(vlan, &vg->vlan_list, vlist) {
+ struct net_bridge_mcast_port *pmctx =
+ &vlan->port_mcast_ctx;
+ u8 state = br_vlan_get_state(vlan);
+ /* enable vlan multicast context when state is
+ * LEARNING or FORWARDING
+ */
+ if (on && br_vlan_state_allowed(state, true))
+ br_multicast_enable_port_ctx(pmctx);
+ else
+ br_multicast_disable_port_ctx(pmctx);
+ }
+ rcu_read_unlock();
+ return;
+ }
+#endif
+ /* toggle port multicast context when vlan snooping is disabled */
+ if (on)
+ br_multicast_enable_port_ctx(&port->multicast_ctx);
+ else
+ br_multicast_disable_port_ctx(&port->multicast_ctx);
+}
+
+void br_multicast_enable_port(struct net_bridge_port *port)
+{
+ br_multicast_toggle_port(port, true);
+}
+
void br_multicast_disable_port(struct net_bridge_port *port)
{
- spin_lock_bh(&port->br->multicast_lock);
- __br_multicast_disable_port_ctx(&port->multicast_ctx);
- spin_unlock_bh(&port->br->multicast_lock);
+ br_multicast_toggle_port(port, false);
}
static int __grp_src_delete_marked(struct net_bridge_port_group *pg)
@@ -4211,6 +4272,32 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
#endif
}
+void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, u8 state)
+{
+#if IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
+ struct net_bridge *br;
+
+ if (!br_vlan_should_use(v))
+ return;
+
+ if (br_vlan_is_master(v))
+ return;
+
+ br = v->port->br;
+
+ if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ return;
+
+ if (br_vlan_state_allowed(state, true))
+ br_multicast_enable_port_ctx(&v->port_mcast_ctx);
+
+ /* Multicast is not disabled for the vlan when it goes in
+ * blocking state because the timers will expire and stop by
+ * themselves without sending more queries.
+ */
+#endif
+}
+
void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on)
{
struct net_bridge *br;
@@ -4304,9 +4391,9 @@ int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
__br_multicast_open(&br->multicast_ctx);
list_for_each_entry(p, &br->port_list, list) {
if (on)
- br_multicast_disable_port(p);
+ br_multicast_disable_port_ctx(&p->multicast_ctx);
else
- br_multicast_enable_port(p);
+ br_multicast_enable_port_ctx(&p->multicast_ctx);
}
list_for_each_entry(vlan, &vg->vlan_list, vlist)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 1d458e9da660..17a5f5923d61 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -370,9 +370,9 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
*/
static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb->dev, *br_indev;
- struct iphdr *iph = ip_hdr(skb);
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ struct net_device *dev = skb->dev, *br_indev;
+ const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
int err;
@@ -390,7 +390,9 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
}
nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
- if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
+ err = ip_route_input(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ if (err) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
/* If err equals -EHOSTUNREACH the error is due to a
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 98aea5485aae..a8c67035e23c 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -65,17 +65,14 @@ static struct dst_ops fake_dst_ops = {
* ipt_REJECT needs it. Future netfilter modules might
* require us to fill additional fields.
*/
-static const u32 br_dst_default_metrics[RTAX_MAX] = {
- [RTAX_MTU - 1] = 1500,
-};
-
void br_netfilter_rtable_init(struct net_bridge *br)
{
struct rtable *rt = &br->fake_rtable;
rcuref_init(&rt->dst.__rcuref, 1);
rt->dst.dev = br->dev;
- dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
+ dst_init_metrics(&rt->dst, br->metrics, false);
+ dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu);
rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
rt->dst.ops = &fake_dst_ops;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 041f6e571a20..6a1bce8959af 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -505,6 +505,7 @@ struct net_bridge {
struct rtable fake_rtable;
struct rt6_info fake_rt6_info;
};
+ u32 metrics[RTAX_MAX];
#endif
u16 group_fwd_mask;
u16 group_fwd_mask_required;
@@ -1052,6 +1053,7 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port,
struct net_bridge_vlan *vlan,
struct net_bridge_mcast_port *pmctx);
void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx);
+void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, u8 state);
void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on);
int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
struct netlink_ext_ack *extack);
@@ -1502,6 +1504,11 @@ static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pm
{
}
+static inline void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v,
+ u8 state)
+{
+}
+
static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan,
bool on)
{
@@ -1853,7 +1860,9 @@ bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
const struct net_bridge_vlan *v_opts);
-/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */
+/* vlan state manipulation helpers using *_ONCE to annotate lock-free access,
+ * while br_vlan_set_state() may access data protected by multicast_lock.
+ */
static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v)
{
return READ_ONCE(v->state);
@@ -1862,6 +1871,7 @@ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v)
static inline void br_vlan_set_state(struct net_bridge_vlan *v, u8 state)
{
WRITE_ONCE(v->state, state);
+ br_multicast_update_vlan_mcast_ctx(v, state);
}
static inline u8 br_vlan_get_pvid_state(const struct net_bridge_vlan_group *vg)
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 7b41ee8740cb..f10bd6a233dc 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -17,6 +17,9 @@ static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
return false;
+ if (br_multicast_igmp_type(skb))
+ return false;
+
return (p->flags & BR_TX_FWD_OFFLOAD) &&
(p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 89f51ea4cabe..f2efb58d152b 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -715,8 +715,8 @@ static int br_vlan_add_existing(struct net_bridge *br,
u16 flags, bool *changed,
struct netlink_ext_ack *extack)
{
- bool would_change = __vlan_flags_would_change(vlan, flags);
bool becomes_brentry = false;
+ bool would_change = false;
int err;
if (!br_vlan_is_brentry(vlan)) {
@@ -725,6 +725,8 @@ static int br_vlan_add_existing(struct net_bridge *br,
return -EINVAL;
becomes_brentry = true;
+ } else {
+ would_change = __vlan_flags_would_change(vlan, flags);
}
/* Master VLANs that aren't brentries weren't notified before,
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 816bb0fde718..6482de4d8750 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -60,19 +60,19 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
struct ip_fraglist_iter iter;
struct sk_buff *frag;
- if (first_len - hlen > mtu ||
- skb_headroom(skb) < ll_rs)
+ if (first_len - hlen > mtu)
goto blackhole;
- if (skb_cloned(skb))
+ if (skb_cloned(skb) ||
+ skb_headroom(skb) < ll_rs)
goto slow_path;
skb_walk_frags(skb, frag) {
- if (frag->len > mtu ||
- skb_headroom(frag) < hlen + ll_rs)
+ if (frag->len > mtu)
goto blackhole;
- if (skb_shared(frag))
+ if (skb_shared(frag) ||
+ skb_headroom(frag) < hlen + ll_rs)
goto slow_path;
}
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 20139fa1be1f..06b604cf9d58 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -351,17 +351,154 @@ int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
return found;
}
+static int cfctrl_link_setup(struct cfctrl *cfctrl, struct cfpkt *pkt, u8 cmdrsp)
+{
+ u8 len;
+ u8 linkid = 0;
+ enum cfctrl_srv serv;
+ enum cfctrl_srv servtype;
+ u8 endpoint;
+ u8 physlinkid;
+ u8 prio;
+ u8 tmp;
+ u8 *cp;
+ int i;
+ struct cfctrl_link_param linkparam;
+ struct cfctrl_request_info rsp, *req;
+
+ memset(&linkparam, 0, sizeof(linkparam));
+
+ tmp = cfpkt_extr_head_u8(pkt);
+
+ serv = tmp & CFCTRL_SRV_MASK;
+ linkparam.linktype = serv;
+
+ servtype = tmp >> 4;
+ linkparam.chtype = servtype;
+
+ tmp = cfpkt_extr_head_u8(pkt);
+ physlinkid = tmp & 0x07;
+ prio = tmp >> 3;
+
+ linkparam.priority = prio;
+ linkparam.phyid = physlinkid;
+ endpoint = cfpkt_extr_head_u8(pkt);
+ linkparam.endpoint = endpoint & 0x03;
+
+ switch (serv) {
+ case CFCTRL_SRV_VEI:
+ case CFCTRL_SRV_DBG:
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+ break;
+ case CFCTRL_SRV_VIDEO:
+ tmp = cfpkt_extr_head_u8(pkt);
+ linkparam.u.video.connid = tmp;
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+ break;
+
+ case CFCTRL_SRV_DATAGRAM:
+ linkparam.u.datagram.connid = cfpkt_extr_head_u32(pkt);
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+ break;
+ case CFCTRL_SRV_RFM:
+ /* Construct a frame, convert
+ * DatagramConnectionID
+ * to network format long and copy it out...
+ */
+ linkparam.u.rfm.connid = cfpkt_extr_head_u32(pkt);
+ cp = (u8 *) linkparam.u.rfm.volume;
+ for (tmp = cfpkt_extr_head_u8(pkt);
+ cfpkt_more(pkt) && tmp != '\0';
+ tmp = cfpkt_extr_head_u8(pkt))
+ *cp++ = tmp;
+ *cp = '\0';
+
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+
+ break;
+ case CFCTRL_SRV_UTIL:
+ /* Construct a frame, convert
+ * DatagramConnectionID
+ * to network format long and copy it out...
+ */
+ /* Fifosize KB */
+ linkparam.u.utility.fifosize_kb = cfpkt_extr_head_u16(pkt);
+ /* Fifosize bufs */
+ linkparam.u.utility.fifosize_bufs = cfpkt_extr_head_u16(pkt);
+ /* name */
+ cp = (u8 *) linkparam.u.utility.name;
+ caif_assert(sizeof(linkparam.u.utility.name)
+ >= UTILITY_NAME_LENGTH);
+ for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) {
+ tmp = cfpkt_extr_head_u8(pkt);
+ *cp++ = tmp;
+ }
+ /* Length */
+ len = cfpkt_extr_head_u8(pkt);
+ linkparam.u.utility.paramlen = len;
+ /* Param Data */
+ cp = linkparam.u.utility.params;
+ while (cfpkt_more(pkt) && len--) {
+ tmp = cfpkt_extr_head_u8(pkt);
+ *cp++ = tmp;
+ }
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+ /* Length */
+ len = cfpkt_extr_head_u8(pkt);
+ /* Param Data */
+ cfpkt_extr_head(pkt, NULL, len);
+ break;
+ default:
+ pr_warn("Request setup, invalid type (%d)\n", serv);
+ return -1;
+ }
+
+ rsp.cmd = CFCTRL_CMD_LINK_SETUP;
+ rsp.param = linkparam;
+ spin_lock_bh(&cfctrl->info_list_lock);
+ req = cfctrl_remove_req(cfctrl, &rsp);
+
+ if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
+ cfpkt_erroneous(pkt)) {
+ pr_err("Invalid O/E bit or parse error "
+ "on CAIF control channel\n");
+ cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 0,
+ req ? req->client_layer : NULL);
+ } else {
+ cfctrl->res.linksetup_rsp(cfctrl->serv.layer.up, linkid,
+ serv, physlinkid,
+ req ? req->client_layer : NULL);
+ }
+
+ kfree(req);
+
+ spin_unlock_bh(&cfctrl->info_list_lock);
+
+ return 0;
+}
+
static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
{
u8 cmdrsp;
u8 cmd;
- int ret = -1;
- u8 len;
- u8 param[255];
+ int ret = 0;
u8 linkid = 0;
struct cfctrl *cfctrl = container_obj(layer);
- struct cfctrl_request_info rsp, *req;
-
cmdrsp = cfpkt_extr_head_u8(pkt);
cmd = cmdrsp & CFCTRL_CMD_MASK;
@@ -374,150 +511,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
switch (cmd) {
case CFCTRL_CMD_LINK_SETUP:
- {
- enum cfctrl_srv serv;
- enum cfctrl_srv servtype;
- u8 endpoint;
- u8 physlinkid;
- u8 prio;
- u8 tmp;
- u8 *cp;
- int i;
- struct cfctrl_link_param linkparam;
- memset(&linkparam, 0, sizeof(linkparam));
-
- tmp = cfpkt_extr_head_u8(pkt);
-
- serv = tmp & CFCTRL_SRV_MASK;
- linkparam.linktype = serv;
-
- servtype = tmp >> 4;
- linkparam.chtype = servtype;
-
- tmp = cfpkt_extr_head_u8(pkt);
- physlinkid = tmp & 0x07;
- prio = tmp >> 3;
-
- linkparam.priority = prio;
- linkparam.phyid = physlinkid;
- endpoint = cfpkt_extr_head_u8(pkt);
- linkparam.endpoint = endpoint & 0x03;
-
- switch (serv) {
- case CFCTRL_SRV_VEI:
- case CFCTRL_SRV_DBG:
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
- break;
- case CFCTRL_SRV_VIDEO:
- tmp = cfpkt_extr_head_u8(pkt);
- linkparam.u.video.connid = tmp;
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
- break;
-
- case CFCTRL_SRV_DATAGRAM:
- linkparam.u.datagram.connid =
- cfpkt_extr_head_u32(pkt);
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
- break;
- case CFCTRL_SRV_RFM:
- /* Construct a frame, convert
- * DatagramConnectionID
- * to network format long and copy it out...
- */
- linkparam.u.rfm.connid =
- cfpkt_extr_head_u32(pkt);
- cp = (u8 *) linkparam.u.rfm.volume;
- for (tmp = cfpkt_extr_head_u8(pkt);
- cfpkt_more(pkt) && tmp != '\0';
- tmp = cfpkt_extr_head_u8(pkt))
- *cp++ = tmp;
- *cp = '\0';
-
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
-
- break;
- case CFCTRL_SRV_UTIL:
- /* Construct a frame, convert
- * DatagramConnectionID
- * to network format long and copy it out...
- */
- /* Fifosize KB */
- linkparam.u.utility.fifosize_kb =
- cfpkt_extr_head_u16(pkt);
- /* Fifosize bufs */
- linkparam.u.utility.fifosize_bufs =
- cfpkt_extr_head_u16(pkt);
- /* name */
- cp = (u8 *) linkparam.u.utility.name;
- caif_assert(sizeof(linkparam.u.utility.name)
- >= UTILITY_NAME_LENGTH);
- for (i = 0;
- i < UTILITY_NAME_LENGTH
- && cfpkt_more(pkt); i++) {
- tmp = cfpkt_extr_head_u8(pkt);
- *cp++ = tmp;
- }
- /* Length */
- len = cfpkt_extr_head_u8(pkt);
- linkparam.u.utility.paramlen = len;
- /* Param Data */
- cp = linkparam.u.utility.params;
- while (cfpkt_more(pkt) && len--) {
- tmp = cfpkt_extr_head_u8(pkt);
- *cp++ = tmp;
- }
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
- /* Length */
- len = cfpkt_extr_head_u8(pkt);
- /* Param Data */
- cfpkt_extr_head(pkt, &param, len);
- break;
- default:
- pr_warn("Request setup, invalid type (%d)\n",
- serv);
- goto error;
- }
-
- rsp.cmd = cmd;
- rsp.param = linkparam;
- spin_lock_bh(&cfctrl->info_list_lock);
- req = cfctrl_remove_req(cfctrl, &rsp);
-
- if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
- cfpkt_erroneous(pkt)) {
- pr_err("Invalid O/E bit or parse error "
- "on CAIF control channel\n");
- cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
- 0,
- req ? req->client_layer
- : NULL);
- } else {
- cfctrl->res.linksetup_rsp(cfctrl->serv.
- layer.up, linkid,
- serv, physlinkid,
- req ? req->
- client_layer : NULL);
- }
-
- kfree(req);
-
- spin_unlock_bh(&cfctrl->info_list_lock);
- }
+ ret = cfctrl_link_setup(cfctrl, pkt, cmdrsp);
break;
case CFCTRL_CMD_LINK_DESTROY:
linkid = cfpkt_extr_head_u8(pkt);
@@ -544,9 +538,9 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
break;
default:
pr_err("Unrecognized Control Frame\n");
+ ret = -1;
goto error;
}
- ret = 0;
error:
cfpkt_destroy(pkt);
return ret;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 01f3fbb3b67d..65230e81fa08 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -287,8 +287,8 @@ int can_send(struct sk_buff *skb, int loop)
netif_rx(newskb);
/* update statistics */
- pkg_stats->tx_frames++;
- pkg_stats->tx_frames_delta++;
+ atomic_long_inc(&pkg_stats->tx_frames);
+ atomic_long_inc(&pkg_stats->tx_frames_delta);
return 0;
@@ -647,8 +647,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
int matches;
/* update statistics */
- pkg_stats->rx_frames++;
- pkg_stats->rx_frames_delta++;
+ atomic_long_inc(&pkg_stats->rx_frames);
+ atomic_long_inc(&pkg_stats->rx_frames_delta);
/* create non-zero unique skb identifier together with *skb */
while (!(can_skb_prv(skb)->skbcnt))
@@ -669,8 +669,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
consume_skb(skb);
if (matches > 0) {
- pkg_stats->matches++;
- pkg_stats->matches_delta++;
+ atomic_long_inc(&pkg_stats->matches);
+ atomic_long_inc(&pkg_stats->matches_delta);
}
}
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 7c2d9161e224..22f3352c77fe 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -66,9 +66,9 @@ struct receiver {
struct can_pkg_stats {
unsigned long jiffies_init;
- unsigned long rx_frames;
- unsigned long tx_frames;
- unsigned long matches;
+ atomic_long_t rx_frames;
+ atomic_long_t tx_frames;
+ atomic_long_t matches;
unsigned long total_rx_rate;
unsigned long total_tx_rate;
@@ -82,9 +82,9 @@ struct can_pkg_stats {
unsigned long max_tx_rate;
unsigned long max_rx_match_ratio;
- unsigned long rx_frames_delta;
- unsigned long tx_frames_delta;
- unsigned long matches_delta;
+ atomic_long_t rx_frames_delta;
+ atomic_long_t tx_frames_delta;
+ atomic_long_t matches_delta;
};
/* persistent statistics */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 217049fa496e..e33ff2a5b20c 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -58,6 +58,7 @@
#include <linux/can/skb.h>
#include <linux/can/bcm.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <net/sock.h>
#include <net/net_namespace.h>
@@ -122,6 +123,7 @@ struct bcm_op {
struct canfd_frame last_sframe;
struct sock *sk;
struct net_device *rx_reg_dev;
+ spinlock_t bcm_tx_lock; /* protect currframe/count in runtime updates */
};
struct bcm_sock {
@@ -217,7 +219,9 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex));
seq_printf(m, " <<<\n");
- list_for_each_entry(op, &bo->rx_ops, list) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(op, &bo->rx_ops, list) {
unsigned long reduction;
@@ -273,6 +277,9 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, "# sent %ld\n", op->frames_abs);
}
seq_putc(m, '\n');
+
+ rcu_read_unlock();
+
return 0;
}
#endif /* CONFIG_PROC_FS */
@@ -285,13 +292,18 @@ static void bcm_can_tx(struct bcm_op *op)
{
struct sk_buff *skb;
struct net_device *dev;
- struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe;
+ struct canfd_frame *cf;
int err;
/* no target device? => exit */
if (!op->ifindex)
return;
+ /* read currframe under lock protection */
+ spin_lock_bh(&op->bcm_tx_lock);
+ cf = op->frames + op->cfsiz * op->currframe;
+ spin_unlock_bh(&op->bcm_tx_lock);
+
dev = dev_get_by_index(sock_net(op->sk), op->ifindex);
if (!dev) {
/* RFC: should this bcm_op remove itself here? */
@@ -312,6 +324,10 @@ static void bcm_can_tx(struct bcm_op *op)
skb->dev = dev;
can_skb_set_owner(skb, op->sk);
err = can_send(skb, 1);
+
+ /* update currframe and count under lock protection */
+ spin_lock_bh(&op->bcm_tx_lock);
+
if (!err)
op->frames_abs++;
@@ -320,6 +336,11 @@ static void bcm_can_tx(struct bcm_op *op)
/* reached last frame? */
if (op->currframe >= op->nframes)
op->currframe = 0;
+
+ if (op->count > 0)
+ op->count--;
+
+ spin_unlock_bh(&op->bcm_tx_lock);
out:
dev_put(dev);
}
@@ -430,7 +451,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
struct bcm_msg_head msg_head;
if (op->kt_ival1 && (op->count > 0)) {
- op->count--;
+ bcm_can_tx(op);
if (!op->count && (op->flags & TX_COUNTEVT)) {
/* create notification to user */
@@ -445,7 +466,6 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
bcm_send_to_user(op, &msg_head, NULL, 0);
}
- bcm_can_tx(op);
} else if (op->kt_ival2) {
bcm_can_tx(op);
@@ -843,7 +863,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
REGMASK(op->can_id),
bcm_rx_handler, op);
- list_del(&op->list);
+ list_del_rcu(&op->list);
bcm_remove_op(op);
return 1; /* done */
}
@@ -863,7 +883,7 @@ static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh,
list_for_each_entry_safe(op, n, ops, list) {
if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
(op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
- list_del(&op->list);
+ list_del_rcu(&op->list);
bcm_remove_op(op);
return 1; /* done */
}
@@ -956,6 +976,27 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
}
op->flags = msg_head->flags;
+ /* only lock for unlikely count/nframes/currframe changes */
+ if (op->nframes != msg_head->nframes ||
+ op->flags & TX_RESET_MULTI_IDX ||
+ op->flags & SETTIMER) {
+
+ spin_lock_bh(&op->bcm_tx_lock);
+
+ if (op->nframes != msg_head->nframes ||
+ op->flags & TX_RESET_MULTI_IDX) {
+ /* potentially update changed nframes */
+ op->nframes = msg_head->nframes;
+ /* restart multiple frame transmission */
+ op->currframe = 0;
+ }
+
+ if (op->flags & SETTIMER)
+ op->count = msg_head->count;
+
+ spin_unlock_bh(&op->bcm_tx_lock);
+ }
+
} else {
/* insert new BCM operation for the given can_id */
@@ -963,9 +1004,14 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (!op)
return -ENOMEM;
+ spin_lock_init(&op->bcm_tx_lock);
op->can_id = msg_head->can_id;
op->cfsiz = CFSIZ(msg_head->flags);
op->flags = msg_head->flags;
+ op->nframes = msg_head->nframes;
+
+ if (op->flags & SETTIMER)
+ op->count = msg_head->count;
/* create array for CAN frames and copy the data */
if (msg_head->nframes > 1) {
@@ -1024,22 +1070,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
} /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */
- if (op->nframes != msg_head->nframes) {
- op->nframes = msg_head->nframes;
- /* start multiple frame transmission with index 0 */
- op->currframe = 0;
- }
-
- /* check flags */
-
- if (op->flags & TX_RESET_MULTI_IDX) {
- /* start multiple frame transmission with index 0 */
- op->currframe = 0;
- }
-
if (op->flags & SETTIMER) {
/* set timer values */
- op->count = msg_head->count;
op->ival1 = msg_head->ival1;
op->ival2 = msg_head->ival2;
op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1);
@@ -1056,11 +1088,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->flags |= TX_ANNOUNCE;
}
- if (op->flags & TX_ANNOUNCE) {
+ if (op->flags & TX_ANNOUNCE)
bcm_can_tx(op);
- if (op->count)
- op->count--;
- }
if (op->flags & STARTTIMER)
bcm_tx_start_timer(op);
@@ -1276,7 +1305,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
bcm_rx_handler, op, "bcm", sk);
if (err) {
/* this bcm rx op is broken -> remove it */
- list_del(&op->list);
+ list_del_rcu(&op->list);
bcm_remove_op(op);
return err;
}
diff --git a/net/can/gw.c b/net/can/gw.c
index 37528826935e..e65500c52bf5 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -130,7 +130,7 @@ struct cgw_job {
u32 handled_frames;
u32 dropped_frames;
u32 deleted_frames;
- struct cf_mod mod;
+ struct cf_mod __rcu *cf_mod;
union {
/* CAN frame data source */
struct net_device *dev;
@@ -459,6 +459,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
struct cgw_job *gwj = (struct cgw_job *)data;
struct canfd_frame *cf;
struct sk_buff *nskb;
+ struct cf_mod *mod;
int modidx = 0;
/* process strictly Classic CAN or CAN FD frames */
@@ -506,7 +507,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
* When there is at least one modification function activated,
* we need to copy the skb as we want to modify skb->data.
*/
- if (gwj->mod.modfunc[0])
+ mod = rcu_dereference(gwj->cf_mod);
+ if (mod->modfunc[0])
nskb = skb_copy(skb, GFP_ATOMIC);
else
nskb = skb_clone(skb, GFP_ATOMIC);
@@ -529,8 +531,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
cf = (struct canfd_frame *)nskb->data;
/* perform preprocessed modification functions if there are any */
- while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx])
- (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod);
+ while (modidx < MAX_MODFUNCTIONS && mod->modfunc[modidx])
+ (*mod->modfunc[modidx++])(cf, mod);
/* Has the CAN frame been modified? */
if (modidx) {
@@ -546,11 +548,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
}
/* check for checksum updates */
- if (gwj->mod.csumfunc.crc8)
- (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
+ if (mod->csumfunc.crc8)
+ (*mod->csumfunc.crc8)(cf, &mod->csum.crc8);
- if (gwj->mod.csumfunc.xor)
- (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor);
+ if (mod->csumfunc.xor)
+ (*mod->csumfunc.xor)(cf, &mod->csum.xor);
}
/* clear the skb timestamp if not configured the other way */
@@ -581,9 +583,20 @@ static void cgw_job_free_rcu(struct rcu_head *rcu_head)
{
struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu);
+ /* cgw_job::cf_mod is always accessed from the same cgw_job object within
+ * the same RCU read section. Once cgw_job is scheduled for removal,
+ * cf_mod can also be removed without mandating an additional grace period.
+ */
+ kfree(rcu_access_pointer(gwj->cf_mod));
kmem_cache_free(cgw_cache, gwj);
}
+/* Return cgw_job::cf_mod with RTNL protected section */
+static struct cf_mod *cgw_job_cf_mod(struct cgw_job *gwj)
+{
+ return rcu_dereference_protected(gwj->cf_mod, rtnl_is_locked());
+}
+
static int cgw_notifier(struct notifier_block *nb,
unsigned long msg, void *ptr)
{
@@ -616,6 +629,7 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
{
struct rtcanmsg *rtcan;
struct nlmsghdr *nlh;
+ struct cf_mod *mod;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags);
if (!nlh)
@@ -650,82 +664,83 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
goto cancel;
}
+ mod = cgw_job_cf_mod(gwj);
if (gwj->flags & CGW_FLAGS_CAN_FD) {
struct cgw_fdframe_mod mb;
- if (gwj->mod.modtype.and) {
- memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.and;
+ if (mod->modtype.and) {
+ memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf));
+ mb.modtype = mod->modtype.and;
if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0)
goto cancel;
}
- if (gwj->mod.modtype.or) {
- memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.or;
+ if (mod->modtype.or) {
+ memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf));
+ mb.modtype = mod->modtype.or;
if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0)
goto cancel;
}
- if (gwj->mod.modtype.xor) {
- memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.xor;
+ if (mod->modtype.xor) {
+ memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf));
+ mb.modtype = mod->modtype.xor;
if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0)
goto cancel;
}
- if (gwj->mod.modtype.set) {
- memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.set;
+ if (mod->modtype.set) {
+ memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf));
+ mb.modtype = mod->modtype.set;
if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0)
goto cancel;
}
} else {
struct cgw_frame_mod mb;
- if (gwj->mod.modtype.and) {
- memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.and;
+ if (mod->modtype.and) {
+ memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf));
+ mb.modtype = mod->modtype.and;
if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
goto cancel;
}
- if (gwj->mod.modtype.or) {
- memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.or;
+ if (mod->modtype.or) {
+ memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf));
+ mb.modtype = mod->modtype.or;
if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
goto cancel;
}
- if (gwj->mod.modtype.xor) {
- memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.xor;
+ if (mod->modtype.xor) {
+ memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf));
+ mb.modtype = mod->modtype.xor;
if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
goto cancel;
}
- if (gwj->mod.modtype.set) {
- memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
- mb.modtype = gwj->mod.modtype.set;
+ if (mod->modtype.set) {
+ memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf));
+ mb.modtype = mod->modtype.set;
if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
goto cancel;
}
}
- if (gwj->mod.uid) {
- if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0)
+ if (mod->uid) {
+ if (nla_put_u32(skb, CGW_MOD_UID, mod->uid) < 0)
goto cancel;
}
- if (gwj->mod.csumfunc.crc8) {
+ if (mod->csumfunc.crc8) {
if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN,
- &gwj->mod.csum.crc8) < 0)
+ &mod->csum.crc8) < 0)
goto cancel;
}
- if (gwj->mod.csumfunc.xor) {
+ if (mod->csumfunc.xor) {
if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN,
- &gwj->mod.csum.xor) < 0)
+ &mod->csum.xor) < 0)
goto cancel;
}
@@ -1059,7 +1074,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct rtcanmsg *r;
struct cgw_job *gwj;
- struct cf_mod mod;
+ struct cf_mod *mod;
struct can_can_gw ccgw;
u8 limhops = 0;
int err = 0;
@@ -1078,37 +1093,48 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
if (r->gwtype != CGW_TYPE_CAN_CAN)
return -EINVAL;
- err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
+ mod = kmalloc(sizeof(*mod), GFP_KERNEL);
+ if (!mod)
+ return -ENOMEM;
+
+ err = cgw_parse_attr(nlh, mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
if (err < 0)
- return err;
+ goto out_free_cf;
- if (mod.uid) {
+ if (mod->uid) {
ASSERT_RTNL();
/* check for updating an existing job with identical uid */
hlist_for_each_entry(gwj, &net->can.cgw_list, list) {
- if (gwj->mod.uid != mod.uid)
+ struct cf_mod *old_cf;
+
+ old_cf = cgw_job_cf_mod(gwj);
+ if (old_cf->uid != mod->uid)
continue;
/* interfaces & filters must be identical */
- if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
- return -EINVAL;
+ if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) {
+ err = -EINVAL;
+ goto out_free_cf;
+ }
- /* update modifications with disabled softirq & quit */
- local_bh_disable();
- memcpy(&gwj->mod, &mod, sizeof(mod));
- local_bh_enable();
+ rcu_assign_pointer(gwj->cf_mod, mod);
+ kfree_rcu_mightsleep(old_cf);
return 0;
}
}
/* ifindex == 0 is not allowed for job creation */
- if (!ccgw.src_idx || !ccgw.dst_idx)
- return -ENODEV;
+ if (!ccgw.src_idx || !ccgw.dst_idx) {
+ err = -ENODEV;
+ goto out_free_cf;
+ }
gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL);
- if (!gwj)
- return -ENOMEM;
+ if (!gwj) {
+ err = -ENOMEM;
+ goto out_free_cf;
+ }
gwj->handled_frames = 0;
gwj->dropped_frames = 0;
@@ -1118,7 +1144,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
gwj->limit_hops = limhops;
/* insert already parsed information */
- memcpy(&gwj->mod, &mod, sizeof(mod));
+ RCU_INIT_POINTER(gwj->cf_mod, mod);
memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw));
err = -ENODEV;
@@ -1152,9 +1178,11 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!err)
hlist_add_head_rcu(&gwj->list, &net->can.cgw_list);
out:
- if (err)
+ if (err) {
kmem_cache_free(cgw_cache, gwj);
-
+out_free_cf:
+ kfree(mod);
+ }
return err;
}
@@ -1214,19 +1242,22 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
/* remove only the first matching entry */
hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
+ struct cf_mod *cf_mod;
+
if (gwj->flags != r->flags)
continue;
if (gwj->limit_hops != limhops)
continue;
+ cf_mod = cgw_job_cf_mod(gwj);
/* we have a match when uid is enabled and identical */
- if (gwj->mod.uid || mod.uid) {
- if (gwj->mod.uid != mod.uid)
+ if (cf_mod->uid || mod.uid) {
+ if (cf_mod->uid != mod.uid)
continue;
} else {
/* no uid => check for identical modifications */
- if (memcmp(&gwj->mod, &mod, sizeof(mod)))
+ if (memcmp(cf_mod, &mod, sizeof(mod)))
continue;
}
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 305dd72c844c..17226b2341d0 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -1132,7 +1132,7 @@ static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk,
todo_size = size;
- while (todo_size) {
+ do {
struct j1939_sk_buff_cb *skcb;
segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE,
@@ -1177,7 +1177,7 @@ static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk,
todo_size -= segment_size;
session->total_queued_size += segment_size;
- }
+ } while (todo_size);
switch (ret) {
case 0: /* OK */
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 95f7a7e65a73..9b72d118d756 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -382,8 +382,9 @@ sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session,
skb_queue_walk(&session->skb_queue, do_skb) {
do_skcb = j1939_skb_to_cb(do_skb);
- if (offset_start >= do_skcb->offset &&
- offset_start < (do_skcb->offset + do_skb->len)) {
+ if ((offset_start >= do_skcb->offset &&
+ offset_start < (do_skcb->offset + do_skb->len)) ||
+ (offset_start == 0 && do_skcb->offset == 0 && do_skb->len == 0)) {
skb = do_skb;
}
}
diff --git a/net/can/proc.c b/net/can/proc.c
index bbce97825f13..25fdf060e30d 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -118,6 +118,13 @@ void can_stat_update(struct timer_list *t)
struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
unsigned long j = jiffies; /* snapshot */
+ long rx_frames = atomic_long_read(&pkg_stats->rx_frames);
+ long tx_frames = atomic_long_read(&pkg_stats->tx_frames);
+ long matches = atomic_long_read(&pkg_stats->matches);
+ long rx_frames_delta = atomic_long_read(&pkg_stats->rx_frames_delta);
+ long tx_frames_delta = atomic_long_read(&pkg_stats->tx_frames_delta);
+ long matches_delta = atomic_long_read(&pkg_stats->matches_delta);
+
/* restart counting in timer context on user request */
if (user_reset)
can_init_stats(net);
@@ -127,35 +134,33 @@ void can_stat_update(struct timer_list *t)
can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (pkg_stats->rx_frames > (ULONG_MAX / HZ))
+ if (rx_frames > (LONG_MAX / HZ))
can_init_stats(net);
/* prevent overflow in calc_rate() */
- if (pkg_stats->tx_frames > (ULONG_MAX / HZ))
+ if (tx_frames > (LONG_MAX / HZ))
can_init_stats(net);
/* matches overflow - very improbable */
- if (pkg_stats->matches > (ULONG_MAX / 100))
+ if (matches > (LONG_MAX / 100))
can_init_stats(net);
/* calc total values */
- if (pkg_stats->rx_frames)
- pkg_stats->total_rx_match_ratio = (pkg_stats->matches * 100) /
- pkg_stats->rx_frames;
+ if (rx_frames)
+ pkg_stats->total_rx_match_ratio = (matches * 100) / rx_frames;
pkg_stats->total_tx_rate = calc_rate(pkg_stats->jiffies_init, j,
- pkg_stats->tx_frames);
+ tx_frames);
pkg_stats->total_rx_rate = calc_rate(pkg_stats->jiffies_init, j,
- pkg_stats->rx_frames);
+ rx_frames);
/* calc current values */
- if (pkg_stats->rx_frames_delta)
+ if (rx_frames_delta)
pkg_stats->current_rx_match_ratio =
- (pkg_stats->matches_delta * 100) /
- pkg_stats->rx_frames_delta;
+ (matches_delta * 100) / rx_frames_delta;
- pkg_stats->current_tx_rate = calc_rate(0, HZ, pkg_stats->tx_frames_delta);
- pkg_stats->current_rx_rate = calc_rate(0, HZ, pkg_stats->rx_frames_delta);
+ pkg_stats->current_tx_rate = calc_rate(0, HZ, tx_frames_delta);
+ pkg_stats->current_rx_rate = calc_rate(0, HZ, rx_frames_delta);
/* check / update maximum values */
if (pkg_stats->max_tx_rate < pkg_stats->current_tx_rate)
@@ -168,9 +173,9 @@ void can_stat_update(struct timer_list *t)
pkg_stats->max_rx_match_ratio = pkg_stats->current_rx_match_ratio;
/* clear values for 'current rate' calculation */
- pkg_stats->tx_frames_delta = 0;
- pkg_stats->rx_frames_delta = 0;
- pkg_stats->matches_delta = 0;
+ atomic_long_set(&pkg_stats->tx_frames_delta, 0);
+ atomic_long_set(&pkg_stats->rx_frames_delta, 0);
+ atomic_long_set(&pkg_stats->matches_delta, 0);
/* restart timer (one second) */
mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ));
@@ -214,9 +219,12 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats;
seq_putc(m, '\n');
- seq_printf(m, " %8ld transmitted frames (TXF)\n", pkg_stats->tx_frames);
- seq_printf(m, " %8ld received frames (RXF)\n", pkg_stats->rx_frames);
- seq_printf(m, " %8ld matched frames (RXMF)\n", pkg_stats->matches);
+ seq_printf(m, " %8ld transmitted frames (TXF)\n",
+ atomic_long_read(&pkg_stats->tx_frames));
+ seq_printf(m, " %8ld received frames (RXF)\n",
+ atomic_long_read(&pkg_stats->rx_frames));
+ seq_printf(m, " %8ld matched frames (RXMF)\n",
+ atomic_long_read(&pkg_stats->matches));
seq_putc(m, '\n');
diff --git a/net/core/dev.c b/net/core/dev.c
index 1867a6a8d76d..2ba2160dd093 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1012,6 +1012,12 @@ out:
return ret;
}
+static bool dev_addr_cmp(struct net_device *dev, unsigned short type,
+ const char *ha)
+{
+ return dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len);
+}
+
/**
* dev_getbyhwaddr_rcu - find a device by its hardware address
* @net: the applicable net namespace
@@ -1020,7 +1026,7 @@ out:
*
* Search for an interface by MAC address. Returns NULL if the device
* is not found or a pointer to the device.
- * The caller must hold RCU or RTNL.
+ * The caller must hold RCU.
* The returned device has not had its ref count increased
* and the caller must therefore be careful about locking
*
@@ -1032,14 +1038,39 @@ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
struct net_device *dev;
for_each_netdev_rcu(net, dev)
- if (dev->type == type &&
- !memcmp(dev->dev_addr, ha, dev->addr_len))
+ if (dev_addr_cmp(dev, type, ha))
return dev;
return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
+/**
+ * dev_getbyhwaddr() - find a device by its hardware address
+ * @net: the applicable net namespace
+ * @type: media type of device
+ * @ha: hardware address
+ *
+ * Similar to dev_getbyhwaddr_rcu(), but the owner needs to hold
+ * rtnl_lock.
+ *
+ * Context: rtnl_lock() must be held.
+ * Return: pointer to the net_device, or NULL if not found
+ */
+struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type,
+ const char *ha)
+{
+ struct net_device *dev;
+
+ ASSERT_RTNL();
+ for_each_netdev(net, dev)
+ if (dev_addr_cmp(dev, type, ha))
+ return dev;
+
+ return NULL;
+}
+EXPORT_SYMBOL(dev_getbyhwaddr);
+
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
struct net_device *dev, *ret = NULL;
@@ -1279,7 +1310,9 @@ int dev_change_name(struct net_device *dev, const char *newname)
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
+ write_seqlock_bh(&netdev_rename_lock);
memcpy(dev->name, oldname, IFNAMSIZ);
+ write_sequnlock_bh(&netdev_rename_lock);
WRITE_ONCE(dev->name_assign_type, old_assign_type);
up_write(&devnet_rename_sem);
return ret;
@@ -2134,8 +2167,8 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif
#ifdef CONFIG_NET_CLS_ACT
-DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
-EXPORT_SYMBOL(tcf_bypass_check_needed_key);
+DEFINE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
+EXPORT_SYMBOL(tcf_sw_enabled_key);
#endif
DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
@@ -3690,6 +3723,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
{
netdev_features_t features;
+ if (!skb_frags_readable(skb))
+ goto out_kfree_skb;
+
features = netif_skb_features(skb);
skb = validate_xmit_vlan(skb, features);
if (unlikely(!skb))
@@ -4028,10 +4064,13 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
if (!miniq)
return ret;
- if (static_branch_unlikely(&tcf_bypass_check_needed_key)) {
- if (tcf_block_bypass_sw(miniq->block))
- return ret;
- }
+ /* Global bypass */
+ if (!static_branch_likely(&tcf_sw_enabled_key))
+ return ret;
+
+ /* Block-wise bypass */
+ if (tcf_block_bypass_sw(miniq->block))
+ return ret;
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
@@ -4572,7 +4611,7 @@ use_local_napi:
* we have to raise NET_RX_SOFTIRQ.
*/
if (!sd->in_net_rx_action)
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
#ifdef CONFIG_RPS
@@ -5995,16 +6034,18 @@ static DEFINE_PER_CPU(struct work_struct, flush_works);
static void flush_backlog(struct work_struct *work)
{
struct sk_buff *skb, *tmp;
+ struct sk_buff_head list;
struct softnet_data *sd;
+ __skb_queue_head_init(&list);
local_bh_disable();
sd = this_cpu_ptr(&softnet_data);
backlog_lock_irq_disable(sd);
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
- if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+ if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->input_pkt_queue);
- dev_kfree_skb_irq(skb);
+ __skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
@@ -6012,14 +6053,16 @@ static void flush_backlog(struct work_struct *work)
local_lock_nested_bh(&softnet_data.process_queue_bh_lock);
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
- if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+ if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) {
__skb_unlink(skb, &sd->process_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&list, skb);
rps_input_queue_head_incr(sd);
}
}
local_unlock_nested_bh(&softnet_data.process_queue_bh_lock);
local_bh_enable();
+
+ __skb_queue_purge_reason(&list, SKB_DROP_REASON_DEV_READY);
}
static bool flush_required(int cpu)
@@ -9590,6 +9633,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
NL_SET_ERR_MSG(extack, "Program bound to different device");
return -EINVAL;
}
+ if (bpf_prog_is_dev_bound(new_prog->aux) && mode == XDP_MODE_SKB) {
+ NL_SET_ERR_MSG(extack, "Can't attach device-bound programs in generic mode");
+ return -EINVAL;
+ }
if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
return -EINVAL;
diff --git a/net/core/dev.h b/net/core/dev.h
index 2e3bb7669984..764e0097ccf2 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -148,6 +148,18 @@ void xdp_do_check_flushed(struct napi_struct *napi);
static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
+/* Best effort check that NAPI is not idle (can't be scheduled to run) */
+static inline void napi_assert_will_not_race(const struct napi_struct *napi)
+{
+ /* uninitialized instance, can't race */
+ if (!napi->poll_list.next)
+ return;
+
+ /* SCHED bit is set on disabled instances */
+ WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state));
+ WARN_ON(READ_ONCE(napi->list_owner) != -1);
+}
+
void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
#define XMIT_RECURSION_LIMIT 8
diff --git a/net/core/devmem.c b/net/core/devmem.c
index 11b91c12ee11..17f8a83a5ee7 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -108,6 +108,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
struct netdev_rx_queue *rxq;
unsigned long xa_idx;
unsigned int rxq_idx;
+ int err;
if (binding->list.next)
list_del(&binding->list);
@@ -119,7 +120,8 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
rxq_idx = get_netdev_rx_queue_index(rxq);
- WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
+ err = netdev_rx_queue_restart(binding->dev, rxq_idx);
+ WARN_ON(err && err != -ENETDOWN);
}
xa_erase(&net_devmem_dmabuf_bindings, binding->id);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 6efd4cccc9dd..212f0a048cab 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1734,30 +1734,30 @@ static int __init init_net_drop_monitor(void)
return -ENOSPC;
}
- rc = genl_register_family(&net_drop_monitor_family);
- if (rc) {
- pr_err("Could not create drop monitor netlink family\n");
- return rc;
+ for_each_possible_cpu(cpu) {
+ net_dm_cpu_data_init(cpu);
+ net_dm_hw_cpu_data_init(cpu);
}
- WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT);
rc = register_netdevice_notifier(&dropmon_net_notifier);
if (rc < 0) {
pr_crit("Failed to register netdevice notifier\n");
+ return rc;
+ }
+
+ rc = genl_register_family(&net_drop_monitor_family);
+ if (rc) {
+ pr_err("Could not create drop monitor netlink family\n");
goto out_unreg;
}
+ WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT);
rc = 0;
- for_each_possible_cpu(cpu) {
- net_dm_cpu_data_init(cpu);
- net_dm_hw_cpu_data_init(cpu);
- }
-
goto out;
out_unreg:
- genl_unregister_family(&net_drop_monitor_family);
+ WARN_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
out:
return rc;
}
@@ -1766,19 +1766,18 @@ static void exit_net_drop_monitor(void)
{
int cpu;
- BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
-
/*
* Because of the module_get/put we do in the trace state change path
* we are guaranteed not to have any current users when we get here
*/
+ BUG_ON(genl_unregister_family(&net_drop_monitor_family));
+
+ BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
for_each_possible_cpu(cpu) {
net_dm_hw_cpu_data_fini(cpu);
net_dm_cpu_data_fini(cpu);
}
-
- BUG_ON(genl_unregister_family(&net_drop_monitor_family));
}
module_init(init_net_drop_monitor);
diff --git a/net/core/dst.c b/net/core/dst.c
index 9552a90d4772..cc990706b645 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -148,8 +148,8 @@ void dst_dev_put(struct dst_entry *dst)
dst->obsolete = DST_OBSOLETE_DEAD;
if (dst->ops->ifdown)
dst->ops->ifdown(dst, dev);
- dst->input = dst_discard;
- dst->output = dst_discard_out;
+ WRITE_ONCE(dst->input, dst_discard);
+ WRITE_ONCE(dst->output, dst_discard_out);
dst->dev = blackhole_netdev;
netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
GFP_ATOMIC);
@@ -165,6 +165,14 @@ static void dst_count_dec(struct dst_entry *dst)
void dst_release(struct dst_entry *dst)
{
if (dst && rcuref_put(&dst->__rcuref)) {
+#ifdef CONFIG_DST_CACHE
+ if (dst->flags & DST_METADATA) {
+ struct metadata_dst *md_dst = (struct metadata_dst *)dst;
+
+ if (md_dst->type == METADATA_IP_TUNNEL)
+ dst_cache_reset_now(&md_dst->u.tun_info.dst_cache);
+ }
+#endif
dst_count_dec(dst);
call_rcu_hurry(&dst->rcu_head, dst_destroy_rcu);
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 154a2681f55c..388ff1d6d86b 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -37,8 +37,8 @@ static const struct fib_kuid_range fib_kuid_range_unset = {
bool fib_rule_matchall(const struct fib_rule *rule)
{
- if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id ||
- rule->flags)
+ if (READ_ONCE(rule->iifindex) || READ_ONCE(rule->oifindex) ||
+ rule->mark || rule->tun_id || rule->flags)
return false;
if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1)
return false;
@@ -260,12 +260,14 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
struct flowi *fl, int flags,
struct fib_lookup_arg *arg)
{
- int ret = 0;
+ int iifindex, oifindex, ret = 0;
- if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
+ iifindex = READ_ONCE(rule->iifindex);
+ if (iifindex && (iifindex != fl->flowi_iif))
goto out;
- if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
+ oifindex = READ_ONCE(rule->oifindex);
+ if (oifindex && (oifindex != fl->flowi_oif))
goto out;
if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
@@ -1038,14 +1040,14 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
if (rule->iifname[0]) {
if (nla_put_string(skb, FRA_IIFNAME, rule->iifname))
goto nla_put_failure;
- if (rule->iifindex == -1)
+ if (READ_ONCE(rule->iifindex) == -1)
frh->flags |= FIB_RULE_IIF_DETACHED;
}
if (rule->oifname[0]) {
if (nla_put_string(skb, FRA_OIFNAME, rule->oifname))
goto nla_put_failure;
- if (rule->oifindex == -1)
+ if (READ_ONCE(rule->oifindex) == -1)
frh->flags |= FIB_RULE_OIF_DETACHED;
}
@@ -1217,10 +1219,10 @@ static void attach_rules(struct list_head *rules, struct net_device *dev)
list_for_each_entry(rule, rules, list) {
if (rule->iifindex == -1 &&
strcmp(dev->name, rule->iifname) == 0)
- rule->iifindex = dev->ifindex;
+ WRITE_ONCE(rule->iifindex, dev->ifindex);
if (rule->oifindex == -1 &&
strcmp(dev->name, rule->oifname) == 0)
- rule->oifindex = dev->ifindex;
+ WRITE_ONCE(rule->oifindex, dev->ifindex);
}
}
@@ -1230,9 +1232,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev)
list_for_each_entry(rule, rules, list) {
if (rule->iifindex == dev->ifindex)
- rule->iifindex = -1;
+ WRITE_ONCE(rule->iifindex, -1);
if (rule->oifindex == dev->ifindex)
- rule->oifindex = -1;
+ WRITE_ONCE(rule->oifindex, -1);
}
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 54a53fae9e98..02fedc404d7f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -218,24 +218,36 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
return 0;
}
+static int bpf_skb_load_helper_convert_offset(const struct sk_buff *skb, int offset)
+{
+ if (likely(offset >= 0))
+ return offset;
+
+ if (offset >= SKF_NET_OFF)
+ return offset - SKF_NET_OFF + skb_network_offset(skb);
+
+ if (offset >= SKF_LL_OFF && skb_mac_header_was_set(skb))
+ return offset - SKF_LL_OFF + skb_mac_offset(skb);
+
+ return INT_MIN;
+}
+
BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
- u8 tmp, *ptr;
+ u8 tmp;
const int len = sizeof(tmp);
- if (offset >= 0) {
- if (headlen - offset >= len)
- return *(u8 *)(data + offset);
- if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
- return tmp;
- } else {
- ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
- if (likely(ptr))
- return *(u8 *)ptr;
- }
+ offset = bpf_skb_load_helper_convert_offset(skb, offset);
+ if (offset == INT_MIN)
+ return -EFAULT;
- return -EFAULT;
+ if (headlen - offset >= len)
+ return *(u8 *)(data + offset);
+ if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+ return tmp;
+ else
+ return -EFAULT;
}
BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
@@ -248,21 +260,19 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
- __be16 tmp, *ptr;
+ __be16 tmp;
const int len = sizeof(tmp);
- if (offset >= 0) {
- if (headlen - offset >= len)
- return get_unaligned_be16(data + offset);
- if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
- return be16_to_cpu(tmp);
- } else {
- ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
- if (likely(ptr))
- return get_unaligned_be16(ptr);
- }
+ offset = bpf_skb_load_helper_convert_offset(skb, offset);
+ if (offset == INT_MIN)
+ return -EFAULT;
- return -EFAULT;
+ if (headlen - offset >= len)
+ return get_unaligned_be16(data + offset);
+ if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+ return be16_to_cpu(tmp);
+ else
+ return -EFAULT;
}
BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
@@ -275,21 +285,19 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
data, int, headlen, int, offset)
{
- __be32 tmp, *ptr;
+ __be32 tmp;
const int len = sizeof(tmp);
- if (likely(offset >= 0)) {
- if (headlen - offset >= len)
- return get_unaligned_be32(data + offset);
- if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
- return be32_to_cpu(tmp);
- } else {
- ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
- if (likely(ptr))
- return get_unaligned_be32(ptr);
- }
+ offset = bpf_skb_load_helper_convert_offset(skb, offset);
+ if (offset == INT_MIN)
+ return -EFAULT;
- return -EFAULT;
+ if (headlen - offset >= len)
+ return get_unaligned_be32(data + offset);
+ if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
+ return be32_to_cpu(tmp);
+ else
+ return -EFAULT;
}
BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
@@ -1972,10 +1980,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
bool do_mforce = flags & BPF_F_MARK_ENFORCE;
+ bool is_ipv6 = flags & BPF_F_IPV6;
__sum16 *ptr;
if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
- BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
+ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6)))
return -EINVAL;
if (unlikely(offset > 0xffff || offset & 1))
return -EFAULT;
@@ -1991,7 +2000,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
if (unlikely(from != 0))
return -EINVAL;
- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
+ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6);
break;
case 2:
inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
@@ -2518,6 +2527,7 @@ int skb_do_redirect(struct sk_buff *skb)
goto out_drop;
skb->dev = dev;
dev_sw_netstats_rx_add(dev, skb->len);
+ skb_scrub_packet(skb, false);
return -EAGAIN;
}
return flags & BPF_F_NEIGH ?
@@ -3240,6 +3250,13 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto)
+{
+ skb->protocol = htons(proto);
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
+}
+
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
{
/* Caller already did skb_cow() with len as headroom,
@@ -3336,7 +3353,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
}
}
- skb->protocol = htons(ETH_P_IPV6);
+ bpf_skb_change_protocol(skb, ETH_P_IPV6);
skb_clear_hash(skb);
return 0;
@@ -3366,7 +3383,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
}
}
- skb->protocol = htons(ETH_P_IP);
+ bpf_skb_change_protocol(skb, ETH_P_IP);
skb_clear_hash(skb);
return 0;
@@ -3557,10 +3574,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
/* Match skb->protocol to new outer l3 protocol */
if (skb->protocol == htons(ETH_P_IP) &&
flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
- skb->protocol = htons(ETH_P_IPV6);
+ bpf_skb_change_protocol(skb, ETH_P_IPV6);
else if (skb->protocol == htons(ETH_P_IPV6) &&
flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
- skb->protocol = htons(ETH_P_IP);
+ bpf_skb_change_protocol(skb, ETH_P_IP);
}
if (skb_is_gso(skb)) {
@@ -3613,10 +3630,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
/* Match skb->protocol to new outer l3 protocol */
if (skb->protocol == htons(ETH_P_IP) &&
flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
- skb->protocol = htons(ETH_P_IPV6);
+ bpf_skb_change_protocol(skb, ETH_P_IPV6);
else if (skb->protocol == htons(ETH_P_IPV6) &&
flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
- skb->protocol = htons(ETH_P_IP);
+ bpf_skb_change_protocol(skb, ETH_P_IP);
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -7662,7 +7679,7 @@ static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
@@ -9390,6 +9407,9 @@ static bool flow_dissector_is_valid_access(int off, int size,
if (off < 0 || off >= sizeof(struct __sk_buff))
return false;
+ if (off % size != 0)
+ return false;
+
if (type == BPF_WRITE)
return false;
@@ -11263,6 +11283,7 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
struct sock_reuseport *reuse;
struct sock *selected_sk;
+ int err;
selected_sk = map->ops->map_lookup_elem(map, key);
if (!selected_sk)
@@ -11270,10 +11291,6 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
if (!reuse) {
- /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
- if (sk_is_refcounted(selected_sk))
- sock_put(selected_sk);
-
/* reuseport_array has only sk with non NULL sk_reuseport_cb.
* The only (!reuse) case here is - the sk has already been
* unhashed (e.g. by close()), so treat it as -ENOENT.
@@ -11281,24 +11298,33 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
* Other maps (e.g. sock_map) do not provide this guarantee and
* the sk may never be in the reuseport group to begin with.
*/
- return is_sockarray ? -ENOENT : -EINVAL;
+ err = is_sockarray ? -ENOENT : -EINVAL;
+ goto error;
}
if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
struct sock *sk = reuse_kern->sk;
- if (sk->sk_protocol != selected_sk->sk_protocol)
- return -EPROTOTYPE;
- else if (sk->sk_family != selected_sk->sk_family)
- return -EAFNOSUPPORT;
-
- /* Catch all. Likely bound to a different sockaddr. */
- return -EBADFD;
+ if (sk->sk_protocol != selected_sk->sk_protocol) {
+ err = -EPROTOTYPE;
+ } else if (sk->sk_family != selected_sk->sk_family) {
+ err = -EAFNOSUPPORT;
+ } else {
+ /* Catch all. Likely bound to a different sockaddr. */
+ err = -EBADFD;
+ }
+ goto error;
}
reuse_kern->selected_sk = selected_sk;
return 0;
+error:
+ /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
+ if (sk_is_refcounted(selected_sk))
+ sock_put(selected_sk);
+
+ return err;
}
static const struct bpf_func_proto sk_select_reuseport_proto = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0e638a37aa09..9cd8de6bebb5 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -853,23 +853,30 @@ __skb_flow_dissect_ports(const struct sk_buff *skb,
void *target_container, const void *data,
int nhoff, u8 ip_proto, int hlen)
{
- enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
- struct flow_dissector_key_ports *key_ports;
+ struct flow_dissector_key_ports_range *key_ports_range = NULL;
+ struct flow_dissector_key_ports *key_ports = NULL;
+ __be32 ports;
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
- dissector_ports = FLOW_DISSECTOR_KEY_PORTS;
- else if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS_RANGE))
- dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE;
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE))
+ key_ports_range = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE,
+ target_container);
- if (dissector_ports == FLOW_DISSECTOR_KEY_MAX)
+ if (!key_ports && !key_ports_range)
return;
- key_ports = skb_flow_dissector_target(flow_dissector,
- dissector_ports,
- target_container);
- key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
- data, hlen);
+ ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
+
+ if (key_ports)
+ key_ports->ports = ports;
+
+ if (key_ports_range)
+ key_ports_range->tp.ports = ports;
}
static void
@@ -924,6 +931,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
struct flow_dissector *flow_dissector,
void *target_container)
{
+ struct flow_dissector_key_ports_range *key_ports_range = NULL;
struct flow_dissector_key_ports *key_ports = NULL;
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
@@ -968,20 +976,21 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
- if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
- else if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS_RANGE))
- key_ports = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS_RANGE,
- target_container);
-
- if (key_ports) {
key_ports->src = flow_keys->sport;
key_ports->dst = flow_keys->dport;
}
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE)) {
+ key_ports_range = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS_RANGE,
+ target_container);
+ key_ports_range->tp.src = flow_keys->sport;
+ key_ports_range->tp.dst = flow_keys->dport;
+ }
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
@@ -1108,10 +1117,12 @@ bool __skb_flow_dissect(const struct net *net,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
+ rcu_read_lock();
+
if (skb) {
if (!net) {
if (skb->dev)
- net = dev_net(skb->dev);
+ net = dev_net_rcu(skb->dev);
else if (skb->sk)
net = sock_net(skb->sk);
}
@@ -1122,7 +1133,6 @@ bool __skb_flow_dissect(const struct net *net,
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
struct bpf_prog_array *run_array;
- rcu_read_lock();
run_array = rcu_dereference(init_net.bpf.run_array[type]);
if (!run_array)
run_array = rcu_dereference(net->bpf.run_array[type]);
@@ -1150,17 +1160,17 @@ bool __skb_flow_dissect(const struct net *net,
prog = READ_ONCE(run_array->items[0].prog);
result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
hlen, flags);
- if (result == BPF_FLOW_DISSECTOR_CONTINUE)
- goto dissect_continue;
- __skb_flow_bpf_to_target(&flow_keys, flow_dissector,
- target_container);
- rcu_read_unlock();
- return result == BPF_OK;
+ if (result != BPF_FLOW_DISSECTOR_CONTINUE) {
+ __skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+ target_container);
+ rcu_read_unlock();
+ return result == BPF_OK;
+ }
}
-dissect_continue:
- rcu_read_unlock();
}
+ rcu_read_unlock();
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
diff --git a/net/core/gro.c b/net/core/gro.c
index d1f44084e978..0ad549b07e03 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -7,9 +7,6 @@
#define MAX_GRO_SKBS 8
-/* This should be increased if a protocol with a bigger head is added. */
-#define GRO_MAX_HEAD (MAX_HEADER + 128)
-
static DEFINE_SPINLOCK(offload_lock);
/**
@@ -656,6 +653,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->pkt_type = PACKET_HOST;
skb->encapsulation = 0;
+ skb->ip_summed = CHECKSUM_NONE;
skb_shinfo(skb)->gso_type = 0;
skb_shinfo(skb)->gso_size = 0;
if (unlikely(skb->slow_gro)) {
diff --git a/net/core/ieee8021q_helpers.c b/net/core/ieee8021q_helpers.c
index 759a9b9f3f89..669b357b73b2 100644
--- a/net/core/ieee8021q_helpers.c
+++ b/net/core/ieee8021q_helpers.c
@@ -7,6 +7,11 @@
#include <net/dscp.h>
#include <net/ieee8021q.h>
+/* verify that table covers all 8 traffic types */
+#define TT_MAP_SIZE_OK(tbl) \
+ compiletime_assert(ARRAY_SIZE(tbl) == IEEE8021Q_TT_MAX, \
+ #tbl " size mismatch")
+
/* The following arrays map Traffic Types (TT) to traffic classes (TC) for
* different number of queues as shown in the example provided by
* IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic class mapping" and
@@ -101,51 +106,28 @@ int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues)
switch (num_queues) {
case 8:
- compiletime_assert(ARRAY_SIZE(ieee8021q_8queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_8queue_tt_tc_map != max - 1");
+ TT_MAP_SIZE_OK(ieee8021q_8queue_tt_tc_map);
return ieee8021q_8queue_tt_tc_map[tt];
case 7:
- compiletime_assert(ARRAY_SIZE(ieee8021q_7queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_7queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_7queue_tt_tc_map);
return ieee8021q_7queue_tt_tc_map[tt];
case 6:
- compiletime_assert(ARRAY_SIZE(ieee8021q_6queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_6queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_6queue_tt_tc_map);
return ieee8021q_6queue_tt_tc_map[tt];
case 5:
- compiletime_assert(ARRAY_SIZE(ieee8021q_5queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_5queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_5queue_tt_tc_map);
return ieee8021q_5queue_tt_tc_map[tt];
case 4:
- compiletime_assert(ARRAY_SIZE(ieee8021q_4queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_4queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_4queue_tt_tc_map);
return ieee8021q_4queue_tt_tc_map[tt];
case 3:
- compiletime_assert(ARRAY_SIZE(ieee8021q_3queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_3queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_3queue_tt_tc_map);
return ieee8021q_3queue_tt_tc_map[tt];
case 2:
- compiletime_assert(ARRAY_SIZE(ieee8021q_2queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_2queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_2queue_tt_tc_map);
return ieee8021q_2queue_tt_tc_map[tt];
case 1:
- compiletime_assert(ARRAY_SIZE(ieee8021q_1queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_1queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_1queue_tt_tc_map);
return ieee8021q_1queue_tt_tc_map[tt];
}
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 711cd3b4347a..f63586c9ce02 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -23,6 +23,8 @@
#include <net/ip6_fib.h>
#include <net/rtnh.h>
+#include "dev.h"
+
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
@@ -325,82 +327,132 @@ EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
- int ret = -EINVAL;
+ struct dst_entry *dst;
+ int ret;
- if (!dst)
+ local_bh_disable();
+
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
+ __func__);
+ ret = -ENETDOWN;
goto drop;
+ }
+
+ dst = skb_dst(skb);
+ if (!dst) {
+ ret = -EINVAL;
+ goto drop;
+ }
lwtstate = dst->lwtstate;
if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
- lwtstate->type > LWTUNNEL_ENCAP_MAX)
- return 0;
+ lwtstate->type > LWTUNNEL_ENCAP_MAX) {
+ ret = 0;
+ goto out;
+ }
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
- if (likely(ops && ops->output))
+ if (likely(ops && ops->output)) {
+ dev_xmit_recursion_inc();
ret = ops->output(net, sk, skb);
+ dev_xmit_recursion_dec();
+ }
rcu_read_unlock();
if (ret == -EOPNOTSUPP)
goto drop;
- return ret;
+ goto out;
drop:
kfree_skb(skb);
+out:
+ local_bh_enable();
return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_output);
int lwtunnel_xmit(struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
- int ret = -EINVAL;
+ struct dst_entry *dst;
+ int ret;
- if (!dst)
+ local_bh_disable();
+
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
+ __func__);
+ ret = -ENETDOWN;
goto drop;
+ }
+
+ dst = skb_dst(skb);
+ if (!dst) {
+ ret = -EINVAL;
+ goto drop;
+ }
lwtstate = dst->lwtstate;
if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
- lwtstate->type > LWTUNNEL_ENCAP_MAX)
- return 0;
+ lwtstate->type > LWTUNNEL_ENCAP_MAX) {
+ ret = 0;
+ goto out;
+ }
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
- if (likely(ops && ops->xmit))
+ if (likely(ops && ops->xmit)) {
+ dev_xmit_recursion_inc();
ret = ops->xmit(skb);
+ dev_xmit_recursion_dec();
+ }
rcu_read_unlock();
if (ret == -EOPNOTSUPP)
goto drop;
- return ret;
+ goto out;
drop:
kfree_skb(skb);
+out:
+ local_bh_enable();
return ret;
}
EXPORT_SYMBOL_GPL(lwtunnel_xmit);
int lwtunnel_input(struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
struct lwtunnel_state *lwtstate;
- int ret = -EINVAL;
+ struct dst_entry *dst;
+ int ret;
+
+ DEBUG_NET_WARN_ON_ONCE(!in_softirq());
+
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
+ __func__);
+ ret = -ENETDOWN;
+ goto drop;
+ }
- if (!dst)
+ dst = skb_dst(skb);
+ if (!dst) {
+ ret = -EINVAL;
goto drop;
+ }
lwtstate = dst->lwtstate;
if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
@@ -410,8 +462,11 @@ int lwtunnel_input(struct sk_buff *skb)
ret = -EOPNOTSUPP;
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
- if (likely(ops && ops->input))
+ if (likely(ops && ops->input)) {
+ dev_xmit_recursion_inc();
ret = ops->input(skb);
+ dev_xmit_recursion_dec();
+ }
rcu_read_unlock();
if (ret == -EOPNOTSUPP)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index cc58315a40a7..96786016dbb4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -55,7 +55,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev);
+ struct net_device *dev,
+ bool skip_perm);
#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
@@ -444,7 +445,7 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
{
write_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev, skip_perm);
- pneigh_ifdown_and_unlock(tbl, dev);
+ pneigh_ifdown_and_unlock(tbl, dev, skip_perm);
pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
tbl->family);
if (skb_queue_empty_lockless(&tbl->proxy_queue))
@@ -847,7 +848,8 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
}
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev)
+ struct net_device *dev,
+ bool skip_perm)
{
struct pneigh_entry *n, **np, *freelist = NULL;
u32 h;
@@ -855,12 +857,15 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
for (h = 0; h <= PNEIGH_HASHMASK; h++) {
np = &tbl->phash_buckets[h];
while ((n = *np) != NULL) {
+ if (skip_perm && n->permanent)
+ goto skip;
if (!dev || n->dev == dev) {
*np = n->next;
n->next = freelist;
freelist = n;
continue;
}
+skip:
np = &n->next;
}
}
@@ -2041,6 +2046,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
pn = pneigh_lookup(tbl, net, dst, dev, 1);
if (pn) {
pn->flags = ndm_flags;
+ pn->permanent = !!(ndm->ndm_state & NUD_PERMANENT);
if (protocol)
pn->protocol = protocol;
err = 0;
@@ -2301,6 +2307,7 @@ static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
[NDTPA_IFINDEX] = { .type = NLA_U32 },
[NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
+ [NDTPA_QUEUE_LENBYTES] = { .type = NLA_U32 },
[NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
[NDTPA_APP_PROBES] = { .type = NLA_U32 },
[NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
@@ -3513,10 +3520,12 @@ static const struct seq_operations neigh_stat_seq_ops = {
static void __neigh_notify(struct neighbour *n, int type, int flags,
u32 pid)
{
- struct net *net = dev_net(n->dev);
struct sk_buff *skb;
int err = -ENOBUFS;
+ struct net *net;
+ rcu_read_lock();
+ net = dev_net_rcu(n->dev);
skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
goto errout;
@@ -3529,9 +3538,11 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
goto errout;
}
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
- return;
+ goto out;
errout:
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+out:
+ rcu_read_unlock();
}
void neigh_app_ns(struct neighbour *n)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 70fea7c1a4b0..ee3c1b37d06c 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -458,7 +458,7 @@ static void net_complete_free(void)
}
-static void net_free(struct net *net)
+void net_passive_dec(struct net *net)
{
if (refcount_dec_and_test(&net->passive)) {
kfree(rcu_access_pointer(net->gen));
@@ -476,7 +476,7 @@ void net_drop_ns(void *p)
struct net *net = (struct net *)p;
if (net)
- net_free(net);
+ net_passive_dec(net);
}
struct net *copy_net_ns(unsigned long flags,
@@ -517,7 +517,7 @@ put_userns:
key_remove_domain(net->key_domain);
#endif
put_user_ns(user_ns);
- net_free(net);
+ net_passive_dec(net);
dec_ucounts:
dec_net_namespaces(ucounts);
return ERR_PTR(rv);
@@ -662,7 +662,7 @@ static void cleanup_net(struct work_struct *work)
key_remove_domain(net->key_domain);
#endif
put_user_ns(net->user_ns);
- net_free(net);
+ net_passive_dec(net);
}
}
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index b28424ae06d5..8614988fc67b 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -178,6 +178,16 @@ static const struct genl_multicast_group netdev_nl_mcgrps[] = {
[NETDEV_NLGRP_PAGE_POOL] = { "page-pool", },
};
+static void __netdev_nl_sock_priv_init(void *priv)
+{
+ netdev_nl_sock_priv_init(priv);
+}
+
+static void __netdev_nl_sock_priv_destroy(void *priv)
+{
+ netdev_nl_sock_priv_destroy(priv);
+}
+
struct genl_family netdev_nl_family __ro_after_init = {
.name = NETDEV_FAMILY_NAME,
.version = NETDEV_FAMILY_VERSION,
@@ -189,6 +199,6 @@ struct genl_family netdev_nl_family __ro_after_init = {
.mcgrps = netdev_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps),
.sock_priv_size = sizeof(struct list_head),
- .sock_priv_init = (void *)netdev_nl_sock_priv_init,
- .sock_priv_destroy = (void *)netdev_nl_sock_priv_destroy,
+ .sock_priv_init = __netdev_nl_sock_priv_init,
+ .sock_priv_destroy = __netdev_nl_sock_priv_destroy,
};
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index ad426b3a03b5..0fe537781bc4 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -616,25 +616,66 @@ netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp,
return 0;
}
+/**
+ * netdev_stat_queue_sum() - add up queue stats from range of queues
+ * @netdev: net_device
+ * @rx_start: index of the first Rx queue to query
+ * @rx_end: index after the last Rx queue (first *not* to query)
+ * @rx_sum: output Rx stats, should be already initialized
+ * @tx_start: index of the first Tx queue to query
+ * @tx_end: index after the last Tx queue (first *not* to query)
+ * @tx_sum: output Tx stats, should be already initialized
+ *
+ * Add stats from [start, end) range of queue IDs to *x_sum structs.
+ * The sum structs must be already initialized. Usually this
+ * helper is invoked from the .get_base_stats callbacks of drivers
+ * to account for stats of disabled queues. In that case the ranges
+ * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues).
+ */
+void netdev_stat_queue_sum(struct net_device *netdev,
+ int rx_start, int rx_end,
+ struct netdev_queue_stats_rx *rx_sum,
+ int tx_start, int tx_end,
+ struct netdev_queue_stats_tx *tx_sum)
+{
+ const struct netdev_stat_ops *ops;
+ struct netdev_queue_stats_rx rx;
+ struct netdev_queue_stats_tx tx;
+ int i;
+
+ ops = netdev->stat_ops;
+
+ for (i = rx_start; i < rx_end; i++) {
+ memset(&rx, 0xff, sizeof(rx));
+ if (ops->get_queue_stats_rx)
+ ops->get_queue_stats_rx(netdev, i, &rx);
+ netdev_nl_stats_add(rx_sum, &rx, sizeof(rx));
+ }
+ for (i = tx_start; i < tx_end; i++) {
+ memset(&tx, 0xff, sizeof(tx));
+ if (ops->get_queue_stats_tx)
+ ops->get_queue_stats_tx(netdev, i, &tx);
+ netdev_nl_stats_add(tx_sum, &tx, sizeof(tx));
+ }
+}
+EXPORT_SYMBOL(netdev_stat_queue_sum);
+
static int
netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info)
{
- struct netdev_queue_stats_rx rx_sum, rx;
- struct netdev_queue_stats_tx tx_sum, tx;
- const struct netdev_stat_ops *ops;
+ struct netdev_queue_stats_rx rx_sum;
+ struct netdev_queue_stats_tx tx_sum;
void *hdr;
- int i;
- ops = netdev->stat_ops;
/* Netdev can't guarantee any complete counters */
- if (!ops->get_base_stats)
+ if (!netdev->stat_ops->get_base_stats)
return 0;
memset(&rx_sum, 0xff, sizeof(rx_sum));
memset(&tx_sum, 0xff, sizeof(tx_sum));
- ops->get_base_stats(netdev, &rx_sum, &tx_sum);
+ netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum);
/* The op was there, but nothing reported, don't bother */
if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) &&
@@ -647,18 +688,8 @@ netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp,
if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex))
goto nla_put_failure;
- for (i = 0; i < netdev->real_num_rx_queues; i++) {
- memset(&rx, 0xff, sizeof(rx));
- if (ops->get_queue_stats_rx)
- ops->get_queue_stats_rx(netdev, i, &rx);
- netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx));
- }
- for (i = 0; i < netdev->real_num_tx_queues; i++) {
- memset(&tx, 0xff, sizeof(tx));
- if (ops->get_queue_stats_tx)
- ops->get_queue_stats_tx(netdev, i, &tx);
- netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx));
- }
+ netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum,
+ 0, netdev->real_num_tx_queues, &tx_sum);
if (netdev_nl_stats_write_rx(rsp, &rx_sum) ||
netdev_nl_stats_write_tx(rsp, &tx_sum))
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
index 7eadb8393e00..cd95394399b4 100644
--- a/net/core/netmem_priv.h
+++ b/net/core/netmem_priv.h
@@ -5,7 +5,7 @@
static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
{
- return __netmem_clear_lsb(netmem)->pp_magic;
+ return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
}
static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
@@ -15,9 +15,16 @@ static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
static inline void netmem_clear_pp_magic(netmem_ref netmem)
{
+ WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK);
+
__netmem_clear_lsb(netmem)->pp_magic = 0;
}
+static inline bool netmem_is_pp(netmem_ref netmem)
+{
+ return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE;
+}
+
static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
{
__netmem_clear_lsb(netmem)->pp = pool;
@@ -28,4 +35,28 @@ static inline void netmem_set_dma_addr(netmem_ref netmem,
{
__netmem_clear_lsb(netmem)->dma_addr = dma_addr;
}
+
+static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
+{
+ unsigned long magic;
+
+ if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
+ return 0;
+
+ magic = __netmem_clear_lsb(netmem)->pp_magic;
+
+ return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT;
+}
+
+static inline void netmem_set_dma_index(netmem_ref netmem,
+ unsigned long id)
+{
+ unsigned long magic;
+
+ if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
+ return;
+
+ magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT);
+ __netmem_clear_lsb(netmem)->pp_magic = magic;
+}
#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 45fb60bc4803..87182a4272bf 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -319,6 +319,7 @@ static int netpoll_owner_active(struct net_device *dev)
static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
netdev_tx_t status = NETDEV_TX_BUSY;
+ netdev_tx_t ret = NET_XMIT_DROP;
struct net_device *dev;
unsigned long tries;
/* It is up to the caller to keep npinfo alive. */
@@ -327,11 +328,12 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
lockdep_assert_irqs_disabled();
dev = np->dev;
+ rcu_read_lock();
npinfo = rcu_dereference_bh(dev->npinfo);
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
dev_kfree_skb_irq(skb);
- return NET_XMIT_DROP;
+ goto out;
}
/* don't get messages out of order, and no recursion */
@@ -370,7 +372,10 @@ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
skb_queue_tail(&npinfo->txq, skb);
schedule_delayed_work(&npinfo->tx_work,0);
}
- return NETDEV_TX_OK;
+ ret = NETDEV_TX_OK;
+out:
+ rcu_read_unlock();
+ return ret;
}
netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
@@ -779,6 +784,13 @@ put_noaddr:
if (err)
goto put;
rtnl_unlock();
+
+ /* Make sure all NAPI polls which started before dev->npinfo
+ * was visible have exited before we start calling NAPI poll.
+ * NAPI skips locking if dev->npinfo is NULL.
+ */
+ synchronize_rcu();
+
return 0;
put:
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a813d30d2135..b1c3e0ad6dbf 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -25,6 +25,7 @@
#include <trace/events/page_pool.h>
+#include "dev.h"
#include "mp_dmabuf_devmem.h"
#include "netmem_priv.h"
#include "page_pool_priv.h"
@@ -150,9 +151,9 @@ u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
EXPORT_SYMBOL(page_pool_ethtool_stats_get);
#else
-#define alloc_stat_inc(pool, __stat)
-#define recycle_stat_inc(pool, __stat)
-#define recycle_stat_add(pool, __stat, val)
+#define alloc_stat_inc(...) do { } while (0)
+#define recycle_stat_inc(...) do { } while (0)
+#define recycle_stat_add(...) do { } while (0)
#endif
static bool page_pool_producer_lock(struct page_pool *pool)
@@ -272,8 +273,7 @@ static int page_pool_init(struct page_pool *pool,
/* Driver calling page_pool_create() also call page_pool_destroy() */
refcount_set(&pool->user_cnt, 1);
- if (pool->dma_map)
- get_device(pool->p.dev);
+ xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1);
if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
/* We rely on rtnl_lock()ing to make sure netdev_rx_queue
@@ -311,9 +311,7 @@ free_ptr_ring:
static void page_pool_uninit(struct page_pool *pool)
{
ptr_ring_cleanup(&pool->ring, NULL);
-
- if (pool->dma_map)
- put_device(pool->p.dev);
+ xa_destroy(&pool->dma_mapped);
#ifdef CONFIG_PAGE_POOL_STATS
if (!pool->system)
@@ -454,13 +452,21 @@ page_pool_dma_sync_for_device(const struct page_pool *pool,
netmem_ref netmem,
u32 dma_sync_size)
{
- if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
- __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
+ if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) {
+ rcu_read_lock();
+ /* re-check under rcu_read_lock() to sync with page_pool_scrub() */
+ if (pool->dma_sync)
+ __page_pool_dma_sync_for_device(pool, netmem,
+ dma_sync_size);
+ rcu_read_unlock();
+ }
}
-static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
+static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp)
{
dma_addr_t dma;
+ int err;
+ u32 id;
/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
* since dma_addr_t can be either 32 or 64 bits and does not always fit
@@ -474,15 +480,30 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
if (dma_mapping_error(pool->p.dev, dma))
return false;
- if (page_pool_set_dma_addr_netmem(netmem, dma))
+ if (page_pool_set_dma_addr_netmem(netmem, dma)) {
+ WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
goto unmap_failed;
+ }
+
+ if (in_softirq())
+ err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
+ PP_DMA_INDEX_LIMIT, gfp);
+ else
+ err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
+ PP_DMA_INDEX_LIMIT, gfp);
+ if (err) {
+ WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@");
+ goto unset_failed;
+ }
+ netmem_set_dma_index(netmem, id);
page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
return true;
+unset_failed:
+ page_pool_set_dma_addr_netmem(netmem, 0);
unmap_failed:
- WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
@@ -499,7 +520,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
if (unlikely(!page))
return NULL;
- if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) {
+ if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) {
put_page(page);
return NULL;
}
@@ -546,7 +567,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
*/
for (i = 0; i < nr_pages; i++) {
netmem = pool->alloc.cache[i];
- if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) {
+ if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) {
put_page(netmem_to_page(netmem));
continue;
}
@@ -648,6 +669,8 @@ void page_pool_clear_pp_info(netmem_ref netmem)
static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
netmem_ref netmem)
{
+ struct page *old, *page = netmem_to_page(netmem);
+ unsigned long id;
dma_addr_t dma;
if (!pool->dma_map)
@@ -656,6 +679,17 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
*/
return;
+ id = netmem_get_dma_index(netmem);
+ if (!id)
+ return;
+
+ if (in_softirq())
+ old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0);
+ else
+ old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0);
+ if (old != page)
+ return;
+
dma = page_pool_get_dma_addr_netmem(netmem);
/* When page is unmapped, it cannot be returned to our pool */
@@ -663,6 +697,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
page_pool_set_dma_addr_netmem(netmem, 0);
+ netmem_set_dma_index(netmem, 0);
}
/* Disconnects a page (from a page_pool). API users can have a need
@@ -699,19 +734,16 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem)
{
- int ret;
- /* BH protection not needed if current is softirq */
- if (in_softirq())
- ret = ptr_ring_produce(&pool->ring, (__force void *)netmem);
- else
- ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem);
+ bool in_softirq, ret;
- if (!ret) {
+ /* BH protection not needed if current is softirq */
+ in_softirq = page_pool_producer_lock(pool);
+ ret = !__ptr_ring_produce(&pool->ring, (__force void *)netmem);
+ if (ret)
recycle_stat_inc(pool, ring);
- return true;
- }
+ page_pool_producer_unlock(pool, in_softirq);
- return false;
+ return ret;
}
/* Only allow direct recycling in special circumstances, into the
@@ -797,6 +829,10 @@ static bool page_pool_napi_local(const struct page_pool *pool)
const struct napi_struct *napi;
u32 cpuid;
+ /* On PREEMPT_RT the softirq can be preempted by the consumer */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return false;
+
if (unlikely(!in_softirq()))
return false;
@@ -1037,8 +1073,29 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
static void page_pool_scrub(struct page_pool *pool)
{
+ unsigned long id;
+ void *ptr;
+
page_pool_empty_alloc_cache_once(pool);
- pool->destroy_cnt++;
+ if (!pool->destroy_cnt++ && pool->dma_map) {
+ if (pool->dma_sync) {
+ /* Disable page_pool_dma_sync_for_device() */
+ pool->dma_sync = false;
+
+ /* Make sure all concurrent returns that may see the old
+ * value of dma_sync (and thus perform a sync) have
+ * finished before doing the unmapping below. Skip the
+ * wait if the device doesn't actually need syncing, or
+ * if there are no outstanding mapped pages.
+ */
+ if (dma_dev_need_sync(pool->p.dev) &&
+ !xa_empty(&pool->dma_mapped))
+ synchronize_net();
+ }
+
+ xa_for_each(&pool->dma_mapped, id, ptr)
+ __page_pool_release_page_dma(pool, page_to_netmem(ptr));
+ }
/* No more consumers should exist, but producers could still
* be in-flight.
@@ -1048,10 +1105,14 @@ static void page_pool_scrub(struct page_pool *pool)
static int page_pool_release(struct page_pool *pool)
{
+ bool in_softirq;
int inflight;
page_pool_scrub(pool);
inflight = page_pool_inflight(pool, true);
+ /* Acquire producer lock to make sure producers have exited. */
+ in_softirq = page_pool_producer_lock(pool);
+ page_pool_producer_unlock(pool, in_softirq);
if (!inflight)
__page_pool_destroy(pool);
@@ -1066,7 +1127,13 @@ static void page_pool_release_retry(struct work_struct *wq)
int inflight;
inflight = page_pool_release(pool);
- if (!inflight)
+ /* In rare cases, a driver bug may cause inflight to go negative.
+ * Don't reschedule release if inflight is 0 or negative.
+ * - If 0, the page_pool has been destroyed
+ * - if negative, we will never recover
+ * in both cases no reschedule is necessary.
+ */
+ if (inflight <= 0)
return;
/* Periodic warning for page pools the user can't see */
@@ -1102,11 +1169,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool)
if (!pool->p.napi)
return;
- /* To avoid races with recycling and additional barriers make sure
- * pool and NAPI are unlinked when NAPI is disabled.
- */
- WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state));
- WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1);
+ napi_assert_will_not_race(pool->p.napi);
WRITE_ONCE(pool->p.napi, NULL);
}
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index 48335766c1bf..8d31c71bea1a 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -353,7 +353,7 @@ void page_pool_unlist(struct page_pool *pool)
int page_pool_check_memory_provider(struct net_device *dev,
struct netdev_rx_queue *rxq)
{
- struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv;
+ void *binding = rxq->mp_params.mp_priv;
struct page_pool *pool;
struct hlist_node *n;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 34f68ef74b8f..762ede027899 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -851,6 +851,9 @@ static ssize_t get_imix_entries(const char __user *buffer,
unsigned long weight;
unsigned long size;
+ if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES)
+ return -E2BIG;
+
len = num_arg(&buffer[i], max_digits, &size);
if (len < 0)
return len;
@@ -880,9 +883,6 @@ static ssize_t get_imix_entries(const char __user *buffer,
i++;
pkt_dev->n_imix_entries++;
-
- if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES)
- return -E2BIG;
} while (c == ' ');
return i;
@@ -898,6 +898,10 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
pkt_dev->nr_labels = 0;
do {
__u32 tmp;
+
+ if (n >= MAX_MPLS_LABELS)
+ return -E2BIG;
+
len = hex32_arg(&buffer[i], 8, &tmp);
if (len <= 0)
return len;
@@ -909,8 +913,6 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
return -EFAULT;
i++;
n++;
- if (n >= MAX_MPLS_LABELS)
- return -E2BIG;
} while (c == ',');
pkt_dev->nr_labels = n;
@@ -1896,8 +1898,8 @@ static ssize_t pktgen_thread_write(struct file *file,
i = len;
/* Read variable name */
-
- len = strn_len(&user_buffer[i], sizeof(name) - 1);
+ max = min(sizeof(name) - 1, count - i);
+ len = strn_len(&user_buffer[i], max);
if (len < 0)
return len;
@@ -1927,7 +1929,8 @@ static ssize_t pktgen_thread_write(struct file *file,
if (!strcmp(name, "add_device")) {
char f[32];
memset(f, 0, 32);
- len = strn_len(&user_buffer[i], sizeof(f) - 1);
+ max = min(sizeof(f) - 1, count - i);
+ len = strn_len(&user_buffer[i], max);
if (len < 0) {
ret = len;
goto out;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2ba5cd965d3f..4d0ee1c9002a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1005,6 +1005,9 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
/* IFLA_VF_STATS_TX_DROPPED */
nla_total_size_64bit(sizeof(__u64)));
}
+ if (dev->netdev_ops->ndo_get_vf_guid)
+ size += num_vfs * 2 *
+ nla_total_size(sizeof(struct ifla_vf_guid));
return size;
} else
return 0;
diff --git a/net/core/scm.c b/net/core/scm.c
index 4f6a14babe5a..733c0cbd393d 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -282,6 +282,16 @@ efault:
}
EXPORT_SYMBOL(put_cmsg);
+int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len,
+ void *data)
+{
+ /* Don't produce truncated CMSGs */
+ if (!msg->msg_control || msg->msg_controllen < CMSG_LEN(len))
+ return -ETOOSMALL;
+
+ return put_cmsg(msg, level, type, len, data);
+}
+
void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal)
{
struct scm_timestamping64 tss;
diff --git a/net/core/selftests.c b/net/core/selftests.c
index 8f801e6e3b91..ef27594d6a99 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -100,10 +100,10 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev,
ehdr->h_proto = htons(ETH_P_IP);
if (attr->tcp) {
+ memset(thdr, 0, sizeof(*thdr));
thdr->source = htons(attr->sport);
thdr->dest = htons(attr->dport);
thdr->doff = sizeof(struct tcphdr) / 4;
- thdr->check = 0;
} else {
uhdr->source = htons(attr->sport);
uhdr->dest = htons(attr->dport);
@@ -144,16 +144,25 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev,
attr->id = net_test_next_id;
shdr->id = net_test_next_id++;
- if (attr->size)
- skb_put(skb, attr->size);
- if (attr->max_size && attr->max_size > skb->len)
- skb_put(skb, attr->max_size - skb->len);
+ if (attr->size) {
+ void *payload = skb_put(skb, attr->size);
+
+ memset(payload, 0, attr->size);
+ }
+
+ if (attr->max_size && attr->max_size > skb->len) {
+ size_t pad_len = attr->max_size - skb->len;
+ void *pad = skb_put(skb, pad_len);
+
+ memset(pad, 0, pad_len);
+ }
skb->csum = 0;
skb->ip_summed = CHECKSUM_PARTIAL;
if (attr->tcp) {
- thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr,
- ihdr->daddr, 0);
+ int l4len = skb->len - skb_transport_offset(skb);
+
+ thdr->check = ~tcp_v4_check(l4len, ihdr->saddr, ihdr->daddr, 0);
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
} else {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 74149dc4ee31..cf54593149cc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
+#include <net/gro.h>
#include <net/gso.h>
#include <net/hotdata.h>
#include <net/ip6_checksum.h>
@@ -95,7 +96,9 @@
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
-#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
+#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN)
+#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \
+ GRO_MAX_HEAD_PAD))
/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
* This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
@@ -736,7 +739,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
*/
- if (len <= SKB_WITH_OVERHEAD(1024) ||
+ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
@@ -816,7 +819,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
* When the small frag allocator is available, prefer it over kmalloc
* for small fragments
*/
- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+ if ((!NAPI_HAS_SMALL_PAGE_FRAG &&
+ len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -921,11 +925,6 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
-static bool is_pp_netmem(netmem_ref netmem)
-{
- return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE;
-}
-
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
unsigned int headroom)
{
@@ -1023,14 +1022,7 @@ bool napi_pp_put_page(netmem_ref netmem)
{
netmem = netmem_compound_head(netmem);
- /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
- * in order to preserve any existing bits, such as bit 0 for the
- * head page of compound page and bit 1 for pfmemalloc page, so
- * mask those bits for freeing side when doing below checking,
- * and page_is_pfmemalloc() is checked in __page_pool_put_page()
- * to avoid recycling the pfmemalloc page.
- */
- if (unlikely(!is_pp_netmem(netmem)))
+ if (unlikely(!netmem_is_pp(netmem)))
return false;
page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
@@ -1070,7 +1062,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
for (i = 0; i < shinfo->nr_frags; i++) {
head_netmem = netmem_compound_head(shinfo->frags[i].netmem);
- if (likely(is_pp_netmem(head_netmem)))
+ if (likely(netmem_is_pp(head_netmem)))
page_pool_ref_netmem(head_netmem);
else
page_ref_inc(netmem_to_page(head_netmem));
@@ -6123,11 +6115,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
skb->offload_fwd_mark = 0;
skb->offload_l3_fwd_mark = 0;
#endif
+ ipvs_reset(skb);
if (!xnet)
return;
- ipvs_reset(skb);
skb->mark = 0;
skb_clear_tstamp(skb);
}
@@ -6205,9 +6197,6 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
if (!pskb_may_pull(skb, write_len))
return -ENOMEM;
- if (!skb_frags_readable(skb))
- return -EFAULT;
-
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 8ad7e6755fd6..adb3166ede97 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -529,16 +529,22 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
u32 off, u32 len,
struct sk_psock *psock,
struct sock *sk,
- struct sk_msg *msg)
+ struct sk_msg *msg,
+ bool take_ref)
{
int num_sge, copied;
+ /* skb_to_sgvec will fail when the total number of fragments in
+ * frag_list and frags exceeds MAX_MSG_FRAGS. For example, the
+ * caller may aggregate multiple skbs.
+ */
num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
if (num_sge < 0) {
/* skb linearize may fail with ENOMEM, but lets simply try again
* later if this happens. Under memory pressure we don't want to
* drop the skb. We need to linearize the skb so that the mapping
* in skb_to_sgvec can not error.
+ * Note that skb_linearize requires the skb not to be shared.
*/
if (skb_linearize(skb))
return -EAGAIN;
@@ -548,11 +554,14 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
return num_sge;
}
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+ psock->ingress_bytes += len;
+#endif
copied = len;
msg->sg.start = 0;
msg->sg.size = copied;
msg->sg.end = num_sge;
- msg->skb = skb;
+ msg->skb = take_ref ? skb_get(skb) : skb;
sk_psock_queue_msg(psock, msg);
sk_psock_data_ready(sk, psock);
@@ -560,7 +569,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
}
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len);
+ u32 off, u32 len, bool take_ref);
static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
@@ -574,7 +583,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* correctly.
*/
if (unlikely(skb->sk == sk))
- return sk_psock_skb_ingress_self(psock, skb, off, len);
+ return sk_psock_skb_ingress_self(psock, skb, off, len, true);
msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg)
return -EAGAIN;
@@ -586,7 +595,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, true);
if (err < 0)
kfree(msg);
return err;
@@ -597,7 +606,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* because the skb is already accounted for here.
*/
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len)
+ u32 off, u32 len, bool take_ref)
{
struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
struct sock *sk = psock->sk;
@@ -606,7 +615,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
if (unlikely(!msg))
return -EAGAIN;
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
if (err < 0)
kfree(msg);
return err;
@@ -615,18 +624,13 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
- int err = 0;
-
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- skb_get(skb);
- err = sk_psock_skb_ingress(psock, skb, off, len);
- if (err < 0)
- kfree_skb(skb);
- return err;
+
+ return sk_psock_skb_ingress(psock, skb, off, len);
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -651,12 +655,21 @@ static void sk_psock_backlog(struct work_struct *work)
bool ingress;
int ret;
- mutex_lock(&psock->work_mutex);
- if (unlikely(state->len)) {
- len = state->len;
- off = state->off;
- }
+ /* If sk is quickly removed from the map and then added back, the old
+ * psock should not be scheduled, because there are now two psocks
+ * pointing to the same sk.
+ */
+ if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ return;
+ /* Increment the psock refcnt to synchronize with close(fd) path in
+ * sock_map_close(), ensuring we wait for backlog thread completion
+ * before sk_socket freed. If refcnt increment fails, it indicates
+ * sock_map_close() completed with sk_socket potentially already freed.
+ */
+ if (!sk_psock_get(psock->sk))
+ return;
+ mutex_lock(&psock->work_mutex);
while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
@@ -666,6 +679,13 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
+
+ /* Resume processing from previous partial state */
+ if (unlikely(state->len)) {
+ len = state->len;
+ off = state->off;
+ }
+
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
@@ -676,7 +696,8 @@ static void sk_psock_backlog(struct work_struct *work)
if (ret <= 0) {
if (ret == -EAGAIN) {
sk_psock_skb_state(psock, state, len, off);
-
+ /* Restore redir info we cleared before */
+ skb_bpf_set_redir(skb, psock->sk, ingress);
/* Delay slightly to prioritize any
* other work that might be here.
*/
@@ -693,11 +714,14 @@ static void sk_psock_backlog(struct work_struct *work)
len -= ret;
} while (len);
+ /* The entire skb sent, clear state */
+ sk_psock_skb_state(psock, state, 0, 0);
skb = skb_dequeue(&psock->ingress_skb);
kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
+ sk_psock_put(psock->sk, psock);
}
struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@ -1010,7 +1034,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
off = stm->offset;
len = stm->full_len;
}
- err = sk_psock_skb_ingress_self(psock, skb, off, len);
+ err = sk_psock_skb_ingress_self(psock, skb, off, len, false);
}
if (err < 0) {
spin_lock_bh(&psock->ingress_lock);
@@ -1143,6 +1167,10 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
if (!ret)
sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
+ if (sk_is_tcp(sk)) {
+ psock->strp.cb.read_sock = tcp_bpf_strp_read_sock;
+ psock->copied_seq = tcp_sk(sk)->copied_seq;
+ }
return ret;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index a83f64a1d96a..d392cb37a864 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2107,6 +2107,8 @@ lenout:
*/
static inline void sock_lock_init(struct sock *sk)
{
+ sk_owner_clear(sk);
+
if (sk->sk_kern_sock)
sock_lock_init_class_and_name(
sk,
@@ -2203,6 +2205,9 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
cgroup_sk_free(&sk->sk_cgrp_data);
mem_cgroup_sk_free(sk);
security_sk_free(sk);
+
+ sk_owner_put(sk);
+
if (slab != NULL)
kmem_cache_free(slab, sk);
else
@@ -2238,6 +2243,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
get_net_track(net, &sk->ns_tracker, priority);
sock_inuse_add(net, 1);
} else {
+ net_passive_inc(net);
__netns_tracker_alloc(net, &sk->ns_tracker,
false, priority);
}
@@ -2262,6 +2268,7 @@ EXPORT_SYMBOL(sk_alloc);
static void __sk_destruct(struct rcu_head *head)
{
struct sock *sk = container_of(head, struct sock, sk_rcu);
+ struct net *net = sock_net(sk);
struct sk_filter *filter;
if (sk->sk_destruct)
@@ -2293,14 +2300,28 @@ static void __sk_destruct(struct rcu_head *head)
put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
- if (likely(sk->sk_net_refcnt))
- put_net_track(sock_net(sk), &sk->ns_tracker);
- else
- __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
-
+ if (likely(sk->sk_net_refcnt)) {
+ put_net_track(net, &sk->ns_tracker);
+ } else {
+ __netns_tracker_free(net, &sk->ns_tracker, false);
+ net_passive_dec(net);
+ }
sk_prot_free(sk->sk_prot_creator, sk);
}
+void sk_net_refcnt_upgrade(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+
+ WARN_ON_ONCE(sk->sk_net_refcnt);
+ __netns_tracker_free(net, &sk->ns_tracker, false);
+ net_passive_dec(net);
+ sk->sk_net_refcnt = 1;
+ get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
+}
+EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade);
+
void sk_destruct(struct sock *sk)
{
bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
@@ -2397,6 +2418,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
* is not properly dismantling its kernel sockets at netns
* destroy time.
*/
+ net_passive_inc(sock_net(newsk));
__netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
false, priority);
}
@@ -3152,16 +3174,16 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
struct proto *prot = sk->sk_prot;
- bool charged = false;
+ bool charged = true;
long allocated;
sk_memory_allocated_add(sk, amt);
allocated = sk_memory_allocated(sk);
if (memcg) {
- if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()))
+ charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge());
+ if (!charged)
goto suppress_allocation;
- charged = true;
}
/* Under limit. */
@@ -3246,7 +3268,7 @@ suppress_allocation:
sk_memory_allocated_sub(sk, amt);
- if (charged)
+ if (memcg && charged)
mem_cgroup_uncharge_skmem(memcg, amt);
return 0;
@@ -3925,7 +3947,7 @@ static int assign_proto_idx(struct proto *prot)
{
prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
- if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
+ if (unlikely(prot->inuse_idx == PROTO_INUSE_NR)) {
pr_err("PROTO_INUSE_NR exhausted\n");
return -ENOSPC;
}
@@ -3936,7 +3958,7 @@ static int assign_proto_idx(struct proto *prot)
static void release_proto_idx(struct proto *prot)
{
- if (prot->inuse_idx != PROTO_INUSE_NR - 1)
+ if (prot->inuse_idx != PROTO_INUSE_NR)
clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index f1b9b3958792..82a14f131d00 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -303,7 +303,10 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
write_lock_bh(&sk->sk_callback_lock);
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
- ret = sk_psock_init_strp(sk, psock);
+ if (sk_is_tcp(sk))
+ ret = sk_psock_init_strp(sk, psock);
+ else
+ ret = -EOPNOTSUPP;
if (ret) {
write_unlock_bh(&sk->sk_callback_lock);
sk_psock_put(sk, psock);
@@ -541,6 +544,9 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
if (sk_is_stream_unix(sk))
return (1 << sk->sk_state) & TCPF_ESTABLISHED;
+ if (sk_is_vsock(sk) &&
+ (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET))
+ return (1 << sk->sk_state) & TCPF_ESTABLISHED;
return true;
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 86a2476678c4..47e2743ffe22 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -34,6 +34,7 @@ static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;
+static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -303,7 +304,7 @@ static int proc_do_dev_weight(const struct ctl_table *table, int write,
int ret, weight;
mutex_lock(&dev_weight_mutex);
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write) {
weight = READ_ONCE(weight_p);
WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
@@ -396,6 +397,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_dev_weight,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "dev_weight_rx_bias",
@@ -403,6 +405,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_dev_weight,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "dev_weight_tx_bias",
@@ -410,6 +413,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_dev_weight,
+ .extra1 = SYSCTL_ONE,
},
{
.procname = "netdev_max_backlog",
@@ -577,7 +581,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .extra1 = &netdev_budget_usecs_min,
},
{
.procname = "fb_tunnels_only_for_init_net",
diff --git a/net/core/utils.c b/net/core/utils.c
index 27f4cffaae05..b8c21a859e27 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
EXPORT_SYMBOL(inet_proto_csum_replace16);
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
- __wsum diff, bool pseudohdr)
+ __wsum diff, bool pseudohdr, bool ipv6)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_replace_by_diff(sum, diff);
- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6)
skb->csum = ~csum_sub(diff, skb->csum);
} else if (pseudohdr) {
*sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
diff --git a/net/core/xdp.c b/net/core/xdp.c
index bcc5551c6424..23e7d736718b 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -381,8 +381,8 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
page = virt_to_head_page(data);
if (napi_direct && xdp_return_frame_no_direct())
napi_direct = false;
- /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
- * as mem->type knows this a page_pool page
+ /* No need to check netmem_is_pp() as mem->type knows this a
+ * page_pool page
*/
page_pool_put_full_page(page->pp, page, napi_direct);
break;
diff --git a/net/devlink/core.c b/net/devlink/core.c
index f49cd83f1955..7203c39532fc 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -117,7 +117,7 @@ static struct devlink_rel *devlink_rel_alloc(void)
err = xa_alloc_cyclic(&devlink_rels, &rel->index, rel,
xa_limit_32b, &next, GFP_KERNEL);
- if (err) {
+ if (err < 0) {
kfree(rel);
return ERR_PTR(err);
}
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 1664547deffd..ac3a252969cb 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -862,6 +862,16 @@ static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst)
kfree(dst->lags);
}
+static void dsa_tree_teardown_routing_table(struct dsa_switch_tree *dst)
+{
+ struct dsa_link *dl, *next;
+
+ list_for_each_entry_safe(dl, next, &dst->rtable, list) {
+ list_del(&dl->list);
+ kfree(dl);
+ }
+}
+
static int dsa_tree_setup(struct dsa_switch_tree *dst)
{
bool complete;
@@ -879,7 +889,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
err = dsa_tree_setup_cpu_ports(dst);
if (err)
- return err;
+ goto teardown_rtable;
err = dsa_tree_setup_switches(dst);
if (err)
@@ -911,14 +921,14 @@ teardown_switches:
dsa_tree_teardown_switches(dst);
teardown_cpu_ports:
dsa_tree_teardown_cpu_ports(dst);
+teardown_rtable:
+ dsa_tree_teardown_routing_table(dst);
return err;
}
static void dsa_tree_teardown(struct dsa_switch_tree *dst)
{
- struct dsa_link *dl, *next;
-
if (!dst->setup)
return;
@@ -932,10 +942,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_cpu_ports(dst);
- list_for_each_entry_safe(dl, next, &dst->rtable, list) {
- list_del(&dl->list);
- kfree(dl);
- }
+ dsa_tree_teardown_routing_table(dst);
pr_info("DSA: tree %d torn down\n", dst->index);
@@ -1478,12 +1485,44 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
static void dsa_switch_release_ports(struct dsa_switch *ds)
{
+ struct dsa_mac_addr *a, *tmp;
struct dsa_port *dp, *next;
+ struct dsa_vlan *v, *n;
dsa_switch_for_each_port_safe(dp, next, ds) {
- WARN_ON(!list_empty(&dp->fdbs));
- WARN_ON(!list_empty(&dp->mdbs));
- WARN_ON(!list_empty(&dp->vlans));
+ /* These are either entries that upper layers lost track of
+ * (probably due to bugs), or installed through interfaces
+ * where one does not necessarily have to remove them, like
+ * ndo_dflt_fdb_add().
+ */
+ list_for_each_entry_safe(a, tmp, &dp->fdbs, list) {
+ dev_info(ds->dev,
+ "Cleaning up unicast address %pM vid %u from port %d\n",
+ a->addr, a->vid, dp->index);
+ list_del(&a->list);
+ kfree(a);
+ }
+
+ list_for_each_entry_safe(a, tmp, &dp->mdbs, list) {
+ dev_info(ds->dev,
+ "Cleaning up multicast address %pM vid %u from port %d\n",
+ a->addr, a->vid, dp->index);
+ list_del(&a->list);
+ kfree(a);
+ }
+
+ /* These are entries that upper layers have lost track of,
+ * probably due to bugs, but also due to dsa_port_do_vlan_del()
+ * having failed and the VLAN entry still lingering on.
+ */
+ list_for_each_entry_safe(v, n, &dp->vlans, list) {
+ dev_info(ds->dev,
+ "Cleaning up vid %u from port %d\n",
+ v->vid, dp->index);
+ list_del(&v->list);
+ kfree(v);
+ }
+
list_del(&dp->list);
kfree(dp);
}
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 3ee53e28ec2e..53e03fd8071b 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -197,7 +197,7 @@ static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid)
err = ds->ops->tag_8021q_vlan_del(ds, port, vid);
if (err) {
- refcount_inc(&v->refcount);
+ refcount_set(&v->refcount, 1);
return err;
}
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 8c3c068728e5..fe75821623a4 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -257,7 +257,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
int source_port;
u8 *brcm_tag;
- if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID)))
+ if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN)))
return NULL;
brcm_tag = dsa_etype_header_pos_rx(skb);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 281bbac5539d..75197861bc91 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -140,7 +140,12 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
{
- u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ u8 *tag;
+
+ if (skb_linearize(skb))
+ return NULL;
+
+ tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
return ksz_common_rcv(skb, dev, tag[0] & KSZ8795_TAIL_TAG_EG_PORT_M,
KSZ_EGRESS_TAG_LEN);
@@ -311,10 +316,16 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
{
- /* Tag decoding */
- u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
- unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M;
unsigned int len = KSZ_EGRESS_TAG_LEN;
+ unsigned int port;
+ u8 *tag;
+
+ if (skb_linearize(skb))
+ return NULL;
+
+ /* Tag decoding */
+ tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M;
/* Extra 4-bytes PTP timestamp */
if (tag[0] & KSZ9477_PTP_TAG_INDICATION) {
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index f22051f33868..84096f6b0236 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -72,8 +72,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
dev = req_info.dev;
rtnl_lock();
- phydev = ethnl_req_get_phydev(&req_info,
- tb[ETHTOOL_A_CABLE_TEST_HEADER],
+ phydev = ethnl_req_get_phydev(&req_info, tb,
+ ETHTOOL_A_CABLE_TEST_HEADER,
info->extack);
if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
@@ -339,8 +339,8 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
goto out_dev_put;
rtnl_lock();
- phydev = ethnl_req_get_phydev(&req_info,
- tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER],
+ phydev = ethnl_req_get_phydev(&req_info, tb,
+ ETHTOOL_A_CABLE_TEST_TDR_HEADER,
info->extack);
if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c
index 4d5581147952..8bf99295bfbe 100644
--- a/net/ethtool/cmis_cdb.c
+++ b/net/ethtool/cmis_cdb.c
@@ -346,7 +346,7 @@ ethtool_cmis_module_poll(struct net_device *dev,
struct netlink_ext_ack extack = {};
int err;
- ethtool_cmis_page_init(&page_data, 0, offset, sizeof(rpl));
+ ethtool_cmis_page_init(&page_data, 0, offset, sizeof(*rpl));
page_data.data = (u8 *)rpl;
err = ops->get_module_eeprom_by_page(dev, &page_data, &extack);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 65cfe76dafbe..8b9692c35e70 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -992,7 +992,13 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
if (rc)
return rc;
- if (ops->get_rxfh) {
+ /* Nonzero ring with RSS only makes sense if NIC adds them together */
+ if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS &&
+ !ops->cap_rss_rxnfc_adds &&
+ ethtool_get_flow_spec_ring(info.fs.ring_cookie))
+ return -EINVAL;
+
+ if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) {
struct ethtool_rxfh_param rxfh = {};
rc = ops->get_rxfh(dev, &rxfh);
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index 34d76e87847d..05a5f72c99fa 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -3,6 +3,7 @@
#include "netlink.h"
#include "common.h"
#include <linux/phy.h>
+#include <linux/phylib_stubs.h>
struct linkstate_req_info {
struct ethnl_req_info base;
@@ -26,9 +27,8 @@ const struct nla_policy ethnl_linkstate_get_policy[] = {
NLA_POLICY_NESTED(ethnl_header_policy_stats),
};
-static int linkstate_get_sqi(struct net_device *dev)
+static int linkstate_get_sqi(struct phy_device *phydev)
{
- struct phy_device *phydev = dev->phydev;
int ret;
if (!phydev)
@@ -46,9 +46,8 @@ static int linkstate_get_sqi(struct net_device *dev)
return ret;
}
-static int linkstate_get_sqi_max(struct net_device *dev)
+static int linkstate_get_sqi_max(struct phy_device *phydev)
{
- struct phy_device *phydev = dev->phydev;
int ret;
if (!phydev)
@@ -100,19 +99,28 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
{
struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER,
+ info->extack);
+ if (IS_ERR(phydev)) {
+ ret = PTR_ERR(phydev);
+ goto out;
+ }
+
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
data->link = __ethtool_get_link(dev);
- ret = linkstate_get_sqi(dev);
+ ret = linkstate_get_sqi(phydev);
if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi = ret;
- ret = linkstate_get_sqi_max(dev);
+ ret = linkstate_get_sqi_max(phydev);
if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi_max = ret;
@@ -127,9 +135,9 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
sizeof(data->link_stats) / 8);
if (req_base->flags & ETHTOOL_FLAG_STATS) {
- if (dev->phydev)
- data->link_stats.link_down_events =
- READ_ONCE(dev->phydev->link_down_events);
+ if (phydev)
+ phy_ethtool_get_link_ext_stats(phydev,
+ &data->link_stats);
if (dev->ethtool_ops->get_link_ext_stats)
dev->ethtool_ops->get_link_ext_stats(dev,
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index e3f0ef6b851b..a52be67139d0 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -90,7 +90,7 @@ int ethnl_ops_begin(struct net_device *dev)
pm_runtime_get_sync(dev->dev.parent);
if (!netif_device_present(dev) ||
- dev->reg_state == NETREG_UNREGISTERING) {
+ dev->reg_state >= NETREG_UNREGISTERING) {
ret = -ENODEV;
goto err;
}
@@ -210,7 +210,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
}
struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info,
- const struct nlattr *header,
+ struct nlattr **tb, unsigned int header,
struct netlink_ext_ack *extack)
{
struct phy_device *phydev;
@@ -224,8 +224,8 @@ struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info,
return req_info->dev->phydev;
phydev = phy_link_topo_get_phy(req_info->dev, req_info->phy_index);
- if (!phydev) {
- NL_SET_ERR_MSG_ATTR(extack, header,
+ if (!phydev && tb) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[header],
"no phy matching phyindex");
return ERR_PTR(-ENODEV);
}
@@ -490,7 +490,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
ret = ops->prepare_data(req_info, reply_data, info);
rtnl_unlock();
if (ret < 0)
- goto err_cleanup;
+ goto err_dev;
ret = ops->reply_size(req_info, reply_data);
if (ret < 0)
goto err_cleanup;
@@ -548,7 +548,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, info);
rtnl_unlock();
if (ret < 0)
- goto out;
+ goto out_cancel;
ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr);
if (ret < 0)
goto out;
@@ -557,6 +557,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
out:
if (ctx->ops->cleanup_data)
ctx->ops->cleanup_data(ctx->reply_data);
+out_cancel:
ctx->reply_data->dev = NULL;
if (ret < 0)
genlmsg_cancel(skb, ehdr);
@@ -760,7 +761,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
ethnl_init_reply_data(reply_data, ops, dev);
ret = ops->prepare_data(req_info, reply_data, &info);
if (ret < 0)
- goto err_cleanup;
+ goto err_rep;
ret = ops->reply_size(req_info, reply_data);
if (ret < 0)
goto err_cleanup;
@@ -795,6 +796,7 @@ err_skb:
err_cleanup:
if (ops->cleanup_data)
ops->cleanup_data(reply_data);
+err_rep:
kfree(reply_data);
kfree(req_info);
return;
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 203b08eb6c6f..5e176938d6d2 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -275,7 +275,8 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
* ethnl_req_get_phydev() - Gets the phy_device targeted by this request,
* if any. Must be called under rntl_lock().
* @req_info: The ethnl request to get the phy from.
- * @header: The netlink header, used for error reporting.
+ * @tb: The netlink attributes array, for error reporting.
+ * @header: The netlink header index, used for error reporting.
* @extack: The netlink extended ACK, for error reporting.
*
* The caller must hold RTNL, until it's done interacting with the returned
@@ -289,7 +290,7 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
* is returned.
*/
struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info,
- const struct nlattr *header,
+ struct nlattr **tb, unsigned int header,
struct netlink_ext_ack *extack);
/**
diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c
index ed8f690f6bac..e067cc234419 100644
--- a/net/ethtool/phy.c
+++ b/net/ethtool/phy.c
@@ -125,7 +125,7 @@ static int ethnl_phy_parse_request(struct ethnl_req_info *req_base,
struct phy_req_info *req_info = PHY_REQINFO(req_base);
struct phy_device *phydev;
- phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PHY_HEADER],
+ phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PHY_HEADER,
extack);
if (!phydev)
return 0;
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
index d95d92f173a6..e1f7820a6158 100644
--- a/net/ethtool/plca.c
+++ b/net/ethtool/plca.c
@@ -62,7 +62,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
struct phy_device *phydev;
int ret;
- phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
+ phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER,
info->extack);
// check that the PHY device is available and connected
if (IS_ERR_OR_NULL(phydev)) {
@@ -152,7 +152,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
bool mod = false;
int ret;
- phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER],
+ phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PLCA_HEADER,
info->extack);
// check that the PHY device is available and connected
if (IS_ERR_OR_NULL(phydev))
@@ -211,7 +211,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
struct phy_device *phydev;
int ret;
- phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
+ phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER,
info->extack);
// check that the PHY device is available and connected
if (IS_ERR_OR_NULL(phydev)) {
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index a0705edca22a..71843de832cc 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -64,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
- phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PSE_HEADER],
+ phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PSE_HEADER,
info->extack);
if (IS_ERR(phydev))
return -ENODEV;
@@ -261,7 +261,7 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
struct phy_device *phydev;
int ret;
- phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
+ phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PSE_HEADER,
info->extack);
ret = ethnl_set_pse_validate(phydev, info);
if (ret)
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index e07386275e14..8aa45f3fdfdf 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
u32 total_size, indir_bytes;
u8 *rss_config;
+ data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key;
+
ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
if (!ctx)
return -ENOENT;
@@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context)
return -EOPNOTSUPP;
- data->no_key_fields = !ops->rxfh_per_ctx_key;
return rss_prepare_ctx(request, dev, data, info);
}
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index 912f0c4fff2f..273ae4ff343f 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/phy.h>
+#include <linux/phylib_stubs.h>
+
#include "netlink.h"
#include "common.h"
#include "bitset.h"
@@ -20,6 +23,7 @@ struct stats_reply_data {
struct ethtool_eth_mac_stats mac_stats;
struct ethtool_eth_ctrl_stats ctrl_stats;
struct ethtool_rmon_stats rmon_stats;
+ struct ethtool_phy_stats phydev_stats;
);
const struct ethtool_rmon_hist_range *rmon_ranges;
};
@@ -120,8 +124,15 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
struct stats_reply_data *data = STATS_REPDATA(reply_base);
enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_STATS_HEADER,
+ info->extack);
+ if (IS_ERR(phydev))
+ return PTR_ERR(phydev);
+
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
@@ -146,6 +157,13 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
data->rmon_stats.src = src;
if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
+ src == ETHTOOL_MAC_STATS_SRC_AGGREGATE) {
+ if (phydev)
+ phy_ethtool_get_phy_stats(phydev, &data->phy_stats,
+ &data->phydev_stats);
+ }
+
+ if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
dev->ethtool_ops->get_eth_phy_stats)
dev->ethtool_ops->get_eth_phy_stats(dev, &data->phy_stats);
if (test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask) &&
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index b3382b3cf325..b9400d18f01d 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -299,7 +299,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
return 0;
}
- phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_HEADER_FLAGS],
+ phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS,
info->extack);
/* phydev can be NULL, check for errors only */
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 44048d7538dd..9d0754b3642f 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -543,6 +543,7 @@ static struct hsr_proto_ops hsr_ops = {
.drop_frame = hsr_drop_frame,
.fill_frame_info = hsr_fill_frame_info,
.invalid_dan_ingress_frame = hsr_invalid_dan_ingress_frame,
+ .register_frame_out = hsr_register_frame_out,
};
static struct hsr_proto_ops prp_ops = {
@@ -553,6 +554,7 @@ static struct hsr_proto_ops prp_ops = {
.fill_frame_info = prp_fill_frame_info,
.handle_san_frame = prp_handle_san_frame,
.update_san_info = prp_update_san_info,
+ .register_frame_out = prp_register_frame_out,
};
void hsr_dev_setup(struct net_device *dev)
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 40c5fbbd155d..ace4e355d164 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -524,8 +524,8 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
* Also for SAN, this shouldn't be done.
*/
if (!frame->is_from_san &&
- hsr_register_frame_out(port, frame->node_src,
- frame->sequence_nr))
+ hsr->proto_ops->register_frame_out &&
+ hsr->proto_ops->register_frame_out(port, frame))
continue;
if (frame->is_supervision && port->type == HSR_PT_MASTER &&
@@ -688,9 +688,12 @@ static int fill_frame_info(struct hsr_frame_info *frame,
frame->is_vlan = true;
if (frame->is_vlan) {
- if (skb->mac_len < offsetofend(struct hsr_vlan_ethhdr, vlanhdr))
+ /* Note: skb->mac_len might be wrong here. */
+ if (!pskb_may_pull(skb,
+ skb_mac_offset(skb) +
+ offsetofend(struct hsr_vlan_ethhdr, vlanhdr)))
return -EINVAL;
- vlan_hdr = (struct hsr_vlan_ethhdr *)ethhdr;
+ vlan_hdr = (struct hsr_vlan_ethhdr *)skb_mac_header(skb);
proto = vlan_hdr->vlanhdr.h_vlan_encapsulated_proto;
/* FIXME: */
netdev_warn_once(skb->dev, "VLAN not yet supported");
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 73bc6f659812..85991fab7db5 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -35,6 +35,7 @@ static bool seq_nr_after(u16 a, u16 b)
#define seq_nr_before(a, b) seq_nr_after((b), (a))
#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b)))
+#define PRP_DROP_WINDOW_LEN 32768
bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr)
{
@@ -176,8 +177,11 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
new_node->time_in[i] = now;
new_node->time_out[i] = now;
}
- for (i = 0; i < HSR_PT_PORTS; i++)
+ for (i = 0; i < HSR_PT_PORTS; i++) {
new_node->seq_out[i] = seq_out;
+ new_node->seq_expected[i] = seq_out + 1;
+ new_node->seq_start[i] = seq_out + 1;
+ }
if (san && hsr->proto_ops->handle_san_frame)
hsr->proto_ops->handle_san_frame(san, rx_port, new_node);
@@ -482,9 +486,11 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
* 0 otherwise, or
* negative error code on error
*/
-int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
- u16 sequence_nr)
+int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame)
{
+ struct hsr_node *node = frame->node_src;
+ u16 sequence_nr = frame->sequence_nr;
+
spin_lock_bh(&node->seq_out_lock);
if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) &&
time_is_after_jiffies(node->time_out[port->type] +
@@ -499,6 +505,89 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
return 0;
}
+/* Adaptation of the PRP duplicate discard algorithm described in wireshark
+ * wiki (https://wiki.wireshark.org/PRP)
+ *
+ * A drop window is maintained for both LANs with start sequence set to the
+ * first sequence accepted on the LAN that has not been seen on the other LAN,
+ * and expected sequence set to the latest received sequence number plus one.
+ *
+ * When a frame is received on either LAN it is compared against the received
+ * frames on the other LAN. If it is outside the drop window of the other LAN
+ * the frame is accepted and the drop window is updated.
+ * The drop window for the other LAN is reset.
+ *
+ * 'port' is the outgoing interface
+ * 'frame' is the frame to be sent
+ *
+ * Return:
+ * 1 if frame can be shown to have been sent recently on this interface,
+ * 0 otherwise
+ */
+int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame)
+{
+ enum hsr_port_type other_port;
+ enum hsr_port_type rcv_port;
+ struct hsr_node *node;
+ u16 sequence_diff;
+ u16 sequence_exp;
+ u16 sequence_nr;
+
+ /* out-going frames are always in order
+ * and can be checked the same way as for HSR
+ */
+ if (frame->port_rcv->type == HSR_PT_MASTER)
+ return hsr_register_frame_out(port, frame);
+
+ /* for PRP we should only forward frames from the slave ports
+ * to the master port
+ */
+ if (port->type != HSR_PT_MASTER)
+ return 1;
+
+ node = frame->node_src;
+ sequence_nr = frame->sequence_nr;
+ sequence_exp = sequence_nr + 1;
+ rcv_port = frame->port_rcv->type;
+ other_port = rcv_port == HSR_PT_SLAVE_A ? HSR_PT_SLAVE_B :
+ HSR_PT_SLAVE_A;
+
+ spin_lock_bh(&node->seq_out_lock);
+ if (time_is_before_jiffies(node->time_out[port->type] +
+ msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)) ||
+ (node->seq_start[rcv_port] == node->seq_expected[rcv_port] &&
+ node->seq_start[other_port] == node->seq_expected[other_port])) {
+ /* the node hasn't been sending for a while
+ * or both drop windows are empty, forward the frame
+ */
+ node->seq_start[rcv_port] = sequence_nr;
+ } else if (seq_nr_before(sequence_nr, node->seq_expected[other_port]) &&
+ seq_nr_before_or_eq(node->seq_start[other_port], sequence_nr)) {
+ /* drop the frame, update the drop window for the other port
+ * and reset our drop window
+ */
+ node->seq_start[other_port] = sequence_exp;
+ node->seq_expected[rcv_port] = sequence_exp;
+ node->seq_start[rcv_port] = node->seq_expected[rcv_port];
+ spin_unlock_bh(&node->seq_out_lock);
+ return 1;
+ }
+
+ /* update the drop window for the port where this frame was received
+ * and clear the drop window for the other port
+ */
+ node->seq_start[other_port] = node->seq_expected[other_port];
+ node->seq_expected[rcv_port] = sequence_exp;
+ sequence_diff = sequence_exp - node->seq_start[rcv_port];
+ if (sequence_diff > PRP_DROP_WINDOW_LEN)
+ node->seq_start[rcv_port] = sequence_exp - PRP_DROP_WINDOW_LEN;
+
+ node->time_out[port->type] = jiffies;
+ node->seq_out[port->type] = sequence_nr;
+ spin_unlock_bh(&node->seq_out_lock);
+ return 0;
+}
+
static struct hsr_port *get_late_port(struct hsr_priv *hsr,
struct hsr_node *node)
{
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 993fa950d814..b04948659d84 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -44,8 +44,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
u16 sequence_nr);
-int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
- u16 sequence_nr);
+int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame);
void hsr_prune_nodes(struct timer_list *t);
void hsr_prune_proxy_nodes(struct timer_list *t);
@@ -73,6 +72,8 @@ void prp_update_san_info(struct hsr_node *node, bool is_sup);
bool hsr_is_node_in_db(struct list_head *node_db,
const unsigned char addr[ETH_ALEN]);
+int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame);
+
struct hsr_node {
struct list_head mac_list;
/* Protect R/W access to seq_out */
@@ -89,6 +90,9 @@ struct hsr_node {
bool san_b;
u16 seq_out[HSR_PT_PORTS];
bool removed;
+ /* PRP specific duplicate handling */
+ u16 seq_expected[HSR_PT_PORTS];
+ u16 seq_start[HSR_PT_PORTS];
struct rcu_head rcu_head;
};
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index fcfeb79bb040..e26244456f63 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -183,6 +183,8 @@ struct hsr_proto_ops {
struct hsr_frame_info *frame);
bool (*invalid_dan_ingress_frame)(__be16 protocol);
void (*update_san_info)(struct hsr_node *node, bool is_sup);
+ int (*register_frame_out)(struct hsr_port *port,
+ struct hsr_frame_info *frame);
};
struct hsr_self_node {
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 11c1519b3699..8fb48f42581c 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -659,10 +659,12 @@ static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb
*/
void arp_xmit(struct sk_buff *skb)
{
+ rcu_read_lock();
/* Send it off, maybe filter it using firewalling first. */
NF_HOOK(NFPROTO_ARP, NF_ARP_OUT,
- dev_net(skb->dev), NULL, skb, NULL, skb->dev,
+ dev_net_rcu(skb->dev), NULL, skb, NULL, skb->dev,
arp_xmit_finish);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(arp_xmit);
@@ -1075,7 +1077,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
if (!dev && (r->arp_flags & ATF_COM)) {
- dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
+ dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
r->arp_ha.sa_data);
if (!dev)
return -ENODEV;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7cf5f7d0d0de..a55e95046984 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1351,10 +1351,11 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
__be32 addr = 0;
unsigned char localnet_scope = RT_SCOPE_HOST;
struct in_device *in_dev;
- struct net *net = dev_net(dev);
+ struct net *net;
int master_idx;
rcu_read_lock();
+ net = dev_net_rcu(dev);
in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
goto no_in_dev;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index f3281312eb5e..cbe4c6fc8b8e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -120,47 +120,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
}
#ifdef CONFIG_INET_ESPINTCP
-struct esp_tcp_sk {
- struct sock *sk;
- struct rcu_head rcu;
-};
-
-static void esp_free_tcp_sk(struct rcu_head *head)
-{
- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
-
- sock_put(esk->sk);
- kfree(esk);
-}
-
static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
struct net *net = xs_net(x);
- struct esp_tcp_sk *esk;
__be16 sport, dport;
- struct sock *nsk;
struct sock *sk;
- sk = rcu_dereference(x->encap_sk);
- if (sk && sk->sk_state == TCP_ESTABLISHED)
- return sk;
-
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (sk && sk == nsk) {
- esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
- if (!esk) {
- spin_unlock_bh(&x->lock);
- return ERR_PTR(-ENOMEM);
- }
- RCU_INIT_POINTER(x->encap_sk, NULL);
- esk->sk = sk;
- call_rcu(&esk->rcu, esp_free_tcp_sk);
- }
spin_unlock_bh(&x->lock);
sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
@@ -173,20 +142,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
return ERR_PTR(-EINVAL);
}
- spin_lock_bh(&x->lock);
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (encap->encap_sport != sport ||
- encap->encap_dport != dport) {
- sock_put(sk);
- sk = nsk ?: ERR_PTR(-EREMCHG);
- } else if (sk == nsk) {
- sock_put(sk);
- } else {
- rcu_assign_pointer(x->encap_sk, sk);
- }
- spin_unlock_bh(&x->lock);
-
return sk;
}
@@ -199,8 +154,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
sk = esp_find_tcp_sk(x);
err = PTR_ERR_OR_ZERO(sk);
- if (err)
+ if (err) {
+ kfree_skb(skb);
goto out;
+ }
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
@@ -209,6 +166,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
err = espintcp_push_skb(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
+
out:
rcu_read_unlock();
return err;
@@ -392,6 +351,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
if (IS_ERR(sk))
return ERR_CAST(sk);
+ sock_put(sk);
+
*lenp = htons(len);
esph = (struct ip_esp_hdr *)(lenp + 1);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 80c4ea0e12f4..e0d94270da28 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -53,9 +53,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ip_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ip_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 793e6781399a..5b7c41333d6f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -829,19 +829,33 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
}
}
+ if (cfg->fc_dst_len > 32) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (cfg->fc_dst_len < 32 && (ntohl(cfg->fc_dst) << cfg->fc_dst_len)) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix for given prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
if (cfg->fc_nh_id) {
if (cfg->fc_oif || cfg->fc_gw_family ||
cfg->fc_encap || cfg->fc_mp) {
NL_SET_ERR_MSG(extack,
"Nexthop specification and nexthop id are mutually exclusive");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
}
if (has_gw && has_via) {
NL_SET_ERR_MSG(extack,
"Nexthop configuration can not contain both GATEWAY and VIA");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
if (!cfg->fc_table)
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b07292d50ee7..4563e5303c1a 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -245,9 +245,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(skb->sk);
+ struct fib4_rule *rule4 = (struct fib4_rule *)rule;
+ struct net *net = rule->fr_net;
int err = -EINVAL;
- struct fib4_rule *rule4 = (struct fib4_rule *) rule;
if (!inet_validate_dscp(frh->tos)) {
NL_SET_ERR_MSG(extack,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 09e31757e96c..cc86031d2050 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1193,22 +1193,6 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp,
return 0;
}
-static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
-{
- if (plen > KEYLENGTH) {
- NL_SET_ERR_MSG(extack, "Invalid prefix length");
- return false;
- }
-
- if ((plen < KEYLENGTH) && (key << plen)) {
- NL_SET_ERR_MSG(extack,
- "Invalid prefix for given prefix length");
- return false;
- }
-
- return true;
-}
-
static void fib_remove_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *old);
@@ -1229,9 +1213,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
fi = fib_create_info(cfg, extack);
@@ -1723,9 +1704,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
l = fib_find_node(t, &tp, key);
if (!l)
return -ESRCH;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c3ad41573b33..b8111ec651b5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -312,7 +312,6 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
struct dst_entry *dst = &rt->dst;
struct inet_peer *peer;
bool rc = true;
- int vif;
if (!apply_ratelimit)
return true;
@@ -321,12 +320,12 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
goto out;
- vif = l3mdev_master_ifindex(dst->dev);
- peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
+ rcu_read_lock();
+ peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
+ l3mdev_master_ifindex_rcu(dst->dev));
rc = inet_peer_xrlim_allow(peer,
READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
out:
if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
@@ -400,10 +399,10 @@ static void icmp_push_reply(struct sock *sk,
static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
{
- struct ipcm_cookie ipc;
struct rtable *rt = skb_rtable(skb);
- struct net *net = dev_net(rt->dst.dev);
+ struct net *net = dev_net_rcu(rt->dst.dev);
bool apply_ratelimit = false;
+ struct ipcm_cookie ipc;
struct flowi4 fl4;
struct sock *sk;
struct inet_sock *inet;
@@ -478,13 +477,11 @@ static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
return route_lookup_dev;
}
-static struct rtable *icmp_route_lookup(struct net *net,
- struct flowi4 *fl4,
+static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
struct sk_buff *skb_in,
- const struct iphdr *iph,
- __be32 saddr, u8 tos, u32 mark,
- int type, int code,
- struct icmp_bxm *param)
+ const struct iphdr *iph, __be32 saddr,
+ dscp_t dscp, u32 mark, int type,
+ int code, struct icmp_bxm *param)
{
struct net_device *route_lookup_dev;
struct dst_entry *dst, *dst2;
@@ -498,7 +495,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->saddr = saddr;
fl4->flowi4_mark = mark;
fl4->flowi4_uid = sock_net_uid(net, NULL);
- fl4->flowi4_tos = tos & INET_DSCP_MASK;
+ fl4->flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
@@ -550,7 +547,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
orefdst = skb_in->_skb_refdst; /* save old refdst */
skb_dst_set(skb_in, NULL);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
- tos, rt2->dst.dev);
+ dscp, rt2->dst.dev);
dst_release(&rt2->dst);
rt2 = skb_rtable(skb_in);
@@ -611,12 +608,14 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
struct sock *sk;
if (!rt)
- goto out;
+ return;
+
+ rcu_read_lock();
if (rt->dst.dev)
- net = dev_net(rt->dst.dev);
+ net = dev_net_rcu(rt->dst.dev);
else if (skb_in->dev)
- net = dev_net(skb_in->dev);
+ net = dev_net_rcu(skb_in->dev);
else
goto out;
@@ -744,8 +743,9 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
ipc.opt = &icmp_param.replyopts.opt;
ipc.sockc.mark = mark;
- rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
- type, code, &icmp_param);
+ rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr,
+ inet_dsfield_to_dscp(tos), mark, type, code,
+ &icmp_param);
if (IS_ERR(rt))
goto out_unlock;
@@ -787,7 +787,8 @@ out_unlock:
icmp_xmit_unlock(sk);
out_bh_enable:
local_bh_enable();
-out:;
+out:
+ rcu_read_unlock();
}
EXPORT_SYMBOL(__icmp_send);
@@ -836,7 +837,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
* avoid additional coding at protocol handlers.
*/
if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
- __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS);
return;
}
@@ -870,7 +871,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
struct net *net;
u32 info = 0;
- net = dev_net(skb_dst(skb)->dev);
+ net = dev_net_rcu(skb_dst(skb)->dev);
/*
* Incomplete header ?
@@ -981,7 +982,7 @@ out_err:
static enum skb_drop_reason icmp_redirect(struct sk_buff *skb)
{
if (skb->len < sizeof(struct iphdr)) {
- __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS);
return SKB_DROP_REASON_PKT_TOO_SMALL;
}
@@ -1013,7 +1014,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
struct icmp_bxm icmp_param;
struct net *net;
- net = dev_net(skb_dst(skb)->dev);
+ net = dev_net_rcu(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echos? */
if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
return SKB_NOT_DROPPED_YET;
@@ -1042,9 +1043,9 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
{
+ struct net *net = dev_net_rcu(skb->dev);
struct icmp_ext_hdr *ext_hdr, _ext_hdr;
struct icmp_ext_echo_iio *iio, _iio;
- struct net *net = dev_net(skb->dev);
struct inet6_dev *in6_dev;
struct in_device *in_dev;
struct net_device *dev;
@@ -1183,7 +1184,7 @@ static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
return SKB_NOT_DROPPED_YET;
out_err:
- __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
return SKB_DROP_REASON_PKT_TOO_SMALL;
}
@@ -1200,7 +1201,7 @@ int icmp_rcv(struct sk_buff *skb)
{
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct rtable *rt = skb_rtable(skb);
- struct net *net = dev_net(rt->dst.dev);
+ struct net *net = dev_net_rcu(rt->dst.dev);
struct icmphdr *icmph;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
@@ -1373,9 +1374,9 @@ int icmp_err(struct sk_buff *skb, u32 info)
struct iphdr *iph = (struct iphdr *)skb->data;
int offset = iph->ihl<<2;
struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset);
+ struct net *net = dev_net_rcu(skb->dev);
int type = icmp_hdr(skb)->type;
int code = icmp_hdr(skb)->code;
- struct net *net = dev_net(skb->dev);
/*
* Use ping_err to handle all icmp errors except those
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 9bfcfd016e18..2b4a58824763 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1230,22 +1230,37 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
unsigned int locksz = sizeof(spinlock_t);
unsigned int i, nblocks = 1;
+ spinlock_t *ptr = NULL;
- if (locksz != 0) {
- /* allocate 2 cache lines or at least one spinlock per cpu */
- nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
- nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
+ if (locksz == 0)
+ goto set_mask;
- /* no more locks than number of hash buckets */
- nblocks = min(nblocks, hashinfo->ehash_mask + 1);
+ /* Allocate 2 cache lines or at least one spinlock per cpu. */
+ nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U) * num_possible_cpus();
- hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
- if (!hashinfo->ehash_locks)
- return -ENOMEM;
+ /* At least one page per NUMA node. */
+ nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz);
+
+ nblocks = roundup_pow_of_two(nblocks);
+
+ /* No more locks than number of hash buckets. */
+ nblocks = min(nblocks, hashinfo->ehash_mask + 1);
- for (i = 0; i < nblocks; i++)
- spin_lock_init(&hashinfo->ehash_locks[i]);
+ if (num_online_nodes() > 1) {
+ /* Use vmalloc() to allow NUMA policy to spread pages
+ * on all available nodes if desired.
+ */
+ ptr = vmalloc_array(nblocks, locksz);
+ }
+ if (!ptr) {
+ ptr = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
}
+ for (i = 0; i < nblocks; i++)
+ spin_lock_init(&ptr[i]);
+ hashinfo->ehash_locks = ptr;
+set_mask:
hashinfo->ehash_locks_mask = nblocks - 1;
return 0;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 5bd759963451..9c5ffe3b5f77 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -95,6 +95,7 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
{
struct rb_node **pp, *parent, *next;
struct inet_peer *p;
+ u32 now;
pp = &base->rb_root.rb_node;
parent = NULL;
@@ -108,8 +109,9 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
p = rb_entry(parent, struct inet_peer, rb_node);
cmp = inetpeer_addr_cmp(daddr, &p->daddr);
if (cmp == 0) {
- if (!refcount_inc_not_zero(&p->refcnt))
- break;
+ now = jiffies;
+ if (READ_ONCE(p->dtime) != now)
+ WRITE_ONCE(p->dtime, now);
return p;
}
if (gc_stack) {
@@ -155,9 +157,6 @@ static void inet_peer_gc(struct inet_peer_base *base,
for (i = 0; i < gc_cnt; i++) {
p = gc_stack[i];
- /* The READ_ONCE() pairs with the WRITE_ONCE()
- * in inet_putpeer()
- */
delta = (__u32)jiffies - READ_ONCE(p->dtime);
if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
@@ -173,31 +172,23 @@ static void inet_peer_gc(struct inet_peer_base *base,
}
}
+/* Must be called under RCU : No refcount change is done here. */
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
- const struct inetpeer_addr *daddr,
- int create)
+ const struct inetpeer_addr *daddr)
{
struct inet_peer *p, *gc_stack[PEER_MAX_GC];
struct rb_node **pp, *parent;
unsigned int gc_cnt, seq;
- int invalidated;
/* Attempt a lockless lookup first.
* Because of a concurrent writer, we might not find an existing entry.
*/
- rcu_read_lock();
seq = read_seqbegin(&base->lock);
p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
- invalidated = read_seqretry(&base->lock, seq);
- rcu_read_unlock();
if (p)
return p;
- /* If no writer did a change during our lookup, we can return early. */
- if (!create && !invalidated)
- return NULL;
-
/* retry an exact lookup, taking the lock before.
* At least, nodes should be hot in our cache.
*/
@@ -206,12 +197,12 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
gc_cnt = 0;
p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
- if (!p && create) {
+ if (!p) {
p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
if (p) {
p->daddr = *daddr;
p->dtime = (__u32)jiffies;
- refcount_set(&p->refcnt, 2);
+ refcount_set(&p->refcnt, 1);
atomic_set(&p->rid, 0);
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
@@ -236,15 +227,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
void inet_putpeer(struct inet_peer *p)
{
- /* The WRITE_ONCE() pairs with itself (we run lockless)
- * and the READ_ONCE() in inet_peer_gc()
- */
- WRITE_ONCE(p->dtime, (__u32)jiffies);
-
if (refcount_dec_and_test(&p->refcnt))
call_rcu(&p->rcu, inetpeer_free_rcu);
}
-EXPORT_SYMBOL_GPL(inet_putpeer);
/*
* Check transmit rate limitation for given message.
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a92664a5ef2e..9ca0a183a55f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -82,15 +82,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
- struct net *net = q->fqdir->net;
-
const struct frag_v4_compare_key *key = a;
+ struct net *net = q->fqdir->net;
+ struct inet_peer *p = NULL;
q->key.v4 = *key;
qp->ecn = 0;
- qp->peer = q->fqdir->max_dist ?
- inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
- NULL;
+ if (q->fqdir->max_dist) {
+ rcu_read_lock();
+ p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif);
+ if (p && !refcount_inc_not_zero(&p->refcnt))
+ p = NULL;
+ rcu_read_unlock();
+ }
+ qp->peer = p;
}
static void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f1f31ebfc793..9667f2774025 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -141,7 +141,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- unsigned int data_len = 0;
struct ip_tunnel *t;
if (tpi->proto == htons(ETH_P_TEB))
@@ -182,7 +181,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return 0;
- data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
case ICMP_REDIRECT:
@@ -190,10 +188,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
}
#if IS_ENABLED(CONFIG_IPV6)
- if (tpi->proto == htons(ETH_P_IPV6) &&
- !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
- type, data_len))
- return 0;
+ if (tpi->proto == htons(ETH_P_IPV6)) {
+ unsigned int data_len = 0;
+
+ if (type == ICMP_TIME_EXCEEDED)
+ data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
+
+ if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
+ type, data_len))
+ return 0;
+ }
#endif
if (t->parms.iph.daddr == 0 ||
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 68aedb8877b9..81e86e5defee 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -617,7 +617,8 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
orefdst = skb->_skb_refdst;
skb_dst_set(skb, NULL);
- err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev);
+ err = ip_route_input(skb, nexthop, iph->saddr, ip4h_dscp(iph),
+ dev);
rt2 = skb_rtable(skb);
if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
skb_dst_drop(skb);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index a3676155be78..f65d2f727381 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -416,7 +416,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
skb_dst_update_pmtu_no_confirm(skb, mtu);
- if (!reply || skb->pkt_type == PACKET_HOST)
+ if (!reply)
return 0;
if (skb->protocol == htons(ETH_P_IP))
@@ -451,7 +451,7 @@ static const struct nla_policy
geneve_opt_policy[LWTUNNEL_IP_OPT_GENEVE_MAX + 1] = {
[LWTUNNEL_IP_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
[LWTUNNEL_IP_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
- [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 },
+ [LWTUNNEL_IP_OPT_GENEVE_DATA] = { .type = NLA_BINARY, .len = 127 },
};
static const struct nla_policy
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 449a2ac40bdc..de0d9cc7806a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -817,7 +817,7 @@ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
cache->mfc_un.res.maxvif = vifi + 1;
}
}
- cache->mfc_un.res.lastuse = jiffies;
+ WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
}
static int vif_add(struct net *net, struct mr_table *mrt,
@@ -1667,9 +1667,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr);
if (c) {
- sr->pktcnt = c->_c.mfc_un.res.pkt;
- sr->bytecnt = c->_c.mfc_un.res.bytes;
- sr->wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
return 0;
}
@@ -1739,9 +1739,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
- sr.pktcnt = c->_c.mfc_un.res.pkt;
- sr.bytecnt = c->_c.mfc_un.res.bytes;
- sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -1974,9 +1974,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
int vif, ct;
vif = c->_c.mfc_parent;
- c->_c.mfc_un.res.pkt++;
- c->_c.mfc_un.res.bytes += skb->len;
- c->_c.mfc_un.res.lastuse = jiffies;
+ atomic_long_inc(&c->_c.mfc_un.res.pkt);
+ atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
+ WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
@@ -2007,7 +2007,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
goto dont_forward;
}
- c->_c.mfc_un.res.wrong_if++;
+ atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -3015,9 +3015,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->_c.mfc_un.res.pkt,
- mfc->_c.mfc_un.res.bytes,
- mfc->_c.mfc_un.res.wrong_if);
+ atomic_long_read(&mfc->_c.mfc_un.res.pkt),
+ atomic_long_read(&mfc->_c.mfc_un.res.bytes),
+ atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
for (n = mfc->_c.mfc_un.res.minvif;
n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index f0af12a2f70b..28d77d454d44 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -263,9 +263,9 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
lastuse = READ_ONCE(c->mfc_un.res.lastuse);
lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
+ mfcs.mfcs_packets = atomic_long_read(&c->mfc_un.res.pkt);
+ mfcs.mfcs_bytes = atomic_long_read(&c->mfc_un.res.bytes);
+ mfcs.mfcs_wrong_if = atomic_long_read(&c->mfc_un.res.wrong_if);
if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
RTA_PAD))
@@ -330,9 +330,6 @@ next_entry:
list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
if (e < s_e)
goto next_entry2;
- if (filter->dev &&
- !mr_mfc_uses_dev(mrt, mfc, filter->dev))
- goto next_entry2;
err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 09fff5d424ef..f514eb52b8d4 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -49,7 +49,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
else
addr = iph->saddr;
- *dst = inet_dev_addr_type(nft_net(pkt), dev, addr);
+ if (priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) {
+ *dst = inet_dev_addr_type(nft_net(pkt), dev, addr);
+ return;
+ }
+
+ *dst = inet_addr_type_dev_table(nft_net(pkt), pkt->skb->dev, addr);
}
EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
@@ -64,12 +69,17 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct flowi4 fl4 = {
.flowi4_scope = RT_SCOPE_UNIVERSE,
.flowi4_iif = LOOPBACK_IFINDEX,
+ .flowi4_proto = pkt->tprot,
.flowi4_uid = sock_net_uid(nft_net(pkt), NULL),
- .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
};
const struct net_device *oif;
const struct net_device *found;
+ if (nft_fib_can_skip(pkt)) {
+ nft_fib_store_result(dest, priv, nft_in(pkt));
+ return;
+ }
+
/*
* Do not set flowi4_oif, it restricts results (for example, asking
* for oif 3 will get RTN_UNICAST result even if the daddr exits
@@ -84,11 +94,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
oif = NULL;
- if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
- nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv, nft_in(pkt));
- return;
- }
+ fl4.flowi4_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, oif);
iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
if (!iph) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 723ac9181558..9a5c9497b393 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -189,7 +189,11 @@ const __u8 ip_tos2prio[16] = {
EXPORT_SYMBOL(ip_tos2prio);
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
+#ifndef CONFIG_PREEMPT_RT
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
+#else
+#define RT_CACHE_STAT_INC(field) this_cpu_inc(rt_cache_stat.field)
+#endif
#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
@@ -390,7 +394,13 @@ static inline int ip_rt_proc_init(void)
static inline bool rt_is_expired(const struct rtable *rth)
{
- return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
+ bool res;
+
+ rcu_read_lock();
+ res = rth->rt_genid != rt_genid_ipv4(dev_net_rcu(rth->dst.dev));
+ rcu_read_unlock();
+
+ return res;
}
void rt_cache_flush(struct net *net)
@@ -870,11 +880,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
}
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
- rcu_read_unlock();
net = dev_net(rt->dst.dev);
- peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
+ peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif);
if (!peer) {
+ rcu_read_unlock();
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
rt_nexthop(rt, ip_hdr(skb)->daddr));
return;
@@ -893,7 +903,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
*/
if (peer->n_redirects >= ip_rt_redirect_number) {
peer->rate_last = jiffies;
- goto out_put_peer;
+ goto out_unlock;
}
/* Check for load limit; set rate_last to the latest sent
@@ -914,8 +924,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
}
-out_put_peer:
- inet_putpeer(peer);
+out_unlock:
+ rcu_read_unlock();
}
static int ip_error(struct sk_buff *skb)
@@ -975,9 +985,9 @@ static int ip_error(struct sk_buff *skb)
break;
}
+ rcu_read_lock();
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
- l3mdev_master_ifindex(skb->dev), 1);
-
+ l3mdev_master_ifindex_rcu(skb->dev));
send = true;
if (peer) {
now = jiffies;
@@ -989,8 +999,9 @@ static int ip_error(struct sk_buff *skb)
peer->rate_tokens -= ip_rt_error_cost;
else
send = false;
- inet_putpeer(peer);
}
+ rcu_read_unlock();
+
if (send)
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
@@ -1001,9 +1012,9 @@ out: kfree_skb_reason(skb, reason);
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct dst_entry *dst = &rt->dst;
- struct net *net = dev_net(dst->dev);
struct fib_result res;
bool lock = false;
+ struct net *net;
u32 old_mtu;
if (ip_mtu_locked(dst))
@@ -1013,6 +1024,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (old_mtu < mtu)
return;
+ rcu_read_lock();
+ net = dev_net_rcu(dst->dev);
if (mtu < net->ipv4.ip_rt_min_pmtu) {
lock = true;
mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
@@ -1020,17 +1033,29 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (rt->rt_pmtu == mtu && !lock &&
time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2))
- return;
+ goto out;
- rcu_read_lock();
if (fib_lookup(net, fl4, &res, 0) == 0) {
struct fib_nh_common *nhc;
fib_select_path(net, &res, fl4, NULL);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (fib_info_num_path(res.fi) > 1) {
+ int nhsel;
+
+ for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) {
+ nhc = fib_info_nhc(res.fi, nhsel);
+ update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+ jiffies + net->ipv4.ip_rt_mtu_expires);
+ }
+ goto out;
+ }
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
nhc = FIB_RES_NHC(res);
update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
jiffies + net->ipv4.ip_rt_mtu_expires);
}
+out:
rcu_read_unlock();
}
@@ -1293,10 +1318,15 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
- struct net *net = dev_net(dst->dev);
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
- unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
- net->ipv4.ip_rt_min_advmss);
+ unsigned int advmss;
+ struct net *net;
+
+ rcu_read_lock();
+ net = dev_net_rcu(dst->dev);
+ advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
+ net->ipv4.ip_rt_min_advmss);
+ rcu_read_unlock();
return min(advmss, IPV4_MAX_PMTU - header_size);
}
@@ -1654,8 +1684,8 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
else if (rt->rt_gw_family == AF_INET6)
new_rt->rt_gw6 = rt->rt_gw6;
- new_rt->dst.input = rt->dst.input;
- new_rt->dst.output = rt->dst.output;
+ new_rt->dst.input = READ_ONCE(rt->dst.input);
+ new_rt->dst.output = READ_ONCE(rt->dst.output);
new_rt->dst.error = rt->dst.error;
new_rt->dst.lastuse = jiffies;
new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
@@ -2515,7 +2545,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
do_cache = true;
if (type == RTN_BROADCAST) {
flags |= RTCF_BROADCAST | RTCF_LOCAL;
- fi = NULL;
} else if (type == RTN_MULTICAST) {
flags |= RTCF_MULTICAST | RTCF_LOCAL;
if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4f77bd862e95..156da81bce06 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1145,7 +1145,7 @@ restart:
goto do_error;
while (msg_data_left(msg)) {
- ssize_t copy = 0;
+ int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
@@ -1564,12 +1564,13 @@ EXPORT_SYMBOL(tcp_recv_skb);
* or for 'peeking' the socket using this routine
* (although both would be easy to implement).
*/
-int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor)
+static int __tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq)
{
struct sk_buff *skb;
struct tcp_sock *tp = tcp_sk(sk);
- u32 seq = tp->copied_seq;
+ u32 seq = *copied_seq;
u32 offset;
int copied = 0;
@@ -1623,9 +1624,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
tcp_eat_recv_skb(sk, skb);
if (!desc->count)
break;
- WRITE_ONCE(tp->copied_seq, seq);
+ WRITE_ONCE(*copied_seq, seq);
}
- WRITE_ONCE(tp->copied_seq, seq);
+ WRITE_ONCE(*copied_seq, seq);
+
+ if (noack)
+ goto out;
tcp_rcv_space_adjust(sk);
@@ -1634,10 +1638,25 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
tcp_recv_skb(sk, seq, &offset);
tcp_cleanup_rbuf(sk, copied);
}
+out:
return copied;
}
+
+int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ return __tcp_read_sock(sk, desc, recv_actor, false,
+ &tcp_sk(sk)->copied_seq);
+}
EXPORT_SYMBOL(tcp_read_sock);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq)
+{
+ return __tcp_read_sock(sk, desc, recv_actor, noack, copied_seq);
+}
+
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct sk_buff *skb;
@@ -2437,14 +2456,12 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
*/
memset(&dmabuf_cmsg, 0, sizeof(dmabuf_cmsg));
dmabuf_cmsg.frag_size = copy;
- err = put_cmsg(msg, SOL_SOCKET, SO_DEVMEM_LINEAR,
- sizeof(dmabuf_cmsg), &dmabuf_cmsg);
- if (err || msg->msg_flags & MSG_CTRUNC) {
- msg->msg_flags &= ~MSG_CTRUNC;
- if (!err)
- err = -ETOOSMALL;
+ err = put_cmsg_notrunc(msg, SOL_SOCKET,
+ SO_DEVMEM_LINEAR,
+ sizeof(dmabuf_cmsg),
+ &dmabuf_cmsg);
+ if (err)
goto out;
- }
sent += copy;
@@ -2498,16 +2515,12 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
offset += copy;
remaining_len -= copy;
- err = put_cmsg(msg, SOL_SOCKET,
- SO_DEVMEM_DMABUF,
- sizeof(dmabuf_cmsg),
- &dmabuf_cmsg);
- if (err || msg->msg_flags & MSG_CTRUNC) {
- msg->msg_flags &= ~MSG_CTRUNC;
- if (!err)
- err = -ETOOSMALL;
+ err = put_cmsg_notrunc(msg, SOL_SOCKET,
+ SO_DEVMEM_DMABUF,
+ sizeof(dmabuf_cmsg),
+ &dmabuf_cmsg);
+ if (err)
goto out;
- }
atomic_long_inc(&niov->pp_ref_count);
tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 392678ae80f4..22e8a2af5dd8 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -646,6 +646,42 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
ops->sendmsg == tcp_sendmsg ? 0 : -ENOTSUPP;
}
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ struct sock *sk = strp->sk;
+ struct sk_psock *psock;
+ struct tcp_sock *tp;
+ int copied = 0;
+
+ tp = tcp_sk(sk);
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (WARN_ON_ONCE(!psock)) {
+ desc->error = -EINVAL;
+ goto out;
+ }
+
+ psock->ingress_bytes = 0;
+ copied = tcp_read_sock_noack(sk, desc, recv_actor, true,
+ &psock->copied_seq);
+ if (copied < 0)
+ goto out;
+ /* recv_actor may redirect skb to another socket (SK_REDIRECT) or
+ * just put skb into ingress queue of current socket (SK_PASS).
+ * For SK_REDIRECT, we need to ack the frame immediately but for
+ * SK_PASS, we want to delay the ack until tcp_bpf_recvmsg_parser().
+ */
+ tp->copied_seq = psock->copied_seq - psock->ingress_bytes;
+ tcp_rcv_space_adjust(sk);
+ __tcp_cleanup_rbuf(sk, copied - psock->ingress_bytes);
+out:
+ rcu_read_unlock();
+ return copied;
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
+
int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 5dbed91c6178..76c23675ae50 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -392,6 +392,10 @@ static void hystart_update(struct sock *sk, u32 delay)
if (after(tp->snd_una, ca->end_seq))
bictcp_hystart_reset(sk);
+ /* hystart triggers when cwnd is larger than some threshold */
+ if (tcp_snd_cwnd(tp) < hystart_low_window)
+ return;
+
if (hystart_detect & HYSTART_ACK_TRAIN) {
u32 now = bictcp_clock_us(sk);
@@ -467,9 +471,7 @@ __bpf_kfunc static void cubictcp_acked(struct sock *sk, const struct ack_sample
if (ca->delay_min == 0 || ca->delay_min > delay)
ca->delay_min = delay;
- /* hystart triggers when cwnd is larger than some threshold */
- if (!ca->found && tcp_in_slow_start(tp) && hystart &&
- tcp_snd_cwnd(tp) >= hystart_low_window)
+ if (!ca->found && tcp_in_slow_start(tp) && hystart)
hystart_update(sk, delay);
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 0f523cbfe329..408985eb74ee 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -3,6 +3,7 @@
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>
+#include <net/busy_poll.h>
void tcp_fastopen_init_key_once(struct net *net)
{
@@ -178,7 +179,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
if (!skb)
return;
- skb_dst_drop(skb);
+ tcp_cleanup_skb(skb);
/* segs_in has been initialized to 1 in tcp_create_openreq_child().
* Hence, reset segs_in to 0 before calling tcp_segs_in()
* to avoid double counting. Also, tcp_segs_in() expects
@@ -195,7 +196,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- __skb_queue_tail(&sk->sk_receive_queue, skb);
+ tcp_add_receive_queue(sk, skb);
tp->syn_data_acked = 1;
/* u64_stats_update_begin(&tp->syncp) not needed here,
@@ -279,6 +280,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
refcount_set(&req->rsk_refcnt, 2);
+ sk_mark_napi_id_set(child, skb);
+
/* Now finish processing the fastopen child socket. */
tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2d43b29da15e..30f4375f8431 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -243,9 +243,15 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
do_div(val, skb->truesize);
tcp_sk(sk)->scaling_ratio = val ? val : 1;
- if (old_ratio != tcp_sk(sk)->scaling_ratio)
- WRITE_ONCE(tcp_sk(sk)->window_clamp,
- tcp_win_from_space(sk, sk->sk_rcvbuf));
+ if (old_ratio != tcp_sk(sk)->scaling_ratio) {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ val = tcp_win_from_space(sk, sk->sk_rcvbuf);
+ tcp_set_window_clamp(sk, val);
+
+ if (tp->window_clamp < tp->rcvq_space.space)
+ tp->rcvq_space.space = tp->window_clamp;
+ }
}
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
@@ -413,6 +419,20 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
return false;
}
+static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count)
+{
+ tp->delivered_ce += ecn_count;
+}
+
+/* Updates the delivered and delivered_ce counts */
+static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
+ bool ece_ack)
+{
+ tp->delivered += delivered;
+ if (ece_ack)
+ tcp_count_delivered_ce(tp, delivered);
+}
+
/* Buffer size and advertised window tuning.
*
* 1. Tuning sk->sk_sndbuf, when connection enters established state.
@@ -645,10 +665,12 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
*/
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
- u32 new_sample = tp->rcv_rtt_est.rtt_us;
- long m = sample;
+ u32 new_sample, old_sample = tp->rcv_rtt_est.rtt_us;
+ long m = sample << 3;
- if (new_sample != 0) {
+ if (old_sample == 0 || m < old_sample) {
+ new_sample = m;
+ } else {
/* If we sample in larger samples in the non-timestamp
* case, we could grossly overestimate the RTT especially
* with chatty applications or bulk transfer apps which
@@ -659,17 +681,9 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
* else with timestamps disabled convergence takes too
* long.
*/
- if (!win_dep) {
- m -= (new_sample >> 3);
- new_sample += m;
- } else {
- m <<= 3;
- if (m < new_sample)
- new_sample = m;
- }
- } else {
- /* No previous measure. */
- new_sample = m << 3;
+ if (win_dep)
+ return;
+ new_sample = old_sample - (old_sample >> 3) + sample;
}
tp->rcv_rtt_est.rtt_us = new_sample;
@@ -693,7 +707,7 @@ new_measure:
tp->rcv_rtt_est.time = tp->tcp_mstamp;
}
-static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
+static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp, u32 min_delta)
{
u32 delta, delta_us;
@@ -703,7 +717,7 @@ static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp)
if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
if (!delta)
- delta = 1;
+ delta = min_delta;
delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
return delta_us;
}
@@ -721,9 +735,9 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
if (TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
- s32 delta = tcp_rtt_tsopt_us(tp);
+ s32 delta = tcp_rtt_tsopt_us(tp, 0);
- if (delta >= 0)
+ if (delta > 0)
tcp_rcv_rtt_update(tp, delta, 0);
}
}
@@ -735,8 +749,7 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
void tcp_rcv_space_adjust(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 copied;
- int time;
+ int time, inq, copied;
trace_tcp_rcv_space_adjust(sk);
@@ -747,6 +760,9 @@ void tcp_rcv_space_adjust(struct sock *sk)
/* Number of bytes copied to user in last RTT */
copied = tp->copied_seq - tp->rcvq_space.seq;
+ /* Number of bytes in receive queue. */
+ inq = tp->rcv_nxt - tp->copied_seq;
+ copied -= inq;
if (copied <= tp->rcvq_space.space)
goto new_measure;
@@ -1148,15 +1164,6 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
}
}
-/* Updates the delivered and delivered_ce counts */
-static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
- bool ece_ack)
-{
- tp->delivered += delivered;
- if (ece_ack)
- tp->delivered_ce += delivered;
-}
-
/* This procedure tags the retransmission queue when SACKs arrive.
*
* We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -2475,20 +2482,33 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
const struct sock *sk = (const struct sock *)tp;
- if (tp->retrans_stamp &&
- tcp_tsopt_ecr_before(tp, tp->retrans_stamp))
- return true; /* got echoed TS before first retransmission */
+ /* Received an echoed timestamp before the first retransmission? */
+ if (tp->retrans_stamp)
+ return tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
+
+ /* We set tp->retrans_stamp upon the first retransmission of a loss
+ * recovery episode, so normally if tp->retrans_stamp is 0 then no
+ * retransmission has happened yet (likely due to TSQ, which can cause
+ * fast retransmits to be delayed). So if snd_una advanced while
+ * (tp->retrans_stamp is 0 then apparently a packet was merely delayed,
+ * not lost. But there are exceptions where we retransmit but then
+ * clear tp->retrans_stamp, so we check for those exceptions.
+ */
- /* Check if nothing was retransmitted (retrans_stamp==0), which may
- * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp
- * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear
- * retrans_stamp even if we had retransmitted the SYN.
+ /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen()
+ * clears tp->retrans_stamp when snd_una == high_seq.
*/
- if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */
- sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */
- return true; /* nothing was retransmitted */
+ if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq))
+ return false;
- return false;
+ /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp
+ * when setting FLAG_SYN_ACKED is set, even if the SYN was
+ * retransmitted.
+ */
+ if (sk->sk_state == TCP_SYN_SENT)
+ return false;
+
+ return true; /* tp->retrans_stamp is zero; no retransmit yet */
}
/* Undo procedures. */
@@ -3215,7 +3235,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
*/
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp &&
tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED)
- seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp);
+ seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp, 1);
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
@@ -3856,12 +3876,23 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
}
}
-static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
+static void tcp_in_ack_event(struct sock *sk, int flag)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- if (icsk->icsk_ca_ops->in_ack_event)
- icsk->icsk_ca_ops->in_ack_event(sk, flags);
+ if (icsk->icsk_ca_ops->in_ack_event) {
+ u32 ack_ev_flags = 0;
+
+ if (flag & FLAG_WIN_UPDATE)
+ ack_ev_flags |= CA_ACK_WIN_UPDATE;
+ if (flag & FLAG_SLOWPATH) {
+ ack_ev_flags |= CA_ACK_SLOWPATH;
+ if (flag & FLAG_ECE)
+ ack_ev_flags |= CA_ACK_ECE;
+ }
+
+ icsk->icsk_ca_ops->in_ack_event(sk, ack_ev_flags);
+ }
}
/* Congestion control has updated the cwnd already. So if we're in
@@ -3978,12 +4009,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_snd_una_update(tp, ack);
flag |= FLAG_WIN_UPDATE;
- tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
-
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
- u32 ack_ev_flags = CA_ACK_SLOWPATH;
-
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
@@ -3995,19 +4022,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
+ if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
- ack_ev_flags |= CA_ACK_ECE;
- }
if (sack_state.sack_delivered)
tcp_count_delivered(tp, sack_state.sack_delivered,
flag & FLAG_ECE);
-
- if (flag & FLAG_WIN_UPDATE)
- ack_ev_flags |= CA_ACK_WIN_UPDATE;
-
- tcp_in_ack_event(sk, ack_ev_flags);
}
/* This is a deviation from RFC3168 since it states that:
@@ -4034,6 +4054,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_rack_update_reo_wnd(sk, &rs);
+ tcp_in_ack_event(sk, flag);
+
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -4065,6 +4087,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
return 1;
no_queue:
+ tcp_in_ack_event(sk, flag);
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK) {
tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
@@ -4947,8 +4970,9 @@ static void tcp_ofo_queue(struct sock *sk)
if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
__u32 dsack = dsack_high;
+
if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
- dsack_high = TCP_SKB_CB(skb)->end_seq;
+ dsack = TCP_SKB_CB(skb)->end_seq;
tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
}
p = rb_next(p);
@@ -4964,7 +4988,7 @@ static void tcp_ofo_queue(struct sock *sk)
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
- __skb_queue_tail(&sk->sk_receive_queue, skb);
+ tcp_add_receive_queue(sk, skb);
else
kfree_skb_partial(skb, fragstolen);
@@ -5016,6 +5040,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
return;
}
+ tcp_measure_rcv_mss(sk, skb);
/* Disable header prediction. */
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
@@ -5156,7 +5181,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
skb, fragstolen)) ? 1 : 0;
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
- __skb_queue_tail(&sk->sk_receive_queue, skb);
+ tcp_add_receive_queue(sk, skb);
skb_set_owner_r(skb, sk);
}
return eaten;
@@ -5239,7 +5264,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
return;
}
- skb_dst_drop(skb);
+ tcp_cleanup_skb(skb);
__skb_pull(skb, tcp_hdr(skb)->doff * 4);
reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -6208,7 +6233,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
- skb_dst_drop(skb);
+ tcp_cleanup_skb(skb);
__skb_pull(skb, tcp_header_len);
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -6827,6 +6852,9 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req);
+ if (tp->rx_opt.tstamp_ok)
+ tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+
if (req) {
tcp_rcv_synrecv_state_fastopen(sk);
} else {
@@ -6852,9 +6880,6 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
- if (tp->rx_opt.tstamp_ok)
- tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
-
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bcc2f1e090c7..824048679e1b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2025,7 +2025,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
*/
skb_condense(skb);
- skb_dst_drop(skb);
+ tcp_cleanup_skb(skb);
if (unlikely(tcp_checksum_complete(skb))) {
bh_unlock_sock(sk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index bb1fe1ba867a..f3e4fc957219 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -806,12 +806,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* In sequence, PAWS is OK. */
- /* TODO: We probably should defer ts_recent change once
- * we take ownership of @req.
- */
- if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
- WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
-
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
/* Truncate SYN, it is out of window starting
at tcp_rsk(req)->rcv_isn + 1. */
@@ -860,6 +854,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!child)
goto listen_overflow;
+ if (own_req && tmp_opt.saw_tstamp &&
+ !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
+ tcp_sk(child)->rx_opt.ts_recent = tmp_opt.rcv_tsval;
+
if (own_req && rsk_drop_req(req)) {
reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 2308665b51c5..3a9c5c14c310 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -13,12 +13,15 @@
#include <net/tcp.h>
#include <net/protocol.h>
-static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
+static void tcp_gso_tstamp(struct sk_buff *skb, struct sk_buff *gso_skb,
unsigned int seq, unsigned int mss)
{
+ u32 flags = skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP;
+ u32 ts_seq = skb_shinfo(gso_skb)->tskey;
+
while (skb) {
if (before(ts_seq, seq + mss)) {
- skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP;
+ skb_shinfo(skb)->tx_flags |= flags;
skb_shinfo(skb)->tskey = ts_seq;
return;
}
@@ -193,8 +196,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
th = tcp_hdr(skb);
seq = ntohl(th->seq);
- if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP))
- tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss);
+ if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_ANY_TSTAMP))
+ tcp_gso_tstamp(segs, gso_skb, seq, mss);
newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta));
@@ -352,6 +355,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
flush |= skb->ip_summed != p->ip_summed;
flush |= skb->csum_level != p->csum_level;
flush |= NAPI_GRO_CB(p)->count >= 64;
+ skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
if (flush || skb_gro_receive_list(p, skb))
mss = 1;
@@ -432,7 +436,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
iif, sdif);
NAPI_GRO_CB(skb)->is_flist = !sk;
if (sk)
- sock_put(sk);
+ sock_gen_put(sk);
}
INDIRECT_CALLABLE_SCOPE
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8efc58716ce9..6d5387811c32 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -265,11 +265,14 @@ static u16 tcp_select_window(struct sock *sk)
u32 cur_win, new_win;
/* Make the window 0 if we failed to queue the data because we
- * are out of memory. The window is temporary, so we don't store
- * it on the socket.
+ * are out of memory.
*/
- if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM))
+ if (unlikely(inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOMEM)) {
+ tp->pred_flags = 0;
+ tp->rcv_wnd = 0;
+ tp->rcv_wup = tp->rcv_nxt;
return 0;
+ }
cur_win = tcp_receive_window(tp);
new_win = __tcp_select_window(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ff85242720a0..f4e24fc878fa 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,6 +420,49 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
udp_ehash_secret + net_hash_mix(net));
}
+/**
+ * udp4_lib_lookup1() - Simplified lookup using primary hash (destination port)
+ * @net: Network namespace
+ * @saddr: Source address, network order
+ * @sport: Source port, network order
+ * @daddr: Destination address, network order
+ * @hnum: Destination port, host order
+ * @dif: Destination interface index
+ * @sdif: Destination bridge port index, if relevant
+ * @udptable: Set of UDP hash tables
+ *
+ * Simplified lookup to be used as fallback if no sockets are found due to a
+ * potential race between (receive) address change, and lookup happening before
+ * the rehash operation. This function ignores SO_REUSEPORT groups while scoring
+ * result sockets, because if we have one, we don't need the fallback at all.
+ *
+ * Called under rcu_read_lock().
+ *
+ * Return: socket with highest matching score if any, NULL if none
+ */
+static struct sock *udp4_lib_lookup1(const struct net *net,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned int hnum,
+ int dif, int sdif,
+ const struct udp_table *udptable)
+{
+ unsigned int slot = udp_hashfn(net, hnum, udptable->mask);
+ struct udp_hslot *hslot = &udptable->hash[slot];
+ struct sock *sk, *result = NULL;
+ int score, badness = 0;
+
+ sk_for_each_rcu(sk, &hslot->head) {
+ score = compute_score(sk, net,
+ saddr, sport, daddr, hnum, dif, sdif);
+ if (score > badness) {
+ result = sk;
+ badness = score;
+ }
+ }
+
+ return result;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(const struct net *net,
__be32 saddr, __be16 sport,
@@ -525,6 +568,19 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif, sdif,
hslot2, skb);
+ if (!IS_ERR_OR_NULL(result))
+ goto done;
+
+ /* Primary hash (destination port) lookup as fallback for this race:
+ * 1. __ip4_datagram_connect() sets sk_rcv_saddr
+ * 2. lookup (this function): new sk_rcv_saddr, hashes not updated yet
+ * 3. rehash operation updating _secondary and four-tuple_ hashes
+ * The primary hash doesn't need an update after 1., so, thanks to this
+ * further step, 1. and 3. don't need to be atomic against the lookup.
+ */
+ result = udp4_lib_lookup1(net, saddr, sport, daddr, hnum, dif, sdif,
+ udptable);
+
done:
if (IS_ERR(result))
return NULL;
@@ -929,9 +985,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
const int hlen = skb_network_header_len(skb) +
sizeof(struct udphdr);
- if (hlen + cork->gso_size > cork->fragsize) {
+ if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
kfree_skb(skb);
- return -EINVAL;
+ return -EMSGSIZE;
}
if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
@@ -1414,12 +1470,12 @@ static bool udp_skb_has_head_state(struct sk_buff *skb)
}
/* fully reclaim rmem/fwd memory allocated for skb */
-static void udp_rmem_release(struct sock *sk, int size, int partial,
- bool rx_queue_lock_held)
+static void udp_rmem_release(struct sock *sk, unsigned int size,
+ int partial, bool rx_queue_lock_held)
{
struct udp_sock *up = udp_sk(sk);
struct sk_buff_head *sk_queue;
- int amt;
+ unsigned int amt;
if (likely(partial)) {
up->forward_deficit += size;
@@ -1439,10 +1495,8 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
if (!rx_queue_lock_held)
spin_lock(&sk_queue->lock);
-
- sk_forward_alloc_add(sk, size);
- amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
- sk_forward_alloc_add(sk, -amt);
+ amt = (size + sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
+ sk_forward_alloc_add(sk, size - amt);
if (amt)
__sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
@@ -1514,17 +1568,25 @@ static int udp_rmem_schedule(struct sock *sk, int size)
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
{
struct sk_buff_head *list = &sk->sk_receive_queue;
- int rmem, err = -ENOMEM;
+ unsigned int rmem, rcvbuf;
spinlock_t *busy = NULL;
- int size, rcvbuf;
+ int size, err = -ENOMEM;
- /* Immediately drop when the receive queue is full.
- * Always allow at least one packet.
- */
rmem = atomic_read(&sk->sk_rmem_alloc);
rcvbuf = READ_ONCE(sk->sk_rcvbuf);
- if (rmem > rcvbuf)
- goto drop;
+ size = skb->truesize;
+
+ /* Immediately drop when the receive queue is full.
+ * Cast to unsigned int performs the boundary check for INT_MAX.
+ */
+ if (rmem + size > rcvbuf) {
+ if (rcvbuf > INT_MAX >> 1)
+ goto drop;
+
+ /* Always allow at least one packet for small buffer. */
+ if (rmem > rcvbuf)
+ goto drop;
+ }
/* Under mem pressure, it might be helpful to help udp_recvmsg()
* having linear skbs :
@@ -1534,10 +1596,10 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
*/
if (rmem > (rcvbuf >> 1)) {
skb_condense(skb);
-
+ size = skb->truesize;
busy = busylock_acquire(sk);
}
- size = skb->truesize;
+
udp_set_dev_scratch(skb);
atomic_add(size, &sk->sk_rmem_alloc);
@@ -1624,7 +1686,7 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
static struct sk_buff *__first_packet_length(struct sock *sk,
struct sk_buff_head *rcvq,
- int *total)
+ unsigned int *total)
{
struct sk_buff *skb;
@@ -1657,8 +1719,8 @@ static int first_packet_length(struct sock *sk)
{
struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
+ unsigned int total = 0;
struct sk_buff *skb;
- int total = 0;
int res;
spin_lock_bh(&rcvq->lock);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index a5be6e4ed326..12ba1a8db93a 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -61,7 +61,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
skb->remcsum_offload = remcsum;
- need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
+ need_ipsec = (skb_dst(skb) && dst_xfrm(skb_dst(skb))) || skb_sec_path(skb);
/* Try to offload checksum if possible */
offload_csum = !!(need_csum &&
!need_ipsec &&
@@ -247,6 +247,62 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
return segs;
}
+static void __udpv6_gso_segment_csum(struct sk_buff *seg,
+ struct in6_addr *oldip,
+ const struct in6_addr *newip,
+ __be16 *oldport, __be16 newport)
+{
+ struct udphdr *uh = udp_hdr(seg);
+
+ if (ipv6_addr_equal(oldip, newip) && *oldport == newport)
+ return;
+
+ if (uh->check) {
+ inet_proto_csum_replace16(&uh->check, seg, oldip->s6_addr32,
+ newip->s6_addr32, true);
+
+ inet_proto_csum_replace2(&uh->check, seg, *oldport, newport,
+ false);
+ if (!uh->check)
+ uh->check = CSUM_MANGLED_0;
+ }
+
+ *oldip = *newip;
+ *oldport = newport;
+}
+
+static struct sk_buff *__udpv6_gso_segment_list_csum(struct sk_buff *segs)
+{
+ const struct ipv6hdr *iph;
+ const struct udphdr *uh;
+ struct ipv6hdr *iph2;
+ struct sk_buff *seg;
+ struct udphdr *uh2;
+
+ seg = segs;
+ uh = udp_hdr(seg);
+ iph = ipv6_hdr(seg);
+ uh2 = udp_hdr(seg->next);
+ iph2 = ipv6_hdr(seg->next);
+
+ if (!(*(const u32 *)&uh->source ^ *(const u32 *)&uh2->source) &&
+ ipv6_addr_equal(&iph->saddr, &iph2->saddr) &&
+ ipv6_addr_equal(&iph->daddr, &iph2->daddr))
+ return segs;
+
+ while ((seg = seg->next)) {
+ uh2 = udp_hdr(seg);
+ iph2 = ipv6_hdr(seg);
+
+ __udpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr,
+ &uh2->source, uh->source);
+ __udpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr,
+ &uh2->dest, uh->dest);
+ }
+
+ return segs;
+}
+
static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
netdev_features_t features,
bool is_ipv6)
@@ -259,7 +315,10 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
- return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb);
+ if (is_ipv6)
+ return __udpv6_gso_segment_list_csum(skb);
+ else
+ return __udpv4_gso_segment_list_csum(skb);
}
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
@@ -273,6 +332,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
bool copy_dtor;
__sum16 check;
__be16 newlen;
+ int ret = 0;
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
@@ -301,6 +361,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size)
return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+ ret = __skb_linearize(gso_skb);
+ if (ret)
+ return ERR_PTR(ret);
+
/* Setup csum, as fraglist skips this in udp4_gro_receive. */
gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head;
gso_skb->csum_offset = offsetof(struct udphdr, check);
@@ -321,13 +385,17 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
/* clear destructor to avoid skb_segment assigning it to tail */
copy_dtor = gso_skb->destructor == sock_wfree;
- if (copy_dtor)
+ if (copy_dtor) {
gso_skb->destructor = NULL;
+ gso_skb->sk = NULL;
+ }
segs = skb_segment(gso_skb, features);
if (IS_ERR_OR_NULL(segs)) {
- if (copy_dtor)
+ if (copy_dtor) {
gso_skb->destructor = sock_wfree;
+ gso_skb->sk = sk;
+ }
return segs;
}
@@ -536,6 +604,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
+ skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
ret = skb_gro_receive_list(p, skb);
} else {
skb_gro_postpull_rcsum(skb, uh,
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index a620618cc568..12a1a0f42195 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -182,11 +182,15 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
int offset = skb_gro_offset(skb);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
- int ret;
-
- offset = offset - sizeof(struct udphdr);
+ int len, dlen;
+ __u8 *udpdata;
+ __be32 *udpdata32;
- if (!pskb_pull(skb, offset))
+ len = skb->len - offset;
+ dlen = offset + min(len, 8);
+ udpdata = skb_gro_header(skb, dlen, offset);
+ udpdata32 = (__be32 *)udpdata;
+ if (unlikely(!udpdata))
return NULL;
rcu_read_lock();
@@ -194,11 +198,13 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (!ops || !ops->callbacks.gro_receive)
goto out;
- ret = __xfrm4_udp_encap_rcv(sk, skb, false);
- if (ret)
+ /* check if it is a keepalive or IKE packet */
+ if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
- skb_push(skb, offset);
+ /* set the transport header to ESP */
+ skb_set_transport_header(skb, offset);
+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
@@ -208,7 +214,6 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
out:
rcu_read_unlock();
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 1;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f7c17388ff6a..49ec223f2eda 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2228,13 +2228,12 @@ errdad:
in6_ifa_put(ifp);
}
-/* Join to solicited addr multicast group.
- * caller must hold RTNL */
+/* Join to solicited addr multicast group. */
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
struct in6_addr maddr;
- if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+ if (READ_ONCE(dev->flags) & (IFF_LOOPBACK | IFF_NOARP))
return;
addrconf_addr_solict_mult(addr, &maddr);
@@ -3237,16 +3236,13 @@ static void add_v4_addrs(struct inet6_dev *idev)
struct in6_addr addr;
struct net_device *dev;
struct net *net = dev_net(idev->dev);
- int scope, plen, offset = 0;
+ int scope, plen;
u32 pflags = 0;
ASSERT_RTNL();
memset(&addr, 0, sizeof(struct in6_addr));
- /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */
- if (idev->dev->addr_len == sizeof(struct in6_addr))
- offset = sizeof(struct in6_addr) - 4;
- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
+ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
scope = IPV6_ADDR_COMPATv4;
@@ -3551,21 +3547,22 @@ static void addrconf_gre_config(struct net_device *dev)
ASSERT_RTNL();
- idev = ipv6_find_idev(dev);
- if (IS_ERR(idev)) {
- pr_debug("%s: add_dev failed\n", __func__);
+ idev = addrconf_add_dev(dev);
+ if (IS_ERR(idev))
return;
- }
- if (dev->type == ARPHRD_ETHER) {
+ /* Generate the IPv6 link-local address using addrconf_addr_gen(),
+ * unless we have an IPv4 GRE device not bound to an IP address and
+ * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this
+ * case). Such devices fall back to add_v4_addrs() instead.
+ */
+ if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 &&
+ idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) {
addrconf_addr_gen(idev, true);
return;
}
add_v4_addrs(idev);
-
- if (dev->flags & IFF_POINTOPOINT)
- addrconf_add_mroute(dev);
}
#endif
@@ -3885,7 +3882,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
* Do not dev_put!
*/
if (unregister) {
- idev->dead = 1;
+ WRITE_ONCE(idev->dead, 1);
/* protected by rtnl_lock */
RCU_INIT_POINTER(dev->ip6_ptr, NULL);
@@ -5807,6 +5804,27 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
}
}
+static int inet6_fill_ifla6_stats_attrs(struct sk_buff *skb,
+ struct inet6_dev *idev)
+{
+ struct nlattr *nla;
+
+ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+ if (!nla)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+ nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+ if (!nla)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
u32 ext_filter_mask)
{
@@ -5829,18 +5847,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
/* XXX - MC not implemented */
- if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS)
- return 0;
-
- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
- if (!nla)
- goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
-
- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
- if (!nla)
- goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+ if (!(ext_filter_mask & RTEXT_FILTER_SKIP_STATS)) {
+ if (inet6_fill_ifla6_stats_attrs(skb, idev) < 0)
+ goto nla_put_failure;
+ }
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (!nla)
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index dbcea9fee626..a247bb93908b 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1072,8 +1072,13 @@ static int calipso_sock_getattr(struct sock *sk,
struct ipv6_opt_hdr *hop;
int opt_len, len, ret_val = -ENOMSG, offset;
unsigned char *opt;
- struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+ struct ipv6_pinfo *pinfo = inet6_sk(sk);
+ struct ipv6_txoptions *txopts;
+
+ if (!pinfo)
+ return -EAFNOSUPPORT;
+ txopts = txopt_get(pinfo);
if (!txopts || !txopts->hopopt)
goto done;
@@ -1125,8 +1130,13 @@ static int calipso_sock_setattr(struct sock *sk,
{
int ret_val;
struct ipv6_opt_hdr *old, *new;
- struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+ struct ipv6_pinfo *pinfo = inet6_sk(sk);
+ struct ipv6_txoptions *txopts;
+ if (!pinfo)
+ return -EAFNOSUPPORT;
+
+ txopts = txopt_get(pinfo);
old = NULL;
if (txopts)
old = txopts->hopopt;
@@ -1153,8 +1163,13 @@ static int calipso_sock_setattr(struct sock *sk,
static void calipso_sock_delattr(struct sock *sk)
{
struct ipv6_opt_hdr *new_hop;
- struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk));
+ struct ipv6_pinfo *pinfo = inet6_sk(sk);
+ struct ipv6_txoptions *txopts;
+ if (!pinfo)
+ return;
+
+ txopts = txopt_get(pinfo);
if (!txopts || !txopts->hopopt)
goto done;
@@ -1192,6 +1207,10 @@ static int calipso_req_setattr(struct request_sock *req,
struct ipv6_opt_hdr *old, *new;
struct sock *sk = sk_to_full_sk(req_to_sk(req));
+ /* sk is NULL for SYN+ACK w/ SYN Cookie */
+ if (!sk)
+ return -ENOMEM;
+
if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt)
old = req_inet->ipv6_opt->hopopt;
else
@@ -1232,6 +1251,10 @@ static void calipso_req_delattr(struct request_sock *req)
struct ipv6_txoptions *txopts;
struct sock *sk = sk_to_full_sk(req_to_sk(req));
+ /* sk is NULL for SYN+ACK w/ SYN Cookie */
+ if (!sk)
+ return;
+
if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt)
return;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b2400c226a32..62d17d7f6d9a 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -137,47 +137,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
}
#ifdef CONFIG_INET6_ESPINTCP
-struct esp_tcp_sk {
- struct sock *sk;
- struct rcu_head rcu;
-};
-
-static void esp_free_tcp_sk(struct rcu_head *head)
-{
- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
-
- sock_put(esk->sk);
- kfree(esk);
-}
-
static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
struct net *net = xs_net(x);
- struct esp_tcp_sk *esk;
__be16 sport, dport;
- struct sock *nsk;
struct sock *sk;
- sk = rcu_dereference(x->encap_sk);
- if (sk && sk->sk_state == TCP_ESTABLISHED)
- return sk;
-
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (sk && sk == nsk) {
- esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
- if (!esk) {
- spin_unlock_bh(&x->lock);
- return ERR_PTR(-ENOMEM);
- }
- RCU_INIT_POINTER(x->encap_sk, NULL);
- esk->sk = sk;
- call_rcu(&esk->rcu, esp_free_tcp_sk);
- }
spin_unlock_bh(&x->lock);
sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
@@ -190,20 +159,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
return ERR_PTR(-EINVAL);
}
- spin_lock_bh(&x->lock);
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (encap->encap_sport != sport ||
- encap->encap_dport != dport) {
- sock_put(sk);
- sk = nsk ?: ERR_PTR(-EREMCHG);
- } else if (sk == nsk) {
- sock_put(sk);
- } else {
- rcu_assign_pointer(x->encap_sk, sk);
- }
- spin_unlock_bh(&x->lock);
-
return sk;
}
@@ -216,8 +171,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
sk = esp6_find_tcp_sk(x);
err = PTR_ERR_OR_ZERO(sk);
- if (err)
+ if (err) {
+ kfree_skb(skb);
goto out;
+ }
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
@@ -226,6 +183,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
err = espintcp_push_skb(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
+
out:
rcu_read_unlock();
return err;
@@ -422,6 +381,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
if (IS_ERR(sk))
return ERR_CAST(sk);
+ sock_put(sk);
+
*lenp = htons(len);
esph = (struct ip_esp_hdr *)(lenp + 1);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 919ebfabbe4e..7b41fb4f00b5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -80,9 +80,9 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET6);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET6);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 04a9ed5e8310..29185c9ebd02 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -365,9 +365,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
+ struct fib6_rule *rule6 = (struct fib6_rule *)rule;
+ struct net *net = rule->fr_net;
int err = -EINVAL;
- struct net *net = sock_net(skb->sk);
- struct fib6_rule *rule6 = (struct fib6_rule *) rule;
if (!inet_validate_dscp(frh->tos)) {
NL_SET_ERR_MSG(extack,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 071b0bc1179d..4d14ab7f7e99 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -76,7 +76,7 @@ static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
{
/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
if (type == ICMPV6_PKT_TOOBIG)
ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
@@ -222,10 +222,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
res = inet_peer_xrlim_allow(peer, tmo);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
}
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
@@ -473,7 +473,10 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (!skb->dev)
return;
- net = dev_net(skb->dev);
+
+ rcu_read_lock();
+
+ net = dev_net_rcu(skb->dev);
mark = IP6_REPLY_MARK(net, skb->mark);
/*
* Make sure we respect the rules
@@ -496,7 +499,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
!(type == ICMPV6_PARAMPROB &&
code == ICMPV6_UNK_OPTION &&
(opt_unrec(skb, info))))
- return;
+ goto out;
saddr = NULL;
}
@@ -526,7 +529,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
&hdr->saddr, &hdr->daddr);
- return;
+ goto out;
}
/*
@@ -535,7 +538,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (is_ineligible(skb)) {
net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
&hdr->saddr, &hdr->daddr);
- return;
+ goto out;
}
/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
@@ -582,7 +585,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
- goto out;
+ goto out_unlock;
tmp_hdr.icmp6_type = type;
tmp_hdr.icmp6_code = code;
@@ -600,7 +603,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
- goto out;
+ goto out_unlock;
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
@@ -616,7 +619,6 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
goto out_dst_release;
}
- rcu_read_lock();
idev = __in6_dev_get(skb->dev);
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
@@ -630,13 +632,15 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
len + sizeof(struct icmp6hdr));
}
- rcu_read_unlock();
+
out_dst_release:
dst_release(dst);
-out:
+out_unlock:
icmpv6_xmit_unlock(sk);
out_bh_enable:
local_bh_enable();
+out:
+ rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);
@@ -679,8 +683,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
- skb, 0);
+ rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
+ NULL, 0, skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -717,7 +721,7 @@ EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
struct sock *sk;
struct inet6_dev *idev;
struct ipv6_pinfo *np;
@@ -832,7 +836,7 @@ enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
u8 code, __be32 info)
{
struct inet6_skb_parm *opt = IP6CB(skb);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
const struct inet6_protocol *ipprot;
enum skb_drop_reason reason;
int inner_offset;
@@ -889,7 +893,7 @@ out:
static int icmpv6_rcv(struct sk_buff *skb)
{
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
struct net_device *dev = icmp6_dev(skb);
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
@@ -921,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
@@ -939,7 +943,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
type = hdr->icmp6_type;
- ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
+ ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);
switch (type) {
case ICMPV6_ECHO_REQUEST:
@@ -1034,9 +1038,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
csum_error:
reason = SKB_DROP_REASON_ICMP_CSUM;
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
kfree_skb_reason(skb, reason);
return 0;
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 95e9146918cc..b8d43ed4689d 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&th->check, skb,
- diff, true);
+ diff, true, true);
}
break;
case NEXTHDR_UDP:
@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&uh->check, skb,
- diff, true);
+ diff, true, true);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
diff = get_csum_diff(ip6h, p);
inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
- diff, true);
+ diff, true, true);
}
break;
}
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index ff7e734e335b..7d574f5132e2 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -88,13 +88,15 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- if (ilwt->connected) {
+ /* cache only if we don't create a dst reference loop */
+ if (ilwt->connected && orig_dst->lwtstate != dst->lwtstate) {
local_bh_disable();
dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr);
local_bh_enable();
}
}
+ skb_dst_drop(skb);
skb_dst_set(skb, dst);
return dst_output(net, sk, skb);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index beb6b4cfc551..647dd8417c6c 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -255,14 +255,15 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
}
static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
- struct ioam6_lwt_encap *tuninfo)
+ struct ioam6_lwt_encap *tuninfo,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *oldhdr, *hdr;
int hdrlen, err;
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -293,7 +294,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
struct ioam6_lwt_encap *tuninfo,
bool has_tunsrc,
struct in6_addr *tunsrc,
- struct in6_addr *tundst)
+ struct in6_addr *tundst,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr, *inner_hdr;
@@ -302,7 +304,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
len = sizeof(*hdr) + hdrlen;
- err = skb_cow_head(skb, len + skb->mac_len);
+ err = skb_cow_head(skb, len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -336,8 +338,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
- struct in6_addr orig_daddr;
+ struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
u32 pkt_cnt;
@@ -352,7 +353,9 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
goto out;
- orig_daddr = ipv6_hdr(skb)->daddr;
+ local_bh_disable();
+ cache_dst = dst_cache_get(&ilwt->cache);
+ local_bh_enable();
switch (ilwt->mode) {
case IOAM6_IPTUNNEL_MODE_INLINE:
@@ -361,7 +364,7 @@ do_inline:
if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
goto out;
- err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+ err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst);
if (unlikely(err))
goto drop;
@@ -371,7 +374,7 @@ do_encap:
/* Encapsulation (ip6ip6) */
err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
ilwt->has_tunsrc, &ilwt->tunsrc,
- &ilwt->tundst);
+ &ilwt->tundst, cache_dst);
if (unlikely(err))
goto drop;
@@ -389,46 +392,48 @@ do_encap:
goto drop;
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
+ if (unlikely(!cache_dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct flowi6 fl6;
- if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
- local_bh_disable();
- dst = dst_cache_get(&ilwt->cache);
- local_bh_enable();
-
- if (unlikely(!dst)) {
- struct ipv6hdr *hdr = ipv6_hdr(skb);
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.daddr = hdr->daddr;
- fl6.saddr = hdr->saddr;
- fl6.flowlabel = ip6_flowinfo(hdr);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = hdr->nexthdr;
-
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- err = dst->error;
- dst_release(dst);
- goto drop;
- }
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ cache_dst = ip6_route_output(net, NULL, &fl6);
+ if (cache_dst->error) {
+ err = cache_dst->error;
+ goto drop;
+ }
+ /* cache only if we don't create a dst reference loop */
+ if (dst->lwtstate != cache_dst->lwtstate) {
local_bh_disable();
- dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
+ dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr);
local_bh_enable();
}
- skb_dst_drop(skb);
- skb_dst_set(skb, dst);
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev));
+ if (unlikely(err))
+ goto drop;
+ }
+ /* avoid lwtunnel_output() reentry loop when destination is the same
+ * after transformation (e.g., with the inline mode)
+ */
+ if (dst->lwtstate != cache_dst->lwtstate) {
+ skb_dst_drop(skb);
+ skb_dst_set(skb, cache_dst);
return dst_output(net, sk, skb);
}
out:
+ dst_release(cache_dst);
return dst->lwtstate->orig_output(net, sk, skb);
drop:
+ dst_release(cache_dst);
kfree_skb(skb);
return err;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 9a1c59275a10..aa1046fbf28e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -440,15 +440,17 @@ struct fib6_dump_arg {
static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
+ unsigned int nsiblings;
int err;
if (!rt || rt == arg->net->ipv6.fib6_null_entry)
return 0;
- if (rt->fib6_nsiblings)
+ nsiblings = READ_ONCE(rt->fib6_nsiblings);
+ if (nsiblings)
err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
rt,
- rt->fib6_nsiblings,
+ nsiblings,
arg->extack);
else
err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
@@ -1126,7 +1128,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (rt6_duplicate_nexthop(iter, rt)) {
if (rt->fib6_nsiblings)
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES)) {
@@ -1155,7 +1157,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
*/
if (rt_can_ecmp &&
rt6_qualify_for_ecmp(iter))
- rt->fib6_nsiblings++;
+ WRITE_ONCE(rt->fib6_nsiblings,
+ rt->fib6_nsiblings + 1);
}
if (iter->fib6_metric > rt->fib6_metric)
@@ -1205,7 +1208,8 @@ next_iter:
fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
&rt->fib6_siblings, fib6_siblings) {
- sibling->fib6_nsiblings++;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings + 1);
BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
fib6_nsiblings++;
}
@@ -1250,8 +1254,9 @@ add:
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings)
- sibling->fib6_nsiblings--;
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings - 1);
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
return err;
@@ -1968,8 +1973,9 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
notify_del = true;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings)
- sibling->fib6_nsiblings--;
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings - 1);
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 235808cfec70..68e9a41eed49 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1498,7 +1498,6 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
- tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
@@ -1882,7 +1881,6 @@ static int ip6erspan_tap_init(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
- tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 9822163428b0..fce91183797a 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -148,7 +148,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
- skb_reset_transport_header(skb);
+ if (!skb_reset_transport_header_careful(skb))
+ goto out;
+
segs = ops->callbacks.gso_segment(skb, features);
if (!segs)
skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f26841f1490f..f0e5431c2d46 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -613,15 +613,15 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
- peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
*/
if (inet_peer_xrlim_allow(peer, 1*HZ))
ndisc_send_redirect(skb, target);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
} else {
int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -1386,6 +1386,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
}
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
+ v6_cork->dontfrag = ipc6->dontfrag;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
@@ -1417,7 +1418,7 @@ static int __ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, size_t length, int transhdrlen,
- unsigned int flags, struct ipcm6_cookie *ipc6)
+ unsigned int flags)
{
struct sk_buff *skb, *skb_prev = NULL;
struct inet_cork *cork = &cork_full->base;
@@ -1471,7 +1472,7 @@ static int __ip6_append_data(struct sock *sk,
if (headersize + transhdrlen > mtu)
goto emsgsize;
- if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
+ if (cork->length + length > mtu - headersize && v6_cork->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
sk->sk_protocol == IPPROTO_ICMPV6 ||
sk->sk_protocol == IPPROTO_RAW)) {
@@ -1843,7 +1844,7 @@ int ip6_append_data(struct sock *sk,
return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
&np->cork, sk_page_frag(sk), getfrag,
- from, length, transhdrlen, flags, ipc6);
+ from, length, transhdrlen, flags);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -2042,13 +2043,11 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
ip6_cork_release(cork, &v6_cork);
return ERR_PTR(err);
}
- if (ipc6->dontfrag < 0)
- ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
- flags, ipc6);
+ flags);
if (err) {
__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
return ERR_PTR(err);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b60e13c42bca..5350c9bb2319 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -630,8 +630,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
skb_dst_set(skb2, &rt->dst);
} else {
- if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
- skb2->dev) ||
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr,
+ ip4h_dscp(eiph), skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
goto out;
}
@@ -1878,7 +1878,6 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
int t_hlen;
t->dev = dev;
- t->net = dev_net(dev);
ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
if (ret)
@@ -1940,6 +1939,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
struct net *net = dev_net(dev);
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ t->net = net;
t->parms.proto = IPPROTO_IPV6;
rcu_assign_pointer(ip6n->tnls_wc[0], t);
@@ -2013,6 +2013,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
int err;
nt = netdev_priv(dev);
+ nt->net = net;
if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
err = ip6_tnl_encap_setup(nt, &ipencap);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 590737c27537..012350469144 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -925,7 +925,6 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
t->dev = dev;
- t->net = dev_net(dev);
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
netdev_lockdep_set_classes(dev);
return 0;
@@ -958,6 +957,7 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
struct net *net = dev_net(dev);
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ t->net = net;
t->parms.proto = IPPROTO_IPV6;
rcu_assign_pointer(ip6n->tnls_wc[0], t);
@@ -1008,6 +1008,7 @@ static int vti6_newlink(struct net *src_net, struct net_device *dev,
vti6_netlink_parms(data, &nt->parms);
nt->parms.proto = IPPROTO_IPV6;
+ nt->net = net;
if (vti6_locate(net, &nt->parms, 0))
return -EEXIST;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d5057401701c..68bc518500f9 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -506,9 +506,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->_c.mfc_un.res.pkt,
- mfc->_c.mfc_un.res.bytes,
- mfc->_c.mfc_un.res.wrong_if);
+ atomic_long_read(&mfc->_c.mfc_un.res.pkt),
+ atomic_long_read(&mfc->_c.mfc_un.res.bytes),
+ atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
for (n = mfc->_c.mfc_un.res.minvif;
n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
@@ -870,7 +870,7 @@ static void ip6mr_update_thresholds(struct mr_table *mrt,
cache->mfc_un.res.maxvif = vifi + 1;
}
}
- cache->mfc_un.res.lastuse = jiffies;
+ WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
}
static int mif6_add(struct net *net, struct mr_table *mrt,
@@ -1928,9 +1928,9 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
&sr->grp.sin6_addr);
if (c) {
- sr->pktcnt = c->_c.mfc_un.res.pkt;
- sr->bytecnt = c->_c.mfc_un.res.bytes;
- sr->wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
return 0;
}
@@ -2000,9 +2000,9 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->_c.mfc_un.res.pkt;
- sr.bytecnt = c->_c.mfc_un.res.bytes;
- sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -2032,6 +2032,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, int vifi)
{
struct vif_device *vif = &mrt->vif_table[vifi];
+ struct net_device *indev = skb->dev;
struct net_device *vif_dev;
struct ipv6hdr *ipv6h;
struct dst_entry *dst;
@@ -2094,7 +2095,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
IP6CB(skb)->flags |= IP6SKB_FORWARDED;
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
- net, NULL, skb, skb->dev, vif_dev,
+ net, NULL, skb, indev, skb->dev,
ip6mr_forward2_finish);
out_free:
@@ -2125,9 +2126,9 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
int true_vifi = ip6mr_find_vif(mrt, dev);
vif = c->_c.mfc_parent;
- c->_c.mfc_un.res.pkt++;
- c->_c.mfc_un.res.bytes += skb->len;
- c->_c.mfc_un.res.lastuse = jiffies;
+ atomic_long_inc(&c->_c.mfc_un.res.pkt);
+ atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
+ WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
@@ -2145,7 +2146,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
- c->_c.mfc_un.res.wrong_if++;
+ atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index b244dbf61d5f..e2a11a2f3b25 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -804,8 +804,8 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
} else {
im->mca_crcount = idev->mc_qrv;
}
- in6_dev_put(pmc->idev);
ip6_mc_clear_src(pmc);
+ in6_dev_put(pmc->idev);
kfree_rcu(pmc, rcu);
}
}
@@ -907,23 +907,22 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
static int __ipv6_dev_mc_inc(struct net_device *dev,
const struct in6_addr *addr, unsigned int mode)
{
- struct ifmcaddr6 *mc;
struct inet6_dev *idev;
-
- ASSERT_RTNL();
+ struct ifmcaddr6 *mc;
/* we need to take a reference on idev */
idev = in6_dev_get(dev);
-
if (!idev)
return -EINVAL;
- if (idev->dead) {
+ mutex_lock(&idev->mc_lock);
+
+ if (READ_ONCE(idev->dead)) {
+ mutex_unlock(&idev->mc_lock);
in6_dev_put(idev);
return -ENODEV;
}
- mutex_lock(&idev->mc_lock);
for_each_mc_mclock(idev, mc) {
if (ipv6_addr_equal(&mc->mca_addr, addr)) {
mc->mca_users++;
@@ -1730,21 +1729,19 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
struct net_device *dev = idev->dev;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- struct net *net = dev_net(dev);
const struct in6_addr *saddr;
struct in6_addr addr_buf;
struct mld2_report *pmr;
struct sk_buff *skb;
unsigned int size;
struct sock *sk;
- int err;
+ struct net *net;
- sk = net->ipv6.igmp_sk;
/* we assume size > sizeof(ra) here
* Also try to not allocate high-order pages for big MTU
*/
size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen;
- skb = sock_alloc_send_skb(sk, size, 1, &err);
+ skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return NULL;
@@ -1752,6 +1749,12 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
skb_reserve(skb, hlen);
skb_tailroom_reserve(skb, mtu, tlen);
+ rcu_read_lock();
+
+ net = dev_net_rcu(dev);
+ sk = net->ipv6.igmp_sk;
+ skb_set_owner_w(skb, sk);
+
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
@@ -1763,6 +1766,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
+ rcu_read_unlock();
+
skb_put_data(skb, ra, sizeof(ra));
skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
@@ -2122,21 +2127,21 @@ static void mld_send_cr(struct inet6_dev *idev)
static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
{
- struct net *net = dev_net(dev);
- struct sock *sk = net->ipv6.igmp_sk;
+ const struct in6_addr *snd_addr, *saddr;
+ int err, len, payload_len, full_len;
+ struct in6_addr addr_buf;
struct inet6_dev *idev;
struct sk_buff *skb;
struct mld_msg *hdr;
- const struct in6_addr *snd_addr, *saddr;
- struct in6_addr addr_buf;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- int err, len, payload_len, full_len;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
- struct flowi6 fl6;
struct dst_entry *dst;
+ struct flowi6 fl6;
+ struct net *net;
+ struct sock *sk;
if (type == ICMPV6_MGM_REDUCTION)
snd_addr = &in6addr_linklocal_allrouters;
@@ -2147,19 +2152,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
payload_len = len + sizeof(ra);
full_len = sizeof(struct ipv6hdr) + payload_len;
- rcu_read_lock();
- IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS);
- rcu_read_unlock();
+ skb = alloc_skb(hlen + tlen + full_len, GFP_KERNEL);
- skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
+ rcu_read_lock();
+ net = dev_net_rcu(dev);
+ idev = __in6_dev_get(dev);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
if (!skb) {
- rcu_read_lock();
- IP6_INC_STATS(net, __in6_dev_get(dev),
- IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
rcu_read_unlock();
return;
}
+ sk = net->ipv6.igmp_sk;
+ skb_set_owner_w(skb, sk);
+
skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);
@@ -2184,9 +2191,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
IPPROTO_ICMPV6,
csum_partial(hdr, len, 0));
- rcu_read_lock();
- idev = __in6_dev_get(skb->dev);
-
icmpv6_flow_init(sk, &fl6, type,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index aba94a348673..8699d1a188dc 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -418,15 +418,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
{
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
struct sk_buff *skb;
skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
- if (!skb) {
- ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
- __func__);
+ if (!skb)
return NULL;
- }
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -437,7 +433,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
/* Manually assign socket ownership as we avoid calling
* sock_alloc_send_pskb() to bypass wmem buffer limits
*/
- skb_set_owner_w(skb, sk);
+ rcu_read_lock();
+ skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk);
+ rcu_read_unlock();
return skb;
}
@@ -473,16 +471,20 @@ static void ip6_nd_hdr(struct sk_buff *skb,
void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct icmp6hdr *icmp6h = icmp6_hdr(skb);
struct dst_entry *dst = skb_dst(skb);
- struct net *net = dev_net(skb->dev);
- struct sock *sk = net->ipv6.ndisc_sk;
struct inet6_dev *idev;
+ struct net *net;
+ struct sock *sk;
int err;
- struct icmp6hdr *icmp6h = icmp6_hdr(skb);
u8 type;
type = icmp6h->icmp6_type;
+ rcu_read_lock();
+
+ net = dev_net_rcu(skb->dev);
+ sk = net->ipv6.ndisc_sk;
if (!dst) {
struct flowi6 fl6;
int oif = skb->dev->ifindex;
@@ -490,6 +492,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
+ rcu_read_unlock();
kfree_skb(skb);
return;
}
@@ -504,7 +507,6 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
- rcu_read_lock();
idev = __in6_dev_get(dst->dev);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
@@ -1694,7 +1696,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
bool ret;
if (netif_is_l3_master(skb->dev)) {
- dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif);
if (!dev)
return;
}
@@ -1731,10 +1733,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n");
goto release;
}
- peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
+
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
ret = inet_peer_xrlim_allow(peer, 1*HZ);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
+
if (!ret)
goto release;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 581ce055bf52..4541836ee3da 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -164,20 +164,20 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct ip6_fraglist_iter iter;
struct sk_buff *frag2;
- if (first_len - hlen > mtu ||
- skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
+ if (first_len - hlen > mtu)
goto blackhole;
- if (skb_cloned(skb))
+ if (skb_cloned(skb) ||
+ skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
goto slow_path;
skb_walk_frags(skb, frag2) {
- if (frag2->len > mtu ||
- skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
+ if (frag2->len > mtu)
goto blackhole;
/* Partially cloned skb? */
- if (skb_shared(frag2))
+ if (skb_shared(frag2) ||
+ skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
goto slow_path;
}
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index a7690ec62325..9ea5ef56cb27 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -103,6 +103,10 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
struct sk_buff *data_skb = NULL;
int doff = 0;
int thoff = 0, tproto;
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn const *ct;
+#endif
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
if (tproto < 0) {
@@ -136,6 +140,25 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
return NULL;
}
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ /* Do the lookup with the original socket address in
+ * case this is a reply packet of an established
+ * SNAT-ted connection.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct &&
+ ((tproto != IPPROTO_ICMPV6 &&
+ ctinfo == IP_CT_ESTABLISHED_REPLY) ||
+ (tproto == IPPROTO_ICMPV6 &&
+ ctinfo == IP_CT_RELATED_REPLY)) &&
+ (ct->status & IPS_SRC_NAT_DONE)) {
+ daddr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
+ dport = (tproto == IPPROTO_TCP) ?
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+ }
+#endif
+
return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
sport, dport, indev);
}
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index c9f1634b3838..421036a3605b 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -50,6 +50,7 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
fl6->flowi6_mark = pkt->skb->mark;
fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
+ fl6->flowi6_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, dev);
return lookup_flags;
}
@@ -73,8 +74,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
- fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
-
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
@@ -158,6 +157,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
{
const struct nft_fib *priv = nft_expr_priv(expr);
int noff = skb_network_offset(pkt->skb);
+ const struct net_device *found = NULL;
const struct net_device *oif = NULL;
u32 *dest = &regs->data[priv->dreg];
struct ipv6hdr *iph, _iph;
@@ -165,11 +165,15 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
.flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
- .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
};
struct rt6_info *rt;
int lookup_flags;
+ if (nft_fib_can_skip(pkt)) {
+ nft_fib_store_result(dest, priv, nft_in(pkt));
+ return;
+ }
+
if (priv->flags & NFTA_FIB_F_IIF)
oif = nft_in(pkt);
else if (priv->flags & NFTA_FIB_F_OIF)
@@ -181,17 +185,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
return;
}
- lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
-
- if (nft_hook(pkt) == NF_INET_PRE_ROUTING ||
- nft_hook(pkt) == NF_INET_INGRESS) {
- if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) ||
- nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
- nft_fib_store_result(dest, priv, nft_in(pkt));
- return;
- }
+ if (nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
+ nft_fib_store_result(dest, priv, nft_in(pkt));
+ return;
}
+ lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
+
*dest = 0;
rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
lookup_flags);
@@ -202,11 +202,15 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
goto put_rt_err;
- if (oif && oif != rt->rt6i_idev->dev &&
- l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex)
- goto put_rt_err;
+ if (!oif) {
+ found = rt->rt6i_idev->dev;
+ } else {
+ if (oif == rt->rt6i_idev->dev ||
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == oif->ifindex)
+ found = oif;
+ }
- nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
+ nft_fib_store_result(dest, priv, found);
put_rt_err:
ip6_rt_put(rt);
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 608fa9d05b55..328419e05c81 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -777,7 +777,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;
- ipcm6_init(&ipc6);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = fl6.flowi6_mark;
@@ -890,9 +890,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (hdrincl)
fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH;
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
@@ -903,9 +900,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (ipc6.hlimit < 0)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (ipc6.dontfrag < 0)
- ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
-
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8ebfed5d6323..22866444efc0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -412,12 +412,37 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
+static struct fib6_info *
+rt6_multipath_first_sibling_rcu(const struct fib6_info *rt)
+{
+ struct fib6_info *iter;
+ struct fib6_node *fn;
+
+ fn = rcu_dereference(rt->fib6_node);
+ if (!fn)
+ goto out;
+ iter = rcu_dereference(fn->leaf);
+ if (!iter)
+ goto out;
+
+ while (iter) {
+ if (iter->fib6_metric == rt->fib6_metric &&
+ rt6_qualify_for_ecmp(iter))
+ return iter;
+ iter = rcu_dereference(iter->fib6_next);
+ }
+
+out:
+ return NULL;
+}
+
void fib6_select_path(const struct net *net, struct fib6_result *res,
struct flowi6 *fl6, int oif, bool have_oif_match,
const struct sk_buff *skb, int strict)
{
- struct fib6_info *match = res->f6i;
+ struct fib6_info *first, *match = res->f6i;
struct fib6_info *sibling;
+ int hash;
if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
goto out;
@@ -440,16 +465,25 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
return;
}
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
+ first = rt6_multipath_first_sibling_rcu(match);
+ if (!first)
goto out;
- list_for_each_entry_rcu(sibling, &match->fib6_siblings,
+ hash = fl6->mp_hash;
+ if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound)) {
+ if (rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
+ strict) >= 0)
+ match = first;
+ goto out;
+ }
+
+ list_for_each_entry_rcu(sibling, &first->fib6_siblings,
fib6_siblings) {
const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
- if (fl6->mp_hash > nh_upper_bound)
+ if (hash > nh_upper_bound)
continue;
if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
break;
@@ -1737,6 +1771,7 @@ out:
if (!err) {
spin_lock_bh(&f6i->fib6_table->tb6_lock);
fib6_update_sernum(net, f6i);
+ fib6_add_gc_list(f6i);
spin_unlock_bh(&f6i->fib6_table->tb6_lock);
fib6_force_start_gc(net);
}
@@ -3196,13 +3231,18 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
unsigned int mtu = dst_mtu(dst);
- struct net *net = dev_net(dev);
+ struct net *net;
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ rcu_read_lock();
+
+ net = dev_net_rcu(dev);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
+ rcu_read_unlock();
+
/*
* Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
* corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
@@ -3639,7 +3679,8 @@ out:
in6_dev_put(idev);
if (err) {
- lwtstate_put(fib6_nh->fib_nh_lws);
+ fib_nh_common_release(&fib6_nh->nh_common);
+ fib6_nh->nh_common.nhc_pcpu_rth_output = NULL;
fib6_nh->fib_nh_lws = NULL;
netdev_put(dev, dev_tracker);
}
@@ -3797,10 +3838,12 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (nh) {
if (rt->fib6_src.plen) {
NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+ err = -EINVAL;
goto out_free;
}
if (!nexthop_get(nh)) {
NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ err = -ENOENT;
goto out_free;
}
rt->nh = nh;
@@ -5197,7 +5240,8 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
*/
rcu_read_lock();
- if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
+ if ((nlflags & NLM_F_APPEND) && rt_last &&
+ READ_ONCE(rt_last->fib6_nsiblings)) {
rt = list_first_or_null_rcu(&rt_last->fib6_siblings,
struct fib6_info,
fib6_siblings);
@@ -5544,32 +5588,34 @@ static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
static size_t rt6_nlmsg_size(struct fib6_info *f6i)
{
+ struct fib6_info *sibling;
+ struct fib6_nh *nh;
int nexthop_len;
if (f6i->nh) {
nexthop_len = nla_total_size(4); /* RTA_NH_ID */
nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
&nexthop_len);
- } else {
- struct fib6_nh *nh = f6i->fib6_nh;
- struct fib6_info *sibling;
-
- nexthop_len = 0;
- if (f6i->fib6_nsiblings) {
- rt6_nh_nlmsg_size(nh, &nexthop_len);
-
- rcu_read_lock();
+ goto common;
+ }
- list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
- fib6_siblings) {
- rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
- }
+ rcu_read_lock();
+retry:
+ nh = f6i->fib6_nh;
+ nexthop_len = 0;
+ if (READ_ONCE(f6i->fib6_nsiblings)) {
+ rt6_nh_nlmsg_size(nh, &nexthop_len);
- rcu_read_unlock();
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
+ rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
+ if (!READ_ONCE(f6i->fib6_nsiblings))
+ goto retry;
}
- nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
-
+ rcu_read_unlock();
+ nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+common:
return NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(16) /* RTA_SRC */
+ nla_total_size(16) /* RTA_DST */
@@ -5728,7 +5774,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (dst->lwtstate &&
lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
- } else if (rt->fib6_nsiblings) {
+ } else if (READ_ONCE(rt->fib6_nsiblings)) {
struct fib6_info *sibling;
struct nlattr *mp;
@@ -5830,16 +5876,21 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
- if (f6i->fib6_nsiblings) {
- struct fib6_info *sibling, *next_sibling;
+ if (READ_ONCE(f6i->fib6_nsiblings)) {
+ const struct fib6_info *sibling;
- list_for_each_entry_safe(sibling, next_sibling,
- &f6i->fib6_siblings, fib6_siblings) {
- if (sibling->fib6_nh->fib_nh_dev == dev)
+ rcu_read_lock();
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
+ if (sibling->fib6_nh->fib_nh_dev == dev) {
+ rcu_read_unlock();
return true;
+ }
+ if (!READ_ONCE(f6i->fib6_nsiblings))
+ break;
}
+ rcu_read_unlock();
}
-
return false;
}
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index db3c19a42e1c..eccfa4203e96 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -125,16 +125,17 @@ static void rpl_destroy_state(struct lwtunnel_state *lwt)
}
static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
- const struct ipv6_rpl_sr_hdr *srh)
+ const struct ipv6_rpl_sr_hdr *srh,
+ struct dst_entry *cache_dst)
{
struct ipv6_rpl_sr_hdr *isrh, *csrh;
- const struct ipv6hdr *oldhdr;
+ struct ipv6hdr oldhdr;
struct ipv6hdr *hdr;
unsigned char *buf;
size_t hdrlen;
int err;
- oldhdr = ipv6_hdr(skb);
+ memcpy(&oldhdr, ipv6_hdr(skb), sizeof(oldhdr));
buf = kcalloc(struct_size(srh, segments.addr, srh->segments_left), 2, GFP_ATOMIC);
if (!buf)
@@ -146,14 +147,14 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
memcpy(isrh, srh, sizeof(*isrh));
memcpy(isrh->rpl_segaddr, &srh->rpl_segaddr[1],
(srh->segments_left - 1) * 16);
- isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr->daddr;
+ isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr.daddr;
ipv6_rpl_srh_compress(csrh, isrh, &srh->rpl_segaddr[0],
isrh->segments_left - 1);
hdrlen = ((csrh->hdrlen + 1) << 3);
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err)) {
kfree(buf);
return err;
@@ -168,7 +169,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
skb_mac_header_rebuild(skb);
hdr = ipv6_hdr(skb);
- memmove(hdr, oldhdr, sizeof(*hdr));
+ memmove(hdr, &oldhdr, sizeof(*hdr));
isrh = (void *)hdr + sizeof(*hdr);
memcpy(isrh, csrh, hdrlen);
@@ -186,7 +187,8 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
return 0;
}
-static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
+static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct rpl_iptunnel_encap *tinfo;
@@ -196,7 +198,7 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
tinfo = rpl_encap_lwtunnel(dst->lwtstate);
- return rpl_do_srh_inline(skb, rlwt, tinfo->srh);
+ return rpl_do_srh_inline(skb, rlwt, tinfo->srh, cache_dst);
}
static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -208,14 +210,14 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
- err = rpl_do_srh(skb, rlwt);
- if (unlikely(err))
- goto drop;
-
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
local_bh_enable();
+ err = rpl_do_srh(skb, rlwt, dst);
+ if (unlikely(err))
+ goto drop;
+
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -230,25 +232,28 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
err = dst->error;
- dst_release(dst);
goto drop;
}
- local_bh_disable();
- dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
- local_bh_enable();
+ /* cache only if we don't create a dst reference loop */
+ if (orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
return dst_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
@@ -257,34 +262,48 @@ static int rpl_input(struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
+ struct lwtunnel_state *lwtst;
struct rpl_lwt *rlwt;
int err;
- rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
+ /* We cannot dereference "orig_dst" once ip6_route_input() or
+ * skb_dst_drop() is called. However, in order to detect a dst loop, we
+ * need the address of its lwtstate. So, save the address of lwtstate
+ * now and use it later as a comparison.
+ */
+ lwtst = orig_dst->lwtstate;
- err = rpl_do_srh(skb, rlwt);
- if (unlikely(err))
- goto drop;
+ rlwt = rpl_lwt_lwtunnel(lwtst);
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
+ local_bh_enable();
+
+ err = rpl_do_srh(skb, rlwt, dst);
+ if (unlikely(err)) {
+ dst_release(dst);
+ goto drop;
+ }
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
- if (!dst->error) {
+
+ /* cache only if we don't create a dst reference loop */
+ if (!dst->error && lwtst != dst->lwtstate) {
+ local_bh_disable();
dst_cache_set_ip6(&rlwt->cache, dst,
&ipv6_hdr(skb)->saddr);
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- local_bh_enable();
-
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
return dst_input(skb);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 098632adc9b5..51583461ae29 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -124,8 +124,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
return flowlabel;
}
-/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ int proto, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(dst->dev);
@@ -137,7 +137,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -197,11 +197,18 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
return 0;
}
+
+/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+{
+ return __seg6_do_srh_encap(skb, osrh, proto, NULL);
+}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
static int seg6_do_srh_encap_red(struct sk_buff *skb,
- struct ipv6_sr_hdr *osrh, int proto)
+ struct ipv6_sr_hdr *osrh, int proto,
+ struct dst_entry *cache_dst)
{
__u8 first_seg = osrh->first_segment;
struct dst_entry *dst = skb_dst(skb);
@@ -230,7 +237,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
tot_len = red_hdrlen + sizeof(struct ipv6hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -317,8 +324,8 @@ out:
return 0;
}
-/* insert an SRH within an IPv6 packet, just after the IPv6 header */
-int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *hdr, *oldhdr;
struct ipv6_sr_hdr *isrh;
@@ -326,7 +333,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -369,9 +376,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
return 0;
}
-EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
-static int seg6_do_srh(struct sk_buff *skb)
+static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
@@ -384,7 +390,7 @@ static int seg6_do_srh(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
return -EINVAL;
- err = seg6_do_srh_inline(skb, tinfo->srh);
+ err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
if (err)
return err;
break;
@@ -402,9 +408,11 @@ static int seg6_do_srh(struct sk_buff *skb)
return -EINVAL;
if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ proto, cache_dst);
else
- err = seg6_do_srh_encap_red(skb, tinfo->srh, proto);
+ err = seg6_do_srh_encap_red(skb, tinfo->srh,
+ proto, cache_dst);
if (err)
return err;
@@ -425,11 +433,13 @@ static int seg6_do_srh(struct sk_buff *skb)
skb_push(skb, skb->mac_len);
if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ IPPROTO_ETHERNET,
+ cache_dst);
else
err = seg6_do_srh_encap_red(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ IPPROTO_ETHERNET,
+ cache_dst);
if (err)
return err;
@@ -444,6 +454,13 @@ static int seg6_do_srh(struct sk_buff *skb)
return 0;
}
+/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+ return __seg6_do_srh_inline(skb, osrh, NULL);
+}
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
+
static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -455,34 +472,48 @@ static int seg6_input_core(struct net *net, struct sock *sk,
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
+ struct lwtunnel_state *lwtst;
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
+ /* We cannot dereference "orig_dst" once ip6_route_input() or
+ * skb_dst_drop() is called. However, in order to detect a dst loop, we
+ * need the address of its lwtstate. So, save the address of lwtstate
+ * now and use it later as a comparison.
+ */
+ lwtst = orig_dst->lwtstate;
- slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+ slwt = seg6_lwt_lwtunnel(lwtst);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
+ local_bh_enable();
+
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err)) {
+ dst_release(dst);
+ goto drop;
+ }
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
- if (!dst->error) {
+
+ /* cache only if we don't create a dst reference loop */
+ if (!dst->error && lwtst != dst->lwtstate) {
+ local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst,
&ipv6_hdr(skb)->saddr);
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- local_bh_enable();
-
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
@@ -528,16 +559,16 @@ static int seg6_output_core(struct net *net, struct sock *sk,
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
-
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
local_bh_disable();
dst = dst_cache_get(&slwt->cache);
local_bh_enable();
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err))
+ goto drop;
+
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -552,28 +583,31 @@ static int seg6_output_core(struct net *net, struct sock *sk,
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
err = dst->error;
- dst_release(dst);
goto drop;
}
- local_bh_disable();
- dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
- local_bh_enable();
+ /* cache only if we don't create a dst reference loop */
+ if (orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, skb_dst(skb)->dev, dst_output);
return dst_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index c74705ead984..e445a0a45568 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -1644,10 +1644,8 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
[SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
[SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 },
- [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
- .len = sizeof(struct in_addr) },
- [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
- .len = sizeof(struct in6_addr) },
+ [SEG6_LOCAL_NH4] = NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)),
+ [SEG6_LOCAL_NH6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 39bd8951bfca..3c15a0ae228e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -269,6 +269,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
nt = netdev_priv(dev);
+ nt->net = net;
nt->parms = *parms;
if (ipip6_tunnel_create(dev) < 0)
goto failed_free;
@@ -1449,7 +1450,6 @@ static int ipip6_tunnel_init(struct net_device *dev)
int err;
tunnel->dev = dev;
- tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
ipip6_tunnel_bind_dev(dev);
@@ -1563,6 +1563,7 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
int err;
nt = netdev_priv(dev);
+ nt->net = net;
if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
err = ip_tunnel_encap_setup(nt, &ipencap);
@@ -1858,6 +1859,9 @@ static int __net_init sit_init_net(struct net *net)
*/
sitn->fb_tunnel_dev->netns_local = true;
+ t = netdev_priv(sitn->fb_tunnel_dev);
+ t->net = net;
+
err = register_netdev(sitn->fb_tunnel_dev);
if (err)
goto err_reg_dev;
@@ -1865,8 +1869,6 @@ static int __net_init sit_init_net(struct net *net)
ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
- t = netdev_priv(sitn->fb_tunnel_dev);
-
strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
return 0;
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index a45bf17cb2a1..5ab509a5fbdf 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -42,7 +42,7 @@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
iif, sdif);
NAPI_GRO_CB(skb)->is_flist = !sk;
if (sk)
- sock_put(sk);
+ sock_gen_put(sk);
#endif /* IS_ENABLED(CONFIG_IPV6) */
}
@@ -94,14 +94,23 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
}
static void __tcpv6_gso_segment_csum(struct sk_buff *seg,
+ struct in6_addr *oldip,
+ const struct in6_addr *newip,
__be16 *oldport, __be16 newport)
{
- struct tcphdr *th;
+ struct tcphdr *th = tcp_hdr(seg);
+
+ if (!ipv6_addr_equal(oldip, newip)) {
+ inet_proto_csum_replace16(&th->check, seg,
+ oldip->s6_addr32,
+ newip->s6_addr32,
+ true);
+ *oldip = *newip;
+ }
if (*oldport == newport)
return;
- th = tcp_hdr(seg);
inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
*oldport = newport;
}
@@ -129,10 +138,10 @@ static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
th2 = tcp_hdr(seg);
iph2 = ipv6_hdr(seg);
- iph2->saddr = iph->saddr;
- iph2->daddr = iph->daddr;
- __tcpv6_gso_segment_csum(seg, &th2->source, th->source);
- __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest);
+ __tcpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr,
+ &th2->source, th->source);
+ __tcpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr,
+ &th2->dest, th->dest);
}
return segs;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0cef8ae5d1ea..57e38e5e4be9 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -159,6 +159,49 @@ static int compute_score(struct sock *sk, const struct net *net,
return score;
}
+/**
+ * udp6_lib_lookup1() - Simplified lookup using primary hash (destination port)
+ * @net: Network namespace
+ * @saddr: Source address, network order
+ * @sport: Source port, network order
+ * @daddr: Destination address, network order
+ * @hnum: Destination port, host order
+ * @dif: Destination interface index
+ * @sdif: Destination bridge port index, if relevant
+ * @udptable: Set of UDP hash tables
+ *
+ * Simplified lookup to be used as fallback if no sockets are found due to a
+ * potential race between (receive) address change, and lookup happening before
+ * the rehash operation. This function ignores SO_REUSEPORT groups while scoring
+ * result sockets, because if we have one, we don't need the fallback at all.
+ *
+ * Called under rcu_read_lock().
+ *
+ * Return: socket with highest matching score if any, NULL if none
+ */
+static struct sock *udp6_lib_lookup1(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ const struct udp_table *udptable)
+{
+ unsigned int slot = udp_hashfn(net, hnum, udptable->mask);
+ struct udp_hslot *hslot = &udptable->hash[slot];
+ struct sock *sk, *result = NULL;
+ int score, badness = 0;
+
+ sk_for_each_rcu(sk, &hslot->head) {
+ score = compute_score(sk, net,
+ saddr, sport, daddr, hnum, dif, sdif);
+ if (score > badness) {
+ result = sk;
+ badness = score;
+ }
+ }
+
+ return result;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -263,6 +306,13 @@ struct sock *__udp6_lib_lookup(const struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif, sdif,
hslot2, skb);
+ if (!IS_ERR_OR_NULL(result))
+ goto done;
+
+ /* Cover address change/lookup/rehash race: see __udp4_lib_lookup() */
+ result = udp6_lib_lookup1(net, saddr, sport, daddr, hnum, dif, sdif,
+ udptable);
+
done:
if (IS_ERR(result))
return NULL;
@@ -1244,9 +1294,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
const int hlen = skb_network_header_len(skb) +
sizeof(struct udphdr);
- if (hlen + cork->gso_size > cork->fragsize) {
+ if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
kfree_skb(skb);
- return -EINVAL;
+ return -EMSGSIZE;
}
if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
@@ -1349,7 +1399,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
- ipcm6_init(&ipc6);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
@@ -1558,9 +1608,6 @@ do_udp_sendmsg:
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);
dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
@@ -1606,8 +1653,6 @@ back_from_confirm:
WRITE_ONCE(up->pending, AF_INET6);
do_append_data:
- if (ipc6.dontfrag < 0)
- ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
&ipc6, fl6, dst_rt6_info(dst),
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4abc5e9d6322..9005fc156a20 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -179,14 +179,18 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
int offset = skb_gro_offset(skb);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
- int ret;
+ int len, dlen;
+ __u8 *udpdata;
+ __be32 *udpdata32;
if (skb->protocol == htons(ETH_P_IP))
return xfrm4_gro_udp_encap_rcv(sk, head, skb);
- offset = offset - sizeof(struct udphdr);
-
- if (!pskb_pull(skb, offset))
+ len = skb->len - offset;
+ dlen = offset + min(len, 8);
+ udpdata = skb_gro_header(skb, dlen, offset);
+ udpdata32 = (__be32 *)udpdata;
+ if (unlikely(!udpdata))
return NULL;
rcu_read_lock();
@@ -194,11 +198,13 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (!ops || !ops->callbacks.gro_receive)
goto out;
- ret = __xfrm6_udp_encap_rcv(sk, skb, false);
- if (ret)
+ /* check if it is a keepalive or IKE packet */
+ if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
- skb_push(skb, offset);
+ /* set the transport header to ESP */
+ skb_set_transport_header(skb, offset);
+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
@@ -208,7 +214,6 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
out:
rcu_read_unlock();
- skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 1;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d4118c796290..1d37b26ea2ef 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -429,7 +429,7 @@ static void psock_write_space(struct sock *sk)
/* Check if the socket is reserved so someone is waiting for sending. */
kcm = psock->tx_kcm;
- if (kcm && !unlikely(kcm->tx_stopped))
+ if (kcm)
queue_work(kcm_wq, &kcm->tx_work);
spin_unlock_bh(&mux->lock);
@@ -1696,12 +1696,6 @@ static int kcm_release(struct socket *sock)
*/
__skb_queue_purge(&sk->sk_write_queue);
- /* Set tx_stopped. This is checked when psock is bound to a kcm and we
- * get a writespace callback. This prevents further work being queued
- * from the callback (unbinding the psock occurs after canceling work.
- */
- kcm->tx_stopped = 1;
-
release_sock(sk);
spin_lock_bh(&mux->lock);
@@ -1717,7 +1711,7 @@ static int kcm_release(struct socket *sock)
/* Cancel work. After this point there should be no outside references
* to the kcm socket.
*/
- cancel_work_sync(&kcm->tx_work);
+ disable_work_sync(&kcm->tx_work);
lock_sock(sk);
psock = kcm->tx_psock;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f79fb99271ed..c56bb4f451e6 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1354,7 +1354,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
}
if (hdr->sadb_msg_seq) {
- x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
+ x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
xfrm_state_put(x);
x = NULL;
@@ -1362,7 +1362,8 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
}
if (!x)
- x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
+ x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, UINT_MAX,
+ proto, xdaddr, xsaddr, 1, family);
if (x == NULL)
return -ENOENT;
@@ -1417,7 +1418,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb
if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
return 0;
- x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq);
+ x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
if (x == NULL)
return 0;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index f4c1da070826..b98d13584c81 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -547,7 +547,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;
- ipcm6_init(&ipc6);
+ ipcm6_init_sk(&ipc6, sk);
if (lsa) {
if (addr_len < SIN6_LEN_RFC2133)
@@ -634,9 +634,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
- if (ipc6.tclass < 0)
- ipc6.tclass = np->tclass;
-
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
@@ -648,9 +645,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (ipc6.hlimit < 0)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
- if (ipc6.dontfrag < 0)
- ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
-
if (msg->msg_flags & MSG_CONFIRM)
goto do_confirm;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 0259cde394ba..cc77ec5769d8 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -887,15 +887,15 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (sk->sk_type != SOCK_STREAM)
goto copy_uaddr;
+ /* Partial read */
+ if (used + offset < skb_len)
+ continue;
+
if (!(flags & MSG_PEEK)) {
skb_unlink(skb, &sk->sk_receive_queue);
kfree_skb(skb);
*seq = 0;
}
-
- /* Partial read */
- if (used + offset < skb_len)
- continue;
} while (len > 0);
out:
diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
index 06fb8e6944b0..7a0cae9a8111 100644
--- a/net/llc/llc_s_ac.c
+++ b/net/llc/llc_s_ac.c
@@ -24,7 +24,7 @@
#include <net/llc_s_ac.h>
#include <net/llc_s_ev.h>
#include <net/llc_sap.h>
-
+#include <net/sock.h>
/**
* llc_sap_action_unitdata_ind - forward UI PDU to network layer
@@ -40,6 +40,26 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb)
return 0;
}
+static int llc_prepare_and_xmit(struct sk_buff *skb)
+{
+ struct llc_sap_state_ev *ev = llc_sap_ev(skb);
+ struct sk_buff *nskb;
+ int rc;
+
+ rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
+ if (rc)
+ return rc;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return -ENOMEM;
+
+ if (skb->sk)
+ skb_set_owner_w(nskb, skb->sk);
+
+ return dev_queue_xmit(nskb);
+}
+
/**
* llc_sap_action_send_ui - sends UI PDU resp to UNITDATA REQ to MAC layer
* @sap: SAP
@@ -52,17 +72,12 @@ int llc_sap_action_unitdata_ind(struct llc_sap *sap, struct sk_buff *skb)
int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb)
{
struct llc_sap_state_ev *ev = llc_sap_ev(skb);
- int rc;
llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_ui_cmd(skb);
- rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
- if (likely(!rc)) {
- skb_get(skb);
- rc = dev_queue_xmit(skb);
- }
- return rc;
+
+ return llc_prepare_and_xmit(skb);
}
/**
@@ -77,17 +92,12 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb)
int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
{
struct llc_sap_state_ev *ev = llc_sap_ev(skb);
- int rc;
llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap,
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
- rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
- if (likely(!rc)) {
- skb_get(skb);
- rc = dev_queue_xmit(skb);
- }
- return rc;
+
+ return llc_prepare_and_xmit(skb);
}
/**
@@ -133,17 +143,12 @@ out:
int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb)
{
struct llc_sap_state_ev *ev = llc_sap_ev(skb);
- int rc;
llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_test_cmd(skb);
- rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
- if (likely(!rc)) {
- skb_get(skb);
- rc = dev_queue_xmit(skb);
- }
- return rc;
+
+ return llc_prepare_and_xmit(skb);
}
int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f11fd360b422..2890dde9b3bf 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1078,13 +1078,13 @@ ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst,
{
int i, offset = 0;
+ dst->cnt = src->cnt;
for (i = 0; i < src->cnt; i++) {
memcpy(pos + offset, src->elem[i].data, src->elem[i].len);
dst->elem[i].len = src->elem[i].len;
dst->elem[i].data = pos + offset;
offset += dst->elem[i].len;
}
- dst->cnt = src->cnt;
return offset;
}
@@ -1879,12 +1879,12 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
}
if (params->supported_rates &&
- params->supported_rates_len) {
- ieee80211_parse_bitrates(link->conf->chanreq.oper.width,
- sband, params->supported_rates,
- params->supported_rates_len,
- &link_sta->pub->supp_rates[sband->band]);
- }
+ params->supported_rates_len &&
+ !ieee80211_parse_bitrates(link->conf->chanreq.oper.width,
+ sband, params->supported_rates,
+ params->supported_rates_len,
+ &link_sta->pub->supp_rates[sband->band]))
+ return -EINVAL;
if (params->ht_capa)
ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
@@ -2876,7 +2876,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
* the frames sent while scanning on other channel will be
* lost)
*/
- if (sdata->deflink.u.ap.beacon &&
+ if (ieee80211_num_beaconing_links(sdata) &&
(!(wiphy->features & NL80211_FEATURE_AP_SCAN) ||
!(req->flags & NL80211_SCAN_FLAG_AP)))
return -EOPNOTSUPP;
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index cca6d14084d2..e3b46df95b71 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1349,6 +1349,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
goto out;
}
+ link->radar_required = link->reserved_radar_required;
list_move(&link->assigned_chanctx_list, &new_ctx->assigned_links);
rcu_assign_pointer(link_conf->chanctx_conf, &new_ctx->conf);
@@ -2097,6 +2098,9 @@ void ieee80211_link_release_channel(struct ieee80211_link_data *link)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ return;
+
lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (rcu_access_pointer(link->conf->chanctx_conf))
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 02b5476a4376..a0710ae0e7a4 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -499,6 +499,7 @@ static const char *hw_flag_names[] = {
FLAG(DISALLOW_PUNCTURING),
FLAG(DISALLOW_PUNCTURING_5GHZ),
FLAG(HANDLES_QUIET_CSA),
+ FLAG(STRICT),
#undef FLAG
};
@@ -531,6 +532,46 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf,
return rv;
}
+static ssize_t hwflags_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct ieee80211_local *local = file->private_data;
+ char buf[100];
+ int val;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(buf, user_buf, count))
+ return -EFAULT;
+
+ if (count && buf[count - 1] == '\n')
+ buf[count - 1] = '\0';
+ else
+ buf[count] = '\0';
+
+ if (sscanf(buf, "strict=%d", &val) == 1) {
+ switch (val) {
+ case 0:
+ ieee80211_hw_set(&local->hw, STRICT);
+ return count;
+ case 1:
+ __clear_bit(IEEE80211_HW_STRICT, local->hw.flags);
+ return count;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static const struct file_operations hwflags_ops = {
+ .open = simple_open,
+ .read = hwflags_read,
+ .write = hwflags_write,
+};
+
static ssize_t misc_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
@@ -581,7 +622,6 @@ static ssize_t queues_read(struct file *file, char __user *user_buf,
return simple_read_from_buffer(user_buf, count, ppos, buf, res);
}
-DEBUGFS_READONLY_FILE_OPS(hwflags);
DEBUGFS_READONLY_FILE_OPS(queues);
DEBUGFS_READONLY_FILE_OPS(misc);
@@ -659,7 +699,7 @@ void debugfs_hw_add(struct ieee80211_local *local)
#ifdef CONFIG_PM
DEBUGFS_ADD_MODE(reset, 0200);
#endif
- DEBUGFS_ADD(hwflags);
+ DEBUGFS_ADD_MODE(hwflags, 0600);
DEBUGFS_ADD(user_power);
DEBUGFS_ADD(power);
DEBUGFS_ADD(hw_conf);
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 68596ef78b15..d0b145888e13 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -728,7 +728,7 @@ static ssize_t ieee80211_if_parse_active_links(struct ieee80211_sub_if_data *sda
{
u16 active_links;
- if (kstrtou16(buf, 0, &active_links))
+ if (kstrtou16(buf, 0, &active_links) || !active_links)
return -EINVAL;
return ieee80211_set_active_links(&sdata->vif, active_links) ?: buflen;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 1e9389c49a57..e6f937cfedcf 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -154,12 +154,6 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
p += scnprintf(p,
bufsz + buf - p,
- "target %uus interval %uus ecn %s\n",
- codel_time_to_us(sta->cparams.target),
- codel_time_to_us(sta->cparams.interval),
- sta->cparams.ecn ? "yes" : "no");
- p += scnprintf(p,
- bufsz + buf - p,
"tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");
for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index fe868b521622..4243f8ee5ab6 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -115,8 +115,14 @@ void drv_remove_interface(struct ieee80211_local *local,
sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
- /* Remove driver debugfs entries */
- ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links);
+ /*
+ * Remove driver debugfs entries.
+ * The virtual monitor interface doesn't get a debugfs
+ * entry, so it's exempt here.
+ */
+ if (sdata != rcu_access_pointer(local->monitor_sdata))
+ ieee80211_debugfs_recreate_netdev(sdata,
+ sdata->vif.valid_links);
trace_drv_remove_interface(local, sdata);
local->ops->remove_interface(&local->hw, &sdata->vif);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index d382d9729e85..d1c10f5f9516 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018-2019, 2021-2024 Intel Corporation
+* Copyright (C) 2018-2019, 2021-2025 Intel Corporation
*/
#ifndef __MAC80211_DRIVER_OPS
@@ -724,6 +724,9 @@ static inline void drv_flush_sta(struct ieee80211_local *local,
if (sdata && !check_sdata_in_driver(sdata))
return;
+ if (!sta->uploaded)
+ return;
+
trace_drv_flush_sta(local, sdata, &sta->sta);
if (local->ops->flush_sta)
local->ops->flush_sta(&local->hw, &sdata->vif, &sta->sta);
@@ -952,6 +955,7 @@ static inline void drv_mgd_complete_tx(struct ieee80211_local *local,
return;
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
+ info->link_id = info->link_id < 0 ? 0 : info->link_id;
trace_drv_mgd_complete_tx(local, sdata, info->duration,
info->subtype, info->success);
if (local->ops->mgd_complete_tx)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 7a0242e937d3..2f017dbbcb97 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1209,6 +1209,15 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
if ((_link = wiphy_dereference((local)->hw.wiphy, \
___sdata->link[___link_id])))
+#define for_each_link_data(sdata, __link) \
+ struct ieee80211_sub_if_data *__sdata = sdata; \
+ for (int __link_id = 0; \
+ __link_id < ARRAY_SIZE((__sdata)->link); __link_id++) \
+ if ((!(__sdata)->vif.valid_links || \
+ (__sdata)->vif.valid_links & BIT(__link_id)) && \
+ ((__link) = sdata_dereference((__sdata)->link[__link_id], \
+ (__sdata))))
+
static inline int
ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems,
struct cfg80211_rnr_elems *rnr_elems,
@@ -1751,6 +1760,7 @@ struct ieee802_11_elems {
const struct ieee80211_eht_operation *eht_operation;
const struct ieee80211_multi_link_elem *ml_basic;
const struct ieee80211_multi_link_elem *ml_reconf;
+ const struct ieee80211_multi_link_elem *ml_epcs;
const struct ieee80211_bandwidth_indication *bandwidth_indication;
const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT];
@@ -1781,6 +1791,7 @@ struct ieee802_11_elems {
/* mult-link element can be de-fragmented and thus u8 is not sufficient */
size_t ml_basic_len;
size_t ml_reconf_len;
+ size_t ml_epcs_len;
u8 ttlm_num;
@@ -2059,6 +2070,9 @@ static inline void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata
ieee80211_vif_set_links(sdata, 0, 0);
}
+void ieee80211_apvlan_link_setup(struct ieee80211_sub_if_data *sdata);
+void ieee80211_apvlan_link_clear(struct ieee80211_sub_if_data *sdata);
+
/* tx handling */
void ieee80211_clear_tx_pending(struct ieee80211_local *local);
void ieee80211_tx_pending(struct tasklet_struct *t);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index af9055252e6d..209d6ffa8e42 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -494,6 +494,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
break;
list_del_rcu(&sdata->u.mntr.list);
break;
+ case NL80211_IFTYPE_AP_VLAN:
+ ieee80211_apvlan_link_clear(sdata);
+ break;
default:
break;
}
@@ -667,6 +670,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
ieee80211_txq_remove_vlan(local, sdata);
+ if (sdata->vif.txq)
+ ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq));
+
sdata->bss = NULL;
if (local->open_count == 0)
@@ -812,6 +818,9 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
*/
static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
{
+ if (WARN_ON(!list_empty(&sdata->work.entry)))
+ wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->work);
+
/* free extra data */
ieee80211_free_keys(sdata, false);
@@ -1205,16 +1214,17 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
return;
}
- RCU_INIT_POINTER(local->monitor_sdata, NULL);
- mutex_unlock(&local->iflist_mtx);
-
- synchronize_net();
-
+ clear_bit(SDATA_STATE_RUNNING, &sdata->state);
ieee80211_link_release_channel(&sdata->deflink);
if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
drv_remove_interface(local, sdata);
+ RCU_INIT_POINTER(local->monitor_sdata, NULL);
+ mutex_unlock(&local->iflist_mtx);
+
+ synchronize_net();
+
kfree(sdata);
}
@@ -1261,6 +1271,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
sdata->crypto_tx_tailroom_needed_cnt +=
master->crypto_tx_tailroom_needed_cnt;
+ ieee80211_apvlan_link_setup(sdata);
+
break;
}
case NL80211_IFTYPE_AP:
@@ -1315,7 +1327,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
case NL80211_IFTYPE_AP_VLAN:
/* no need to tell driver, but set carrier and chanctx */
if (sdata->bss->active) {
- ieee80211_link_vlan_copy_chanctx(&sdata->deflink);
+ struct ieee80211_link_data *link;
+
+ for_each_link_data(sdata, link) {
+ ieee80211_link_vlan_copy_chanctx(link);
+ }
+
netif_carrier_on(dev);
ieee80211_set_vif_encap_ops(sdata);
} else {
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index 46092fbcde90..cafedc5ecd44 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -2,7 +2,7 @@
/*
* MLO link handling
*
- * Copyright (C) 2022-2024 Intel Corporation
+ * Copyright (C) 2022-2025 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -12,6 +12,71 @@
#include "key.h"
#include "debugfs_netdev.h"
+static void ieee80211_update_apvlan_links(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_sub_if_data *vlan;
+ struct ieee80211_link_data *link;
+ u16 ap_bss_links = sdata->vif.valid_links;
+ u16 new_links, vlan_links;
+ unsigned long add;
+
+ list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
+ int link_id;
+
+ if (!vlan)
+ continue;
+
+ /* No support for 4addr with MLO yet */
+ if (vlan->wdev.use_4addr)
+ return;
+
+ vlan_links = vlan->vif.valid_links;
+
+ new_links = ap_bss_links;
+
+ add = new_links & ~vlan_links;
+ if (!add)
+ continue;
+
+ ieee80211_vif_set_links(vlan, add, 0);
+
+ for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) {
+ link = sdata_dereference(vlan->link[link_id], vlan);
+ ieee80211_link_vlan_copy_chanctx(link);
+ }
+ }
+}
+
+void ieee80211_apvlan_link_setup(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_sub_if_data *ap_bss = container_of(sdata->bss,
+ struct ieee80211_sub_if_data, u.ap);
+ u16 new_links = ap_bss->vif.valid_links;
+ unsigned long add;
+ int link_id;
+
+ if (!ap_bss->vif.valid_links)
+ return;
+
+ add = new_links;
+ for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) {
+ sdata->wdev.valid_links |= BIT(link_id);
+ ether_addr_copy(sdata->wdev.links[link_id].addr,
+ ap_bss->wdev.links[link_id].addr);
+ }
+
+ ieee80211_vif_set_links(sdata, new_links, 0);
+}
+
+void ieee80211_apvlan_link_clear(struct ieee80211_sub_if_data *sdata)
+{
+ if (!sdata->wdev.valid_links)
+ return;
+
+ sdata->wdev.valid_links = 0;
+ ieee80211_vif_clear_links(sdata);
+}
+
void ieee80211_link_setup(struct ieee80211_link_data *link)
{
if (link->sdata->vif.type == NL80211_IFTYPE_STATION)
@@ -28,8 +93,16 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
if (link_id < 0)
link_id = 0;
- rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf);
- rcu_assign_pointer(sdata->link[link_id], link);
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+ struct ieee80211_sub_if_data *ap_bss;
+ struct ieee80211_bss_conf *ap_bss_conf;
+
+ ap_bss = container_of(sdata->bss,
+ struct ieee80211_sub_if_data, u.ap);
+ ap_bss_conf = sdata_dereference(ap_bss->vif.link_conf[link_id],
+ ap_bss);
+ memcpy(link_conf, ap_bss_conf, sizeof(*link_conf));
+ }
link->sdata = sdata;
link->link_id = link_id;
@@ -51,6 +124,7 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
if (!deflink) {
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_AP_VLAN:
ether_addr_copy(link_conf->addr,
sdata->wdev.links[link_id].addr);
link_conf->bssid = link_conf->addr;
@@ -65,6 +139,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
ieee80211_link_debugfs_add(link);
}
+
+ rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf);
+ rcu_assign_pointer(sdata->link[link_id], link);
}
void ieee80211_link_stop(struct ieee80211_link_data *link)
@@ -174,6 +251,7 @@ static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_AP_VLAN:
/* in an AP all links are always active */
sdata->vif.active_links = valid_links;
@@ -275,14 +353,25 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
ieee80211_set_vif_links_bitmaps(sdata, new_links, dormant_links);
/* tell the driver */
- ret = drv_change_vif_links(sdata->local, sdata,
- old_links & old_active,
- new_links & sdata->vif.active_links,
- old);
+ if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)
+ ret = drv_change_vif_links(sdata->local, sdata,
+ old_links & old_active,
+ new_links & sdata->vif.active_links,
+ old);
if (!new_links)
ieee80211_debugfs_recreate_netdev(sdata, false);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP)
+ ieee80211_update_apvlan_links(sdata);
}
+ /*
+ * Ignore errors if we are only removing links as removal should
+ * always succeed
+ */
+ if (!new_links)
+ ret = 0;
+
if (ret) {
/* restore config */
memcpy(sdata->link, old_data, sizeof(old_data));
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ee1211a213d7..caedc939eea1 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1344,10 +1344,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR);
- local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) +
- sizeof(void *) * channels, GFP_KERNEL);
+ local->int_scan_req = kzalloc(struct_size(local->int_scan_req,
+ channels, channels),
+ GFP_KERNEL);
if (!local->int_scan_req)
return -ENOMEM;
+ local->int_scan_req->n_channels = channels;
eth_broadcast_addr(local->int_scan_req->bssid);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 579d0f24ac9d..ba8aeb47bffd 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -367,6 +367,12 @@ u32 airtime_link_metric_get(struct ieee80211_local *local,
return (u32)result;
}
+/* Check that the first metric is at least 10% better than the second one */
+static bool is_metric_better(u32 x, u32 y)
+{
+ return (x < y) && (x < (y - x / 10));
+}
+
/**
* hwmp_route_info_get - Update routing info to originator and transmitter
*
@@ -458,8 +464,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
(mpath->sn == orig_sn &&
(rcu_access_pointer(mpath->next_hop) !=
sta ?
- mult_frac(new_metric, 10, 9) :
- new_metric) >= mpath->metric)) {
+ !is_metric_better(new_metric, mpath->metric) :
+ new_metric >= mpath->metric))) {
process = false;
fresh_info = false;
}
@@ -533,8 +539,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
if ((mpath->flags & MESH_PATH_FIXED) ||
((mpath->flags & MESH_PATH_ACTIVE) &&
((rcu_access_pointer(mpath->next_hop) != sta ?
- mult_frac(last_hop_metric, 10, 9) :
- last_hop_metric) > mpath->metric)))
+ !is_metric_better(last_hop_metric, mpath->metric) :
+ last_hop_metric > mpath->metric))))
fresh_info = false;
} else {
mpath = mesh_path_add(sdata, ta);
@@ -630,7 +636,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
mesh_path_add_gate(mpath);
}
rcu_read_unlock();
- } else {
+ } else if (ifmsh->mshcfg.dot11MeshForwarding) {
rcu_read_lock();
mpath = mesh_path_lookup(sdata, target_addr);
if (mpath) {
@@ -648,6 +654,8 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
}
}
rcu_read_unlock();
+ } else {
+ forward = false;
}
if (reply) {
@@ -665,7 +673,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
}
}
- if (forward && ifmsh->mshcfg.dot11MeshForwarding) {
+ if (forward) {
u32 preq_id;
u8 hopcount;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 111066928b96..5a9a84a0cc35 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2024 Intel Corporation
+ * Copyright (C) 2018 - 2025 Intel Corporation
*/
#include <linux/delay.h>
@@ -166,6 +166,9 @@ ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata,
bool no_vht = false;
u32 ht_cfreq;
+ if (ieee80211_hw_check(&sdata->local->hw, STRICT))
+ ignore_ht_channel_mismatch = false;
+
*chandef = (struct cfg80211_chan_def) {
.chan = channel,
.width = NL80211_CHAN_WIDTH_20_NOHT,
@@ -385,7 +388,7 @@ ieee80211_verify_peer_he_mcs_support(struct ieee80211_sub_if_data *sdata,
* zeroes, which is nonsense, and completely inconsistent with itself
* (it doesn't have 8 streams). Accept the settings in this case anyway.
*/
- if (!ap_min_req_set)
+ if (!ieee80211_hw_check(&sdata->local->hw, STRICT) && !ap_min_req_set)
return true;
/* make sure the AP is consistent with itself
@@ -445,7 +448,7 @@ ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
* zeroes, which is nonsense, and completely inconsistent with itself
* (it doesn't have 8 streams). Accept the settings in this case anyway.
*/
- if (!ap_min_req_set)
+ if (!ieee80211_hw_check(&sdata->local->hw, STRICT) && !ap_min_req_set)
return true;
/* Need to go over for 80MHz, 160MHz and for 80+80 */
@@ -1212,13 +1215,15 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
* Some APs apparently get confused if our capabilities are better
* than theirs, so restrict what we advertise in the assoc request.
*/
- if (!(ap_vht_cap->vht_cap_info &
- cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)))
- cap &= ~(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
- IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE);
- else if (!(ap_vht_cap->vht_cap_info &
- cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)))
- cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
+ if (!ieee80211_hw_check(&local->hw, STRICT)) {
+ if (!(ap_vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)))
+ cap &= ~(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+ IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE);
+ else if (!(ap_vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)))
+ cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
+ }
/*
* If some other vif is using the MU-MIMO capability we cannot associate
@@ -1260,14 +1265,16 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
return mu_mimo_owner;
}
-static void ieee80211_assoc_add_rates(struct sk_buff *skb,
+static void ieee80211_assoc_add_rates(struct ieee80211_local *local,
+ struct sk_buff *skb,
enum nl80211_chan_width width,
struct ieee80211_supported_band *sband,
struct ieee80211_mgd_assoc_data *assoc_data)
{
u32 rates;
- if (assoc_data->supp_rates_len) {
+ if (assoc_data->supp_rates_len &&
+ !ieee80211_hw_check(&local->hw, STRICT)) {
/*
* Get all rates supported by the device and the AP as
* some APs don't like getting a superset of their rates
@@ -1481,7 +1488,7 @@ static size_t ieee80211_assoc_link_elems(struct ieee80211_sub_if_data *sdata,
*capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
if (sband->band != NL80211_BAND_S1GHZ)
- ieee80211_assoc_add_rates(skb, width, sband, assoc_data);
+ ieee80211_assoc_add_rates(local, skb, width, sband, assoc_data);
if (*capab & WLAN_CAPABILITY_SPECTRUM_MGMT ||
*capab & WLAN_CAPABILITY_RADIO_MEASURE) {
@@ -1925,7 +1932,8 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
* for some reason check it and want it to be set, set the bit for all
* pre-EHT connections as we used to do.
*/
- if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT)
+ if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT &&
+ !ieee80211_hw_check(&local->hw, STRICT))
capab |= WLAN_CAPABILITY_ESS;
/* add the elements for the assoc (main) link */
@@ -2235,7 +2243,8 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link)
if (!local->ops->abort_channel_switch)
return;
- ieee80211_link_unreserve_chanctx(link);
+ if (rcu_access_pointer(link->conf->chanctx_conf))
+ ieee80211_link_unreserve_chanctx(link);
ieee80211_vif_unblock_queues_csa(sdata);
@@ -3581,7 +3590,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
if (tx)
ieee80211_flush_queues(local, sdata, false);
- drv_mgd_complete_tx(sdata->local, sdata, &info);
+ if (tx || frame_buf)
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
/* clear AP addr only after building the needed mgmt frames */
eth_zero_addr(sdata->deflink.u.mgd.bssid);
@@ -4025,7 +4035,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
struct ieee80211_link_data *link;
link = sdata_dereference(sdata->link[link_id], sdata);
- if (!link)
+ if (!link || !link->conf->bss)
continue;
cfg80211_unlink_bss(local->hw.wiphy, link->conf->bss);
link->conf->bss = NULL;
@@ -4282,6 +4292,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_prep_tx_info info = {
.subtype = IEEE80211_STYPE_AUTH,
};
+ bool sae_need_confirm = false;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
@@ -4298,6 +4309,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
status_code = le16_to_cpu(mgmt->u.auth.status_code);
+ info.link_id = ifmgd->auth_data->link_id;
+
if (auth_alg != ifmgd->auth_data->algorithm ||
(auth_alg != WLAN_AUTH_SAE &&
auth_transaction != ifmgd->auth_data->expected_transaction) ||
@@ -4325,6 +4338,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY;
ifmgd->auth_data->timeout_started = true;
run_again(sdata, ifmgd->auth_data->timeout);
+ if (auth_transaction == 1)
+ sae_need_confirm = true;
goto notify_driver;
}
@@ -4368,6 +4383,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_mark_sta_auth(sdata))
return; /* ignore frame -- wait for timeout */
} else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
+ auth_transaction == 1) {
+ sae_need_confirm = true;
+ } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
auth_transaction == 2) {
sdata_info(sdata, "SAE peer confirmed\n");
ifmgd->auth_data->peer_confirmed = true;
@@ -4375,7 +4393,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
notify_driver:
- drv_mgd_complete_tx(sdata->local, sdata, &info);
+ if (!sae_need_confirm)
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
}
#define case_WLAN(type) \
@@ -4710,7 +4729,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
* 2G/3G/4G wifi routers, reported models include the "Onda PN51T",
* "Vodafone PocketWiFi 2", "ZTE MF60" and a similar T-Mobile device.
*/
- if (!is_6ghz &&
+ if (!ieee80211_hw_check(&local->hw, STRICT) && !is_6ghz &&
((assoc_data->wmm && !elems->wmm_param) ||
(link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT &&
(!elems->ht_cap_elem || !elems->ht_operation)) ||
@@ -4733,6 +4752,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
parse_params.start = bss_ies->data;
parse_params.len = bss_ies->len;
parse_params.bss = cbss;
+ parse_params.link_id = -1;
bss_elems = ieee802_11_parse_elems_full(&parse_params);
if (!bss_elems) {
ret = false;
@@ -4845,6 +4865,15 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
bss_vht_cap = (const void *)elem->data;
}
+ if (ieee80211_hw_check(&local->hw, STRICT) &&
+ (!bss_vht_cap || memcmp(bss_vht_cap, elems->vht_cap_elem,
+ sizeof(*bss_vht_cap)))) {
+ rcu_read_unlock();
+ ret = false;
+ link_info(link, "VHT capabilities mismatch\n");
+ goto out;
+ }
+
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
elems->vht_cap_elem,
bss_vht_cap, link_sta);
@@ -6681,6 +6710,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
struct ieee80211_bss_conf *bss_conf = link->conf;
struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
struct ieee80211_mgmt *mgmt = (void *) hdr;
+ struct ieee80211_ext *ext = NULL;
size_t baselen;
struct ieee802_11_elems *elems;
struct ieee80211_local *local = sdata->local;
@@ -6706,12 +6736,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
/* Process beacon from the current BSS */
bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type);
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
- struct ieee80211_ext *ext = (void *) mgmt;
-
- if (ieee80211_is_s1g_short_beacon(ext->frame_control))
- variable = ext->u.s1g_short_beacon.variable;
- else
- variable = ext->u.s1g_beacon.variable;
+ ext = (void *)mgmt;
+ variable = ext->u.s1g_beacon.variable +
+ ieee80211_s1g_optional_len(ext->frame_control);
}
baselen = (u8 *) variable - (u8 *) mgmt;
@@ -6896,7 +6923,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
}
if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) ||
- ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+ (ext && ieee80211_is_s1g_short_beacon(ext->frame_control,
+ parse_params.start,
+ parse_params.len)))
goto free;
link->u.mgd.beacon_crc = ncrc;
link->u.mgd.beacon_crc_valid = true;
@@ -7159,6 +7188,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res);
int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 +
2 * 2 * IEEE80211_TTLM_NUM_TIDS;
+ u16 status_code;
skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len);
if (!skb)
@@ -7181,19 +7211,18 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
WARN_ON(1);
fallthrough;
case NEG_TTLM_RES_REJECT:
- mgmt->u.action.u.ttlm_res.status_code =
- WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING;
+ status_code = WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING;
break;
case NEG_TTLM_RES_ACCEPT:
- mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS;
+ status_code = WLAN_STATUS_SUCCESS;
break;
case NEG_TTLM_RES_SUGGEST_PREFERRED:
- mgmt->u.action.u.ttlm_res.status_code =
- WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED;
+ status_code = WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED;
ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm);
break;
}
+ mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code);
ieee80211_tx_skb(sdata, skb);
}
@@ -7359,7 +7388,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata,
* This can be better implemented in the future, to handle request
* rejections.
*/
- if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS)
+ if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS)
__ieee80211_disconnect(sdata);
}
@@ -9201,7 +9230,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
- drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
index 279c5143b335..922ea9a6e241 100644
--- a/net/mac80211/parse.c
+++ b/net/mac80211/parse.c
@@ -44,6 +44,12 @@ struct ieee80211_elems_parse {
/* The reconfiguration Multi-Link element in the original elements */
const struct element *ml_reconf_elem;
+ /* The EPCS Multi-Link element in the original elements */
+ const struct element *ml_epcs_elem;
+
+ bool multi_link_inner;
+ bool skip_vendor;
+
/*
* scratch buffer that can be used for various element parsing related
* tasks, e.g., element de-fragmentation etc.
@@ -149,16 +155,18 @@ ieee80211_parse_extension_element(u32 *crc,
switch (le16_get_bits(mle->control,
IEEE80211_ML_CONTROL_TYPE)) {
case IEEE80211_ML_CONTROL_TYPE_BASIC:
- if (elems_parse->ml_basic_elem) {
+ if (elems_parse->multi_link_inner) {
elems->parse_error |=
IEEE80211_PARSE_ERR_DUP_NEST_ML_BASIC;
break;
}
- elems_parse->ml_basic_elem = elem;
break;
case IEEE80211_ML_CONTROL_TYPE_RECONF:
elems_parse->ml_reconf_elem = elem;
break;
+ case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS:
+ elems_parse->ml_epcs_elem = elem;
+ break;
default:
break;
}
@@ -393,6 +401,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params,
IEEE80211_PARSE_ERR_BAD_ELEM_SIZE;
break;
case WLAN_EID_VENDOR_SPECIFIC:
+ if (elems_parse->skip_vendor)
+ break;
+
if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 &&
pos[2] == 0xf2) {
/* Microsoft OUI (00:50:F2) */
@@ -747,7 +758,6 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
{
const struct element *elem, *sub;
size_t profile_len = 0;
- bool found = false;
if (!bss || !bss->transmitted_bss)
return profile_len;
@@ -798,15 +808,14 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
index[2],
new_bssid);
if (ether_addr_equal(new_bssid, bss->bssid)) {
- found = true;
elems->bssid_index_len = index[1];
elems->bssid_index = (void *)&index[2];
- break;
+ return profile_len;
}
}
}
- return found ? profile_len : 0;
+ return 0;
}
static void
@@ -860,21 +869,36 @@ ieee80211_mle_get_sta_prof(struct ieee80211_elems_parse *elems_parse,
}
}
-static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse,
- struct ieee80211_elems_parse_params *params)
+static const struct element *
+ieee80211_prep_mle_link_parse(struct ieee80211_elems_parse *elems_parse,
+ struct ieee80211_elems_parse_params *params,
+ struct ieee80211_elems_parse_params *sub)
{
struct ieee802_11_elems *elems = &elems_parse->elems;
struct ieee80211_mle_per_sta_profile *prof;
- struct ieee80211_elems_parse_params sub = {
- .mode = params->mode,
- .action = params->action,
- .from_ap = params->from_ap,
- .link_id = -1,
- };
- ssize_t ml_len = elems->ml_basic_len;
- const struct element *non_inherit = NULL;
+ const struct element *tmp;
+ ssize_t ml_len;
const u8 *end;
+ if (params->mode < IEEE80211_CONN_MODE_EHT)
+ return NULL;
+
+ for_each_element_extid(tmp, WLAN_EID_EXT_EHT_MULTI_LINK,
+ elems->ie_start, elems->total_len) {
+ const struct ieee80211_multi_link_elem *mle =
+ (void *)tmp->data + 1;
+
+ if (!ieee80211_mle_size_ok(tmp->data + 1, tmp->datalen - 1))
+ continue;
+
+ if (le16_get_bits(mle->control, IEEE80211_ML_CONTROL_TYPE) !=
+ IEEE80211_ML_CONTROL_TYPE_BASIC)
+ continue;
+
+ elems_parse->ml_basic_elem = tmp;
+ break;
+ }
+
ml_len = cfg80211_defragment_element(elems_parse->ml_basic_elem,
elems->ie_start,
elems->total_len,
@@ -885,26 +909,26 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse,
WLAN_EID_FRAGMENT);
if (ml_len < 0)
- return;
+ return NULL;
elems->ml_basic = (const void *)elems_parse->scratch_pos;
elems->ml_basic_len = ml_len;
elems_parse->scratch_pos += ml_len;
if (params->link_id == -1)
- return;
+ return NULL;
ieee80211_mle_get_sta_prof(elems_parse, params->link_id);
prof = elems->prof;
if (!prof)
- return;
+ return NULL;
/* check if we have the 4 bytes for the fixed part in assoc response */
if (elems->sta_prof_len < sizeof(*prof) + prof->sta_info_len - 1 + 4) {
elems->prof = NULL;
elems->sta_prof_len = 0;
- return;
+ return NULL;
}
/*
@@ -913,13 +937,17 @@ static void ieee80211_mle_parse_link(struct ieee80211_elems_parse *elems_parse,
* the -1 is because the 'sta_info_len' is accounted to as part of the
* per-STA profile, but not part of the 'u8 variable[]' portion.
*/
- sub.start = prof->variable + prof->sta_info_len - 1 + 4;
+ sub->start = prof->variable + prof->sta_info_len - 1 + 4;
end = (const u8 *)prof + elems->sta_prof_len;
- sub.len = end - sub.start;
+ sub->len = end - sub->start;
+
+ sub->mode = params->mode;
+ sub->action = params->action;
+ sub->from_ap = params->from_ap;
+ sub->link_id = -1;
- non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
- sub.start, sub.len);
- _ieee802_11_parse_elems_full(&sub, elems_parse, non_inherit);
+ return cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ sub->start, sub->len);
}
static void
@@ -943,18 +971,43 @@ ieee80211_mle_defrag_reconf(struct ieee80211_elems_parse *elems_parse)
elems_parse->scratch_pos += ml_len;
}
+static void
+ieee80211_mle_defrag_epcs(struct ieee80211_elems_parse *elems_parse)
+{
+ struct ieee802_11_elems *elems = &elems_parse->elems;
+ ssize_t ml_len;
+
+ ml_len = cfg80211_defragment_element(elems_parse->ml_epcs_elem,
+ elems->ie_start,
+ elems->total_len,
+ elems_parse->scratch_pos,
+ elems_parse->scratch +
+ elems_parse->scratch_len -
+ elems_parse->scratch_pos,
+ WLAN_EID_FRAGMENT);
+ if (ml_len < 0)
+ return;
+ elems->ml_epcs = (void *)elems_parse->scratch_pos;
+ elems->ml_epcs_len = ml_len;
+ elems_parse->scratch_pos += ml_len;
+}
+
struct ieee802_11_elems *
ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
{
+ struct ieee80211_elems_parse_params sub = {};
struct ieee80211_elems_parse *elems_parse;
- struct ieee802_11_elems *elems;
const struct element *non_inherit = NULL;
- u8 *nontransmitted_profile;
- int nontransmitted_profile_len = 0;
+ struct ieee802_11_elems *elems;
size_t scratch_len = 3 * params->len;
+ bool multi_link_inner = false;
BUILD_BUG_ON(offsetof(typeof(*elems_parse), elems) != 0);
+ /* cannot parse for both a specific link and non-transmitted BSS */
+ if (WARN_ON(params->link_id >= 0 && params->bss))
+ return NULL;
+
elems_parse = kzalloc(struct_size(elems_parse, scratch, scratch_len),
GFP_ATOMIC);
if (!elems_parse)
@@ -971,36 +1024,55 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
ieee80211_clear_tpe(&elems->tpe);
ieee80211_clear_tpe(&elems->csa_tpe);
- nontransmitted_profile = elems_parse->scratch_pos;
- nontransmitted_profile_len =
- ieee802_11_find_bssid_profile(params->start, params->len,
- elems, params->bss,
- nontransmitted_profile);
- elems_parse->scratch_pos += nontransmitted_profile_len;
- non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
- nontransmitted_profile,
- nontransmitted_profile_len);
+ /*
+ * If we're looking for a non-transmitted BSS then we cannot at
+ * the same time be looking for a second link as the two can only
+ * appear in the same frame carrying info for different BSSes.
+ *
+ * In any case, we only look for one at a time, as encoded by
+ * the WARN_ON above.
+ */
+ if (params->bss) {
+ int nontx_len =
+ ieee802_11_find_bssid_profile(params->start,
+ params->len,
+ elems, params->bss,
+ elems_parse->scratch_pos);
+ sub.start = elems_parse->scratch_pos;
+ sub.mode = params->mode;
+ sub.len = nontx_len;
+ sub.action = params->action;
+ sub.link_id = params->link_id;
+
+ /* consume the space used for non-transmitted profile */
+ elems_parse->scratch_pos += nontx_len;
+
+ non_inherit = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
+ sub.start, nontx_len);
+ } else {
+ /* must always parse to get elems_parse->ml_basic_elem */
+ non_inherit = ieee80211_prep_mle_link_parse(elems_parse, params,
+ &sub);
+ multi_link_inner = true;
+ }
+ elems_parse->skip_vendor =
+ cfg80211_find_elem(WLAN_EID_VENDOR_SPECIFIC,
+ sub.start, sub.len);
elems->crc = _ieee802_11_parse_elems_full(params, elems_parse,
non_inherit);
- /* Override with nontransmitted profile, if found */
- if (nontransmitted_profile_len) {
- struct ieee80211_elems_parse_params sub = {
- .mode = params->mode,
- .start = nontransmitted_profile,
- .len = nontransmitted_profile_len,
- .action = params->action,
- .link_id = params->link_id,
- };
-
+ /* Override with nontransmitted/per-STA profile if found */
+ if (sub.len) {
+ elems_parse->multi_link_inner = multi_link_inner;
+ elems_parse->skip_vendor = false;
_ieee802_11_parse_elems_full(&sub, elems_parse, NULL);
}
- ieee80211_mle_parse_link(elems_parse, params);
-
ieee80211_mle_defrag_reconf(elems_parse);
+ ieee80211_mle_defrag_epcs(elems_parse);
+
if (elems->tim && !elems->parse_error) {
const struct ieee80211_tim_ie *tim_ie = elems->tim;
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 3dc9752188d5..1b045b62961f 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -971,8 +971,6 @@ int rate_control_set_rates(struct ieee80211_hw *hw,
if (sta->uploaded)
drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);
- ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta));
-
return 0;
}
EXPORT_SYMBOL(rate_control_set_rates);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 694b43091fec..8c0d91dfd7e2 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*/
#include <linux/jiffies.h>
@@ -2994,6 +2994,7 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta
}
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames);
+ ieee80211_set_qos_hdr(sdata, fwd_skb);
ieee80211_add_pending_skb(local, fwd_skb);
rx_accept:
@@ -3322,8 +3323,8 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
return;
}
- if (!ether_addr_equal(mgmt->sa, sdata->deflink.u.mgd.bssid) ||
- !ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid)) {
+ if (!ether_addr_equal(mgmt->sa, sdata->vif.cfg.ap_addr) ||
+ !ether_addr_equal(mgmt->bssid, sdata->vif.cfg.ap_addr)) {
/* Not from the current AP or not associated yet. */
return;
}
@@ -3339,9 +3340,9 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
skb_reserve(skb, local->hw.extra_tx_headroom);
resp = skb_put_zero(skb, 24);
- memcpy(resp->da, mgmt->sa, ETH_ALEN);
+ memcpy(resp->da, sdata->vif.cfg.ap_addr, ETH_ALEN);
memcpy(resp->sa, sdata->vif.addr, ETH_ALEN);
- memcpy(resp->bssid, sdata->deflink.u.mgd.bssid, ETH_ALEN);
+ memcpy(resp->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_ACTION);
skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query));
@@ -4282,10 +4283,16 @@ static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx,
rx->link_sta = NULL;
}
- if (link_id < 0)
- rx->link = &rx->sdata->deflink;
- else if (!ieee80211_rx_data_set_link(rx, link_id))
+ if (link_id < 0) {
+ if (ieee80211_vif_is_mld(&rx->sdata->vif) &&
+ sta && !sta->sta.valid_links)
+ rx->link =
+ rcu_dereference(rx->sdata->link[sta->deflink.link_id]);
+ else
+ rx->link = &rx->sdata->deflink;
+ } else if (!ieee80211_rx_data_set_link(rx, link_id)) {
return false;
+ }
return true;
}
@@ -4480,6 +4487,10 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
if (!multicast &&
!ether_addr_equal(sdata->dev->dev_addr, hdr->addr1))
return false;
+ /* reject invalid/our STA address */
+ if (!is_valid_ether_addr(hdr->addr2) ||
+ ether_addr_equal(sdata->dev->dev_addr, hdr->addr2))
+ return false;
if (!rx->sta) {
int rate_idx;
if (status->encoding != RX_ENC_LEGACY)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index adb88c06b598..ce6d5857214e 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -260,6 +260,7 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
struct ieee80211_mgmt *mgmt = (void *)skb->data;
struct ieee80211_bss *bss;
struct ieee80211_channel *channel;
+ struct ieee80211_ext *ext;
size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
u.probe_resp.variable);
@@ -269,12 +270,10 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
return;
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
- if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
- min_hdr_len = offsetof(struct ieee80211_ext,
- u.s1g_short_beacon.variable);
- else
- min_hdr_len = offsetof(struct ieee80211_ext,
- u.s1g_beacon);
+ ext = (struct ieee80211_ext *)mgmt;
+ min_hdr_len =
+ offsetof(struct ieee80211_ext, u.s1g_beacon.variable) +
+ ieee80211_s1g_optional_len(ext->frame_control);
}
if (skb->len < min_hdr_len)
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index aa22f09e6d14..4eb45e08b97e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/module.h>
@@ -18,7 +18,6 @@
#include <linux/timer.h>
#include <linux/rtnetlink.h>
-#include <net/codel.h>
#include <net/mac80211.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -683,12 +682,6 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata,
}
}
- sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD;
- sta->cparams.target = MS2TIME(20);
- sta->cparams.interval = MS2TIME(100);
- sta->cparams.ecn = true;
- sta->cparams.ce_threshold_selector = 0;
- sta->cparams.ce_threshold_mask = 0;
sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr);
@@ -1317,9 +1310,13 @@ static int _sta_info_move_state(struct sta_info *sta,
sta->sta.addr, new_state);
/* notify the driver before the actual changes so it can
- * fail the transition
+ * fail the transition if the state is increasing.
+ * The driver is required not to fail when the transition
+ * is decreasing the state, so first, do all the preparation
+ * work and only then, notify the driver.
*/
- if (test_sta_flag(sta, WLAN_STA_INSERTED)) {
+ if (new_state > sta->sta_state &&
+ test_sta_flag(sta, WLAN_STA_INSERTED)) {
int err = drv_sta_state(sta->local, sta->sdata, sta,
sta->sta_state, new_state);
if (err)
@@ -1395,6 +1392,16 @@ static int _sta_info_move_state(struct sta_info *sta,
break;
}
+ if (new_state < sta->sta_state &&
+ test_sta_flag(sta, WLAN_STA_INSERTED)) {
+ int err = drv_sta_state(sta->local, sta->sdata, sta,
+ sta->sta_state, new_state);
+
+ WARN_ONCE(err,
+ "Driver is not allowed to fail if the sta_state is transitioning down the list: %d\n",
+ err);
+ }
+
sta->sta_state = new_state;
return 0;
@@ -2864,27 +2871,6 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta)
return sta->deflink.status_stats.last_ack;
}
-static void sta_update_codel_params(struct sta_info *sta, u32 thr)
-{
- if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) {
- sta->cparams.target = MS2TIME(50);
- sta->cparams.interval = MS2TIME(300);
- sta->cparams.ecn = false;
- } else {
- sta->cparams.target = MS2TIME(20);
- sta->cparams.interval = MS2TIME(100);
- sta->cparams.ecn = true;
- }
-}
-
-void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
- u32 thr)
-{
- struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
-
- sta_update_codel_params(sta, thr);
-}
-
int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9195d5a2de0a..a9cfeeb13e53 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -466,14 +466,6 @@ struct ieee80211_fragment_cache {
unsigned int next;
};
-/*
- * The bandwidth threshold below which the per-station CoDel parameters will be
- * scaled to be more lenient (to prevent starvation of slow stations). This
- * value will be scaled by the number of active stations when it is being
- * applied.
- */
-#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */
-
/**
* struct link_sta_info - Link STA information
* All link specific sta info are stored here for reference. This can be
@@ -619,7 +611,6 @@ struct link_sta_info {
* @sta: station information we share with the driver
* @sta_state: duplicates information about station state (for debug)
* @rcu_head: RCU head used for freeing this station struct
- * @cparams: CoDel parameters for this station.
* @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED)
* @amsdu_mesh_control: track the mesh A-MSDU format used by the peer:
*
@@ -710,8 +701,6 @@ struct sta_info {
struct dentry *debugfs_dir;
#endif
- struct codel_params cparams;
-
u8 reserved_tid;
s8 amsdu_mesh_control;
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index f07b40916485..1cb42c5b9b04 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1421,7 +1421,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
return -EOPNOTSUPP;
- if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ if (sdata->vif.type != NL80211_IFTYPE_STATION || !sdata->vif.cfg.assoc)
return -EINVAL;
switch (oper) {
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0ff8b56f5807..9c515fb8ebe1 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -622,6 +622,12 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
else
tx->key = NULL;
+ if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
+ if (tx->key && tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
+ info->control.hw_key = &tx->key->conf;
+ return TX_CONTINUE;
+ }
+
if (tx->key) {
bool skip_hw = false;
@@ -1401,16 +1407,9 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq,
local = container_of(fq, struct ieee80211_local, fq);
txqi = container_of(tin, struct txq_info, tin);
+ cparams = &local->cparams;
cstats = &txqi->cstats;
- if (txqi->txq.sta) {
- struct sta_info *sta = container_of(txqi->txq.sta,
- struct sta_info, sta);
- cparams = &sta->cparams;
- } else {
- cparams = &local->cparams;
- }
-
if (flow == &tin->default_flow)
cvars = &txqi->def_cvars;
else
@@ -1444,7 +1443,7 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
{
struct fq *fq = &local->fq;
struct fq_tin *tin = &txqi->tin;
- u32 flow_idx = fq_flow_idx(fq, skb);
+ u32 flow_idx;
ieee80211_set_skb_enqueue_time(skb);
@@ -1460,6 +1459,7 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
__skb_queue_tail(&txqi->frags, skb);
} else {
+ flow_idx = fq_flow_idx(fq, skb);
fq_tin_enqueue(fq, tin, flow_idx, skb,
fq_skb_free_func);
}
@@ -3892,6 +3892,7 @@ begin:
* The key can be removed while the packet was queued, so need to call
* this here to get the current key.
*/
+ info->control.hw_key = NULL;
r = ieee80211_tx_h_select_key(&tx);
if (r != TX_CONTINUE) {
ieee80211_free_txskb(&local->hw, skb);
@@ -4113,7 +4114,9 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
spin_lock_bh(&local->active_txq_lock[txq->ac]);
- has_queue = force || txq_has_queue(txq);
+ has_queue = force ||
+ (!test_bit(IEEE80211_TXQ_STOP, &txqi->flags) &&
+ txq_has_queue(txq));
if (list_empty(&txqi->schedule_order) &&
(has_queue || ieee80211_txq_keep_active(txqi))) {
/* If airtime accounting is active, always enqueue STAs at the
@@ -4523,8 +4526,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
IEEE80211_TX_CTRL_MLO_LINK_UNSPEC,
NULL);
} else if (ieee80211_vif_is_mld(&sdata->vif) &&
- sdata->vif.type == NL80211_IFTYPE_AP &&
- !ieee80211_hw_check(&sdata->local->hw, MLO_MCAST_MULTI_LINK_TX)) {
+ ((sdata->vif.type == NL80211_IFTYPE_AP &&
+ !ieee80211_hw_check(&sdata->local->hw, MLO_MCAST_MULTI_LINK_TX)) ||
+ (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
+ !sdata->wdev.use_4addr))) {
ieee80211_mlo_multicast_tx(dev, skb);
} else {
normal:
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 38c30e4ddda9..77638e965726 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -6,7 +6,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*
* utilities for mac80211
*/
@@ -685,7 +685,7 @@ void __ieee80211_flush_queues(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
unsigned int queues, bool drop)
{
- if (!local->ops->flush)
+ if (!local->ops->flush && !drop)
return;
/*
@@ -712,7 +712,8 @@ void __ieee80211_flush_queues(struct ieee80211_local *local,
}
}
- drv_flush(local, sdata, queues, drop);
+ if (local->ops->flush)
+ drv_flush(local, sdata, queues, drop);
ieee80211_wake_queues_by_reason(&local->hw, queues,
IEEE80211_QUEUE_STOP_REASON_FLUSH,
@@ -2184,8 +2185,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
ieee80211_reconfig_roc(local);
/* Requeue all works */
- list_for_each_entry(sdata, &local->interfaces, list)
- wiphy_work_queue(local->hw.wiphy, &sdata->work);
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (ieee80211_sdata_running(sdata))
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
+ }
}
ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
@@ -3905,7 +3908,7 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local,
{
u64 tsf = drv_get_tsf(local, sdata);
u64 dtim_count = 0;
- u16 beacon_int = sdata->vif.bss_conf.beacon_int * 1024;
+ u32 beacon_int = sdata->vif.bss_conf.beacon_int * 1024;
u8 dtim_period = sdata->vif.bss_conf.dtim_period;
struct ps_data *ps;
u8 bcns_from_dtim;
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index c0e2da5072be..9e4631fade90 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -684,6 +684,10 @@ void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata)
ASSERT_RTNL();
mutex_lock(&sdata->local->iflist_mtx);
+ if (list_empty(&sdata->local->interfaces)) {
+ mutex_unlock(&sdata->local->iflist_mtx);
+ return;
+ }
list_del_rcu(&sdata->list);
mutex_unlock(&sdata->local->iflist_mtx);
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index f6de136008f6..70aeebfc4182 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -73,7 +73,6 @@ static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
lock_sock(sk);
- /* TODO: allow rebind */
if (sk_hashed(sk)) {
rc = -EADDRINUSE;
goto out_release;
@@ -629,12 +628,36 @@ static void mctp_sk_close(struct sock *sk, long timeout)
static int mctp_sk_hash(struct sock *sk)
{
struct net *net = sock_net(sk);
+ struct sock *existing;
+ struct mctp_sock *msk;
+ int rc;
+
+ msk = container_of(sk, struct mctp_sock, sk);
+
+ /* Bind lookup runs under RCU, remain live during that. */
+ sock_set_flag(sk, SOCK_RCU_FREE);
mutex_lock(&net->mctp.bind_lock);
+
+ /* Prevent duplicate binds. */
+ sk_for_each(existing, &net->mctp.binds) {
+ struct mctp_sock *mex =
+ container_of(existing, struct mctp_sock, sk);
+
+ if (mex->bind_type == msk->bind_type &&
+ mex->bind_addr == msk->bind_addr &&
+ mex->bind_net == msk->bind_net) {
+ rc = -EADDRINUSE;
+ goto out;
+ }
+ }
+
sk_add_node_rcu(sk, &net->mctp.binds);
- mutex_unlock(&net->mctp.bind_lock);
+ rc = 0;
- return 0;
+out:
+ mutex_unlock(&net->mctp.bind_lock);
+ return rc;
}
static void mctp_sk_unhash(struct sock *sk)
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 85cc5f31f1e7..8d1386601bbe 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -20,8 +20,7 @@
#include <net/sock.h>
struct mctp_dump_cb {
- int h;
- int idx;
+ unsigned long ifindex;
size_t a_idx;
};
@@ -115,43 +114,36 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct mctp_dump_cb *mcb = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
- struct hlist_head *head;
struct net_device *dev;
struct ifaddrmsg *hdr;
struct mctp_dev *mdev;
- int ifindex;
- int idx = 0, rc;
-
- hdr = nlmsg_data(cb->nlh);
- // filter by ifindex if requested
- ifindex = hdr->ifa_index;
+ int ifindex = 0, rc;
+
+ /* Filter by ifindex if a header is provided */
+ if (cb->nlh->nlmsg_len >= nlmsg_msg_size(sizeof(*hdr))) {
+ hdr = nlmsg_data(cb->nlh);
+ ifindex = hdr->ifa_index;
+ } else {
+ if (cb->strict_check) {
+ NL_SET_ERR_MSG(cb->extack, "mctp: Invalid header for addr dump request");
+ return -EINVAL;
+ }
+ }
rcu_read_lock();
- for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) {
- idx = 0;
- head = &net->dev_index_head[mcb->h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx >= mcb->idx &&
- (ifindex == 0 || ifindex == dev->ifindex)) {
- mdev = __mctp_dev_get(dev);
- if (mdev) {
- rc = mctp_dump_dev_addrinfo(mdev,
- skb, cb);
- mctp_dev_put(mdev);
- // Error indicates full buffer, this
- // callback will get retried.
- if (rc < 0)
- goto out;
- }
- }
- idx++;
- // reset for next iteration
- mcb->a_idx = 0;
- }
+ for_each_netdev_dump(net, dev, mcb->ifindex) {
+ if (ifindex && ifindex != dev->ifindex)
+ continue;
+ mdev = __mctp_dev_get(dev);
+ if (!mdev)
+ continue;
+ rc = mctp_dump_dev_addrinfo(mdev, skb, cb);
+ mctp_dev_put(mdev);
+ if (rc < 0)
+ break;
+ mcb->a_idx = 0;
}
-out:
rcu_read_unlock();
- mcb->idx = idx;
return skb->len;
}
@@ -525,9 +517,12 @@ static struct notifier_block mctp_dev_nb = {
};
static const struct rtnl_msg_handler mctp_device_rtnl_msg_handlers[] = {
- {THIS_MODULE, PF_MCTP, RTM_NEWADDR, mctp_rtm_newaddr, NULL, 0},
- {THIS_MODULE, PF_MCTP, RTM_DELADDR, mctp_rtm_deladdr, NULL, 0},
- {THIS_MODULE, PF_MCTP, RTM_GETADDR, NULL, mctp_dump_addrinfo, 0},
+ {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_NEWADDR,
+ .doit = mctp_rtm_newaddr},
+ {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_DELADDR,
+ .doit = mctp_rtm_deladdr},
+ {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_GETADDR,
+ .dumpit = mctp_dump_addrinfo},
};
int __init mctp_device_init(void)
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 3f2bd65ff5e3..d9c8e5a5f9ce 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -313,8 +313,10 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
key = flow->key;
- if (WARN_ON(key->dev && key->dev != dev))
+ if (key->dev) {
+ WARN_ON(key->dev != dev);
return;
+ }
mctp_dev_set_key(dev, key);
}
@@ -332,8 +334,14 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
& MCTP_HDR_SEQ_MASK;
if (!key->reasm_head) {
- key->reasm_head = skb;
- key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
+ /* Since we're manipulating the shared frag_list, ensure it isn't
+ * shared with any other SKBs.
+ */
+ key->reasm_head = skb_unshare(skb, GFP_ATOMIC);
+ if (!key->reasm_head)
+ return -ENOMEM;
+
+ key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list);
key->last_seq = this_seq;
return 0;
}
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 17165b86ce22..06c1897b685a 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -921,6 +921,114 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test)
__mctp_route_test_fini(test, dev, rt, sock);
}
+/* Input route to socket, using a fragmented message created from clones.
+ */
+static void mctp_test_route_input_cloned_frag(struct kunit *test)
+{
+ /* 5 packet fragments, forming 2 complete messages */
+ const struct mctp_hdr hdrs[5] = {
+ RX_FRAG(FL_S, 0),
+ RX_FRAG(0, 1),
+ RX_FRAG(FL_E, 2),
+ RX_FRAG(FL_S, 0),
+ RX_FRAG(FL_E, 1),
+ };
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct sk_buff *skb[5];
+ struct sk_buff *rx_skb;
+ struct socket *sock;
+ size_t data_len;
+ u8 compare[100];
+ u8 flat[100];
+ size_t total;
+ void *p;
+ int rc;
+
+ /* Arbitrary length */
+ data_len = 3;
+ total = data_len + sizeof(struct mctp_hdr);
+
+ __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
+
+ /* Create a single skb initially with concatenated packets */
+ skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total);
+ mctp_test_skb_set_dev(skb[0], dev);
+ memset(skb[0]->data, 0 * 0x11, skb[0]->len);
+ memcpy(skb[0]->data, &hdrs[0], sizeof(struct mctp_hdr));
+
+ /* Extract and populate packets */
+ for (int i = 1; i < 5; i++) {
+ skb[i] = skb_clone(skb[i - 1], GFP_ATOMIC);
+ KUNIT_ASSERT_TRUE(test, skb[i]);
+ p = skb_pull(skb[i], total);
+ KUNIT_ASSERT_TRUE(test, p);
+ skb_reset_network_header(skb[i]);
+ memcpy(skb[i]->data, &hdrs[i], sizeof(struct mctp_hdr));
+ memset(&skb[i]->data[sizeof(struct mctp_hdr)], i * 0x11, data_len);
+ }
+ for (int i = 0; i < 5; i++)
+ skb_trim(skb[i], total);
+
+ /* SOM packets have a type byte to match the socket */
+ skb[0]->data[4] = 0;
+ skb[3]->data[4] = 0;
+
+ skb_dump("pkt1 ", skb[0], false);
+ skb_dump("pkt2 ", skb[1], false);
+ skb_dump("pkt3 ", skb[2], false);
+ skb_dump("pkt4 ", skb[3], false);
+ skb_dump("pkt5 ", skb[4], false);
+
+ for (int i = 0; i < 5; i++) {
+ KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1);
+ /* Take a reference so we can check refcounts at the end */
+ skb_get(skb[i]);
+ }
+
+ /* Feed the fragments into MCTP core */
+ for (int i = 0; i < 5; i++) {
+ rc = mctp_route_input(&rt->rt, skb[i]);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ }
+
+ /* Receive first reassembled message */
+ rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ KUNIT_EXPECT_EQ(test, rx_skb->len, 3 * data_len);
+ rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len);
+ for (int i = 0; i < rx_skb->len; i++)
+ compare[i] = (i / data_len) * 0x11;
+ /* Set type byte */
+ compare[0] = 0;
+
+ KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len);
+ KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1);
+ kfree_skb(rx_skb);
+
+ /* Receive second reassembled message */
+ rx_skb = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ KUNIT_EXPECT_EQ(test, rx_skb->len, 2 * data_len);
+ rc = skb_copy_bits(rx_skb, 0, flat, rx_skb->len);
+ for (int i = 0; i < rx_skb->len; i++)
+ compare[i] = (i / data_len + 3) * 0x11;
+ /* Set type byte */
+ compare[0] = 0;
+
+ KUNIT_EXPECT_MEMEQ(test, flat, compare, rx_skb->len);
+ KUNIT_EXPECT_EQ(test, refcount_read(&rx_skb->users), 1);
+ kfree_skb(rx_skb);
+
+ /* Check input skb refcounts */
+ for (int i = 0; i < 5; i++) {
+ KUNIT_EXPECT_EQ(test, refcount_read(&skb[i]->users), 1);
+ kfree_skb(skb[i]);
+ }
+
+ __mctp_route_test_fini(test, dev, rt, sock);
+}
+
#if IS_ENABLED(CONFIG_MCTP_FLOWS)
static void mctp_test_flow_init(struct kunit *test,
@@ -1144,6 +1252,7 @@ static struct kunit_case mctp_test_cases[] = {
KUNIT_CASE(mctp_test_packet_flow),
KUNIT_CASE(mctp_test_fragment_flow),
KUNIT_CASE(mctp_test_route_output_key_create),
+ KUNIT_CASE(mctp_test_route_input_cloned_frag),
{}
};
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index df62638b6498..3373b6b34dc7 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -81,8 +81,8 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
if (index < net->mpls.platform_labels) {
struct mpls_route __rcu **platform_label =
- rcu_dereference(net->mpls.platform_label);
- rt = rcu_dereference(platform_label[index]);
+ rcu_dereference_rtnl(net->mpls.platform_label);
+ rt = rcu_dereference_rtnl(platform_label[index]);
}
return rt;
}
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index b0dd008e2114..dd595d9b5e50 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -405,9 +405,9 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
subflow->mpc_drop = 1;
mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
- } else {
- subflow->mpc_drop = 0;
}
+ } else if (ssk->sk_state == TCP_SYN_SENT) {
+ subflow->mpc_drop = 0;
}
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index a62bc874bf1e..a97505b78671 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -108,7 +108,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->suboptions |= OPTION_MPTCP_DSS;
mp_opt->use_map = 1;
mp_opt->mpc_map = 1;
- mp_opt->use_ack = 0;
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
@@ -157,11 +156,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
pr_debug("DSS\n");
ptr++;
- /* we must clear 'mpc_map' be able to detect MP_CAPABLE
- * map vs DSS map in mptcp_incoming_options(), and reconstruct
- * map info accordingly
- */
- mp_opt->mpc_map = 0;
flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
@@ -369,8 +363,11 @@ void mptcp_get_options(const struct sk_buff *skb,
const unsigned char *ptr;
int length;
- /* initialize option status */
- mp_opt->suboptions = 0;
+ /* Ensure that casting the whole status to u32 is efficient and safe */
+ BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32));
+ BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status),
+ sizeof(u32)));
+ *(u32 *)&mp_opt->status = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -607,7 +604,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
}
opts->ext_copy.use_ack = 1;
opts->suboptions = OPTION_MPTCP_DSS;
- WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
/* Add kind/length/subtype/flag overhead if mapping is not populated */
if (dss_size == 0)
@@ -655,6 +651,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
bool drop_other_suboptions = false;
unsigned int opt_size = *size;
+ struct mptcp_addr_info addr;
bool echo;
int len;
@@ -663,7 +660,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
*/
if (!mptcp_pm_should_add_signal(msk) ||
(opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
- !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr,
+ !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr,
&echo, &drop_other_suboptions))
return false;
@@ -676,7 +673,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
else if (opts->suboptions & OPTION_MPTCP_DSS)
return false;
- len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port);
+ len = mptcp_add_addr_len(addr.family, echo, !!addr.port);
if (remaining < len)
return false;
@@ -693,6 +690,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
opts->ahmac = 0;
*size -= opt_size;
}
+ opts->addr = addr;
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
@@ -981,8 +979,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (subflow->mp_join)
goto reset;
subflow->mp_capable = 0;
+ if (!mptcp_try_fallback(ssk))
+ goto reset;
pr_fallback(msk);
- mptcp_do_fallback(ssk);
return false;
}
@@ -1288,7 +1287,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
}
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
}
- return;
+ goto update_wspace;
}
if (rcv_wnd_new != rcv_wnd_old) {
@@ -1313,6 +1312,9 @@ raise_win:
th->window = htons(new_win);
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED);
}
+
+update_wspace:
+ WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
}
__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 620264c75dc2..2c8815daf5b0 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -303,8 +303,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
pr_debug("fail_seq=%llu\n", fail_seq);
- if (!READ_ONCE(msk->allow_infinite_fallback))
+ /* After accepting the fail, we can't create any other subflows */
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
return;
+ }
+ msk->allow_subflows = false;
+ spin_unlock_bh(&msk->fallback_lock);
if (!subflow->fail_tout) {
pr_debug("send MP_FAIL response and infinite map\n");
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 45a2b5f05d38..2a085ec5bfd0 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -968,7 +968,7 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
struct mptcp_pm_addr_entry *entry,
- bool needs_id)
+ bool needs_id, bool replace)
{
struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
@@ -1008,6 +1008,17 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
if (entry->addr.id)
goto out;
+ /* allow callers that only need to look up the local
+ * addr's id to skip replacement. This allows them to
+ * avoid calling synchronize_rcu in the packet recv
+ * path.
+ */
+ if (!replace) {
+ kfree(entry);
+ ret = cur->addr.id;
+ goto out;
+ }
+
pernet->addrs--;
entry->addr.id = cur->addr.id;
list_del_rcu(&cur->list);
@@ -1160,7 +1171,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
entry->ifindex = 0;
entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false);
if (ret < 0)
kfree(entry);
@@ -1432,7 +1443,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
}
}
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
- !mptcp_pm_has_addr_attr_id(attr, info));
+ !mptcp_pm_has_addr_attr_id(attr, info),
+ true);
if (ret < 0) {
GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
goto out_free;
@@ -1513,11 +1525,6 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
if (mptcp_pm_is_userspace(msk))
goto next;
- if (list_empty(&msk->conn_list)) {
- mptcp_pm_remove_anno_addr(msk, addr, false);
- goto next;
- }
-
lock_sock(sk);
remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
@@ -2049,7 +2056,8 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
- (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL |
+ MPTCP_PM_ADDR_FLAG_IMPLICIT))) {
spin_unlock_bh(&pernet->lock);
GENL_SET_ERR_MSG(info, "invalid addr flags");
return -EINVAL;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index e35178f5205f..bb76295d04c5 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -589,11 +589,9 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto set_flags_err;
- if (attr_rem) {
- ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem);
- if (ret < 0)
- goto set_flags_err;
- }
+ ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem);
+ if (ret < 0)
+ goto set_flags_err;
if (loc.addr.family == AF_UNSPEC ||
rem.addr.family == AF_UNSPEC) {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4b9d850ce85a..d865d08a0c5e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -136,6 +136,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
int delta;
if (MPTCP_SKB_CB(from)->offset ||
+ ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) ||
!skb_try_coalesce(to, from, &fragstolen, &delta))
return false;
@@ -622,10 +623,9 @@ static bool mptcp_check_data_fin(struct sock *sk)
static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
{
- if (READ_ONCE(msk->allow_infinite_fallback)) {
+ if (mptcp_try_fallback(ssk)) {
MPTCP_INC_STATS(sock_net(ssk),
MPTCP_MIB_DSSCORRUPTIONFALLBACK);
- mptcp_do_fallback(ssk);
} else {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
mptcp_subflow_reset(ssk);
@@ -877,7 +877,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
{
mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ msk->allow_infinite_fallback = false;
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}
@@ -888,6 +888,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_state != TCP_ESTABLISHED)
return false;
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_subflows) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+ mptcp_subflow_joined(msk, ssk);
+ spin_unlock_bh(&msk->fallback_lock);
+
/* attach to msk socket only after we are sure we will deal with it
* at close time
*/
@@ -896,7 +904,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
- mptcp_subflow_joined(msk, ssk);
mptcp_stop_tout_timer(sk);
__mptcp_propagate_sndbuf(sk, ssk);
return true;
@@ -1235,10 +1242,14 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
mpext->infinite_map = 1;
mpext->data_len = 0;
+ if (!mptcp_try_fallback(ssk)) {
+ mptcp_subflow_reset(ssk);
+ return;
+ }
+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
pr_fallback(msk);
- mptcp_do_fallback(ssk);
}
#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
@@ -1766,8 +1777,10 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
* see mptcp_disconnect().
* Attempt it again outside the problematic scope.
*/
- if (!mptcp_disconnect(sk, 0))
+ if (!mptcp_disconnect(sk, 0)) {
+ sk->sk_disconnects++;
sk->sk_socket->state = SS_UNCONNECTED;
+ }
}
inet_clear_bit(DEFER_CONNECT, sk);
@@ -2640,9 +2653,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
static void __mptcp_retrans(struct sock *sk)
{
+ struct mptcp_sendmsg_info info = { .data_lock_held = true, };
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
- struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
struct sock *ssk;
int ret, err;
@@ -2687,6 +2700,18 @@ static void __mptcp_retrans(struct sock *sk)
info.sent = 0;
info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len :
dfrag->already_sent;
+
+ /*
+ * make the whole retrans decision, xmit, disallow
+ * fallback atomic
+ */
+ spin_lock_bh(&msk->fallback_lock);
+ if (__mptcp_check_fallback(msk)) {
+ spin_unlock_bh(&msk->fallback_lock);
+ release_sock(ssk);
+ return;
+ }
+
while (info.sent < info.limit) {
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
@@ -2700,8 +2725,9 @@ static void __mptcp_retrans(struct sock *sk)
len = max(copied, len);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ msk->allow_infinite_fallback = false;
}
+ spin_unlock_bh(&msk->fallback_lock);
release_sock(ssk);
}
@@ -2830,7 +2856,8 @@ static void __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->first, NULL);
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
- WRITE_ONCE(msk->allow_infinite_fallback, true);
+ msk->allow_infinite_fallback = true;
+ msk->allow_subflows = true;
msk->recovery = false;
msk->subflow_id = 1;
msk->last_data_sent = tcp_jiffies32;
@@ -2838,6 +2865,7 @@ static void __mptcp_init_sock(struct sock *sk)
msk->last_ack_recv = tcp_jiffies32;
mptcp_pm_data_init(msk);
+ spin_lock_init(&msk->fallback_lock);
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
@@ -3221,7 +3249,16 @@ static int mptcp_disconnect(struct sock *sk, int flags)
* subflow
*/
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
+
+ /* The first subflow is already in TCP_CLOSE status, the following
+ * can't overlap with a fallback anymore
+ */
+ spin_lock_bh(&msk->fallback_lock);
+ msk->allow_subflows = true;
+ msk->allow_infinite_fallback = true;
WRITE_ONCE(msk->flags, 0);
+ spin_unlock_bh(&msk->fallback_lock);
+
msk->cb_flags = 0;
msk->recovery = false;
WRITE_ONCE(msk->can_ack, false);
@@ -3634,7 +3671,13 @@ bool mptcp_finish_join(struct sock *ssk)
/* active subflow, already present inside the conn_list */
if (!list_empty(&subflow->node)) {
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_subflows) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
mptcp_subflow_joined(msk, ssk);
+ spin_unlock_bh(&msk->fallback_lock);
mptcp_propagate_sndbuf(parent, ssk);
return true;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a93e661ef5c4..6f191b125978 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -149,22 +149,24 @@ struct mptcp_options_received {
u32 subflow_seq;
u16 data_len;
__sum16 csum;
- u16 suboptions;
+ struct_group(status,
+ u16 suboptions;
+ u16 use_map:1,
+ dsn64:1,
+ data_fin:1,
+ use_ack:1,
+ ack64:1,
+ mpc_map:1,
+ reset_reason:4,
+ reset_transient:1,
+ echo:1,
+ backup:1,
+ deny_join_id0:1,
+ __unused:2;
+ );
+ u8 join_id;
u32 token;
u32 nonce;
- u16 use_map:1,
- dsn64:1,
- data_fin:1,
- use_ack:1,
- ack64:1,
- mpc_map:1,
- reset_reason:4,
- reset_transient:1,
- echo:1,
- backup:1,
- deny_join_id0:1,
- __unused:2;
- u8 join_id;
u64 thmac;
u8 hmac[MPTCPOPT_HMAC_LEN];
struct mptcp_addr_info addr;
@@ -340,10 +342,16 @@ struct mptcp_sock {
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
u8 scaling_ratio;
+ bool allow_subflows;
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
+
+ spinlock_t fallback_lock; /* protects fallback,
+ * allow_infinite_fallback and
+ * allow_join
+ */
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -760,10 +768,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
static inline bool mptcp_epollin_ready(const struct sock *sk)
{
+ u64 data_avail = mptcp_data_avail(mptcp_sk(sk));
+
+ if (!data_avail)
+ return false;
+
/* mptcp doesn't have to deal with small skbs in the receive queue,
- * at it can always coalesce them
+ * as it can always coalesce them
*/
- return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) ||
+ return (data_avail >= sk->sk_rcvlowat) ||
(mem_cgroup_sockets_enabled && sk->sk_memcg &&
mem_cgroup_under_socket_pressure(sk->sk_memcg)) ||
READ_ONCE(tcp_memory_pressure);
@@ -1181,13 +1194,22 @@ static inline bool mptcp_check_fallback(const struct sock *sk)
return __mptcp_check_fallback(msk);
}
-static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
+static inline bool __mptcp_try_fallback(struct mptcp_sock *msk)
{
if (__mptcp_check_fallback(msk)) {
pr_debug("TCP fallback already done (msk=%p)\n", msk);
- return;
+ return true;
}
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+
+ msk->allow_subflows = false;
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+ spin_unlock_bh(&msk->fallback_lock);
+ return true;
}
static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
@@ -1199,14 +1221,15 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
TCPF_SYN_RECV | TCPF_LISTEN));
}
-static inline void mptcp_do_fallback(struct sock *ssk)
+static inline bool mptcp_try_fallback(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
struct mptcp_sock *msk;
msk = mptcp_sk(sk);
- __mptcp_do_fallback(msk);
+ if (!__mptcp_try_fallback(msk))
+ return false;
if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
gfp_t saved_allocation = ssk->sk_allocation;
@@ -1218,6 +1241,7 @@ static inline void mptcp_do_fallback(struct sock *ssk)
tcp_shutdown(ssk, SEND_SHUTDOWN);
ssk->sk_allocation = saved_allocation;
}
+ return true;
}
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
@@ -1227,7 +1251,7 @@ static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
{
pr_fallback(msk);
subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
+ WARN_ON_ONCE(!__mptcp_try_fallback(msk));
}
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 505445a9598f..3caa0a9d3b38 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1419,6 +1419,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_TOS:
return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
+ case IP_FREEBIND:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(FREEBIND, sk));
+ case IP_TRANSPARENT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(TRANSPARENT, sk));
case IP_BIND_ADDRESS_NO_PORT:
return mptcp_put_int_option(msk, optval, optlen,
inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
@@ -1430,6 +1436,26 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
}
+static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct sock *sk = (void *)msk;
+
+ switch (optname) {
+ case IPV6_V6ONLY:
+ return mptcp_put_int_option(msk, optval, optlen,
+ sk->sk_ipv6only);
+ case IPV6_TRANSPARENT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(TRANSPARENT, sk));
+ case IPV6_FREEBIND:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(FREEBIND, sk));
+ }
+
+ return -EOPNOTSUPP;
+}
+
static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
char __user *optval, int __user *optlen)
{
@@ -1469,6 +1495,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
if (level == SOL_IP)
return mptcp_getsockopt_v4(msk, optname, optval, option);
+ if (level == SOL_IPV6)
+ return mptcp_getsockopt_v6(msk, optname, optval, option);
if (level == SOL_TCP)
return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
if (level == SOL_MPTCP)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 860903e06422..a05f201d194c 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -543,9 +543,11 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
+ if (!mptcp_try_fallback(sk))
+ goto do_reset;
+
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- mptcp_do_fallback(sk);
pr_fallback(msk);
goto fallback;
}
@@ -754,8 +756,6 @@ static bool subflow_hmac_valid(const struct request_sock *req,
subflow_req = mptcp_subflow_rsk(req);
msk = subflow_req->msk;
- if (!msk)
- return false;
subflow_generate_hmac(READ_ONCE(msk->remote_key),
READ_ONCE(msk->local_key),
@@ -853,12 +853,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) ||
- !subflow_hmac_valid(req, &mp_opt) ||
- !mptcp_can_accept_new_subflow(subflow_req->msk)) {
- SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK))
fallback = true;
- }
}
create_child:
@@ -908,6 +904,17 @@ create_child:
goto dispose_child;
}
+ if (!subflow_hmac_valid(req, &mp_opt)) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
+ goto dispose_child;
+ }
+
+ if (!mptcp_can_accept_new_subflow(owner)) {
+ subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
+ goto dispose_child;
+ }
+
/* move the msk reference ownership to the subflow */
subflow_req->msk = NULL;
ctx->conn = (struct sock *)owner;
@@ -1140,7 +1147,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
if (data_len == 0) {
pr_debug("infinite mapping received\n");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
- subflow->map_data_len = 0;
return MAPPING_INVALID;
}
@@ -1284,32 +1290,29 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
mptcp_schedule_work(sk);
}
-static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
-{
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
- if (subflow->mp_join)
- return false;
- else if (READ_ONCE(msk->csum_enabled))
- return !subflow->valid_csum_seen;
- else
- return READ_ONCE(msk->allow_infinite_fallback);
-}
-
-static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+static bool mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
unsigned long fail_tout;
+ /* we are really failing, prevent any later subflow join */
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+ msk->allow_subflows = false;
+ spin_unlock_bh(&msk->fallback_lock);
+
/* graceful failure can happen only on the MPC subflow */
if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
- return;
+ return false;
/* since the close timeout take precedence on the fail one,
* no need to start the latter when the first is already set
*/
if (sock_flag((struct sock *)msk, SOCK_DEAD))
- return;
+ return true;
/* we don't need extreme accuracy here, use a zero fail_tout as special
* value meaning no fail timeout at all;
@@ -1321,6 +1324,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
tcp_send_ack(ssk);
mptcp_reset_tout_timer(msk, subflow->fail_tout);
+ return true;
}
static bool subflow_check_data_avail(struct sock *ssk)
@@ -1381,17 +1385,16 @@ fallback:
(subflow->mp_join || subflow->valid_csum_seen)) {
subflow->send_mp_fail = 1;
- if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ if (!mptcp_subflow_fail(msk, ssk)) {
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
goto reset;
}
- mptcp_subflow_fail(msk, ssk);
WRITE_ONCE(subflow->data_avail, true);
return true;
}
- if (!subflow_can_fallback(subflow) && subflow->map_data_len) {
+ if (!mptcp_try_fallback(ssk)) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
@@ -1407,8 +1410,6 @@ reset:
WRITE_ONCE(subflow->data_avail, false);
return false;
}
-
- mptcp_do_fallback(ssk);
}
skb = skb_peek(&ssk->sk_receive_queue);
@@ -1673,7 +1674,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
- WRITE_ONCE(msk->allow_infinite_fallback, false);
mptcp_stop_tout_timer(sk);
return 0;
@@ -1768,10 +1768,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
* needs it.
* Update ns_tracker to current stack trace and refcounted tracker.
*/
- __netns_tracker_free(net, &sf->sk->ns_tracker, false);
- sf->sk->sk_net_refcnt = 1;
- get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
+ sk_net_refcnt_upgrade(sf->sk);
err = tcp_set_ulp(sf->sk, "mptcp");
if (err)
goto err_free;
@@ -1853,7 +1850,7 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
- mptcp_do_fallback(sk);
+ WARN_ON_ONCE(!mptcp_try_fallback(sk));
pr_fallback(msk);
subflow->conn_finished = 1;
mptcp_propagate_state(parent, sk, subflow, NULL);
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index ef0f8f73826f..ad1f671ffc37 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -110,7 +110,7 @@ struct ncsi_channel_version {
u8 update; /* NCSI version update */
char alpha1; /* NCSI version alpha1 */
char alpha2; /* NCSI version alpha2 */
- u8 fw_name[12]; /* Firmware name string */
+ u8 fw_name[12 + 1]; /* Firmware name string */
u32 fw_version; /* Firmware version */
u16 pci_ids[4]; /* PCI identification */
u32 mf_id; /* Manufacture ID */
@@ -143,16 +143,15 @@ struct ncsi_channel_vlan_filter {
};
struct ncsi_channel_stats {
- u32 hnc_cnt_hi; /* Counter cleared */
- u32 hnc_cnt_lo; /* Counter cleared */
- u32 hnc_rx_bytes; /* Rx bytes */
- u32 hnc_tx_bytes; /* Tx bytes */
- u32 hnc_rx_uc_pkts; /* Rx UC packets */
- u32 hnc_rx_mc_pkts; /* Rx MC packets */
- u32 hnc_rx_bc_pkts; /* Rx BC packets */
- u32 hnc_tx_uc_pkts; /* Tx UC packets */
- u32 hnc_tx_mc_pkts; /* Tx MC packets */
- u32 hnc_tx_bc_pkts; /* Tx BC packets */
+ u64 hnc_cnt; /* Counter cleared */
+ u64 hnc_rx_bytes; /* Rx bytes */
+ u64 hnc_tx_bytes; /* Tx bytes */
+ u64 hnc_rx_uc_pkts; /* Rx UC packets */
+ u64 hnc_rx_mc_pkts; /* Rx MC packets */
+ u64 hnc_rx_bc_pkts; /* Rx BC packets */
+ u64 hnc_tx_uc_pkts; /* Tx UC packets */
+ u64 hnc_tx_mc_pkts; /* Tx MC packets */
+ u64 hnc_tx_bc_pkts; /* Tx BC packets */
u32 hnc_fcs_err; /* FCS errors */
u32 hnc_align_err; /* Alignment errors */
u32 hnc_false_carrier; /* False carrier detection */
@@ -181,7 +180,7 @@ struct ncsi_channel_stats {
u32 hnc_tx_1023_frames; /* Tx 512-1023 bytes frames */
u32 hnc_tx_1522_frames; /* Tx 1024-1522 bytes frames */
u32 hnc_tx_9022_frames; /* Tx 1523-9022 bytes frames */
- u32 hnc_rx_valid_bytes; /* Rx valid bytes */
+ u64 hnc_rx_valid_bytes; /* Rx valid bytes */
u32 hnc_rx_runt_pkts; /* Rx error runt packets */
u32 hnc_rx_jabber_pkts; /* Rx error jabber packets */
u32 ncsi_rx_cmds; /* Rx NCSI commands */
@@ -289,6 +288,7 @@ enum {
ncsi_dev_state_config_sp = 0x0301,
ncsi_dev_state_config_cis,
ncsi_dev_state_config_oem_gma,
+ ncsi_dev_state_config_apply_mac,
ncsi_dev_state_config_clear_vids,
ncsi_dev_state_config_svf,
ncsi_dev_state_config_ev,
@@ -322,6 +322,7 @@ struct ncsi_dev_priv {
#define NCSI_DEV_RESHUFFLE 4
#define NCSI_DEV_RESET 8 /* Reset state of NC */
unsigned int gma_flag; /* OEM GMA flag */
+ struct sockaddr pending_mac; /* MAC address received from GMA */
spinlock_t lock; /* Protect the NCSI device */
unsigned int package_probe_id;/* Current ID during probe */
unsigned int package_num; /* Number of packages */
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 5cf55bde366d..7891a537bddd 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1038,7 +1038,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
: ncsi_dev_state_config_clear_vids;
break;
case ncsi_dev_state_config_oem_gma:
- nd->state = ncsi_dev_state_config_clear_vids;
+ nd->state = ncsi_dev_state_config_apply_mac;
nca.package = np->id;
nca.channel = nc->id;
@@ -1050,10 +1050,22 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.type = NCSI_PKT_CMD_OEM;
ret = ncsi_gma_handler(&nca, nc->version.mf_id);
}
- if (ret < 0)
+ if (ret < 0) {
+ nd->state = ncsi_dev_state_config_clear_vids;
schedule_work(&ndp->work);
+ }
break;
+ case ncsi_dev_state_config_apply_mac:
+ rtnl_lock();
+ ret = dev_set_mac_address(dev, &ndp->pending_mac, NULL);
+ rtnl_unlock();
+ if (ret < 0)
+ netdev_warn(dev, "NCSI: 'Writing MAC address to device failed\n");
+
+ nd->state = ncsi_dev_state_config_clear_vids;
+
+ fallthrough;
case ncsi_dev_state_config_clear_vids:
case ncsi_dev_state_config_svf:
case ncsi_dev_state_config_ev:
@@ -1373,6 +1385,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_package;
break;
case ncsi_dev_state_probe_package:
+ if (ndp->package_probe_id >= 8) {
+ /* Last package probed, finishing */
+ ndp->flags |= NCSI_DEV_PROBED;
+ break;
+ }
+
ndp->pending_req_num = 1;
nca.type = NCSI_PKT_CMD_SP;
@@ -1489,13 +1507,8 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
if (ret)
goto error;
- /* Probe next package */
+ /* Probe next package after receiving response */
ndp->package_probe_id++;
- if (ndp->package_probe_id >= 8) {
- /* Probe finished */
- ndp->flags |= NCSI_DEV_PROBED;
- break;
- }
nd->state = ncsi_dev_state_probe_package;
ndp->active_package = NULL;
break;
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index f2f3b5c1b941..24edb2737972 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -252,16 +252,15 @@ struct ncsi_rsp_gp_pkt {
/* Get Controller Packet Statistics */
struct ncsi_rsp_gcps_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
- __be32 cnt_hi; /* Counter cleared */
- __be32 cnt_lo; /* Counter cleared */
- __be32 rx_bytes; /* Rx bytes */
- __be32 tx_bytes; /* Tx bytes */
- __be32 rx_uc_pkts; /* Rx UC packets */
- __be32 rx_mc_pkts; /* Rx MC packets */
- __be32 rx_bc_pkts; /* Rx BC packets */
- __be32 tx_uc_pkts; /* Tx UC packets */
- __be32 tx_mc_pkts; /* Tx MC packets */
- __be32 tx_bc_pkts; /* Tx BC packets */
+ __be64 cnt; /* Counter cleared */
+ __be64 rx_bytes; /* Rx bytes */
+ __be64 tx_bytes; /* Tx bytes */
+ __be64 rx_uc_pkts; /* Rx UC packets */
+ __be64 rx_mc_pkts; /* Rx MC packets */
+ __be64 rx_bc_pkts; /* Rx BC packets */
+ __be64 tx_uc_pkts; /* Tx UC packets */
+ __be64 tx_mc_pkts; /* Tx MC packets */
+ __be64 tx_bc_pkts; /* Tx BC packets */
__be32 fcs_err; /* FCS errors */
__be32 align_err; /* Alignment errors */
__be32 false_carrier; /* False carrier detection */
@@ -290,11 +289,11 @@ struct ncsi_rsp_gcps_pkt {
__be32 tx_1023_frames; /* Tx 512-1023 bytes frames */
__be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */
__be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */
- __be32 rx_valid_bytes; /* Rx valid bytes */
+ __be64 rx_valid_bytes; /* Rx valid bytes */
__be32 rx_runt_pkts; /* Rx error runt packets */
__be32 rx_jabber_pkts; /* Rx error jabber packets */
__be32 checksum; /* Checksum */
-};
+} __packed __aligned(4);
/* Get NCSI Statistics */
struct ncsi_rsp_gns_pkt {
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index e28be33bdf2c..d5ed80731e89 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -628,16 +628,14 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id)
{
struct ncsi_dev_priv *ndp = nr->ndp;
+ struct sockaddr *saddr = &ndp->pending_mac;
struct net_device *ndev = ndp->ndev.dev;
struct ncsi_rsp_oem_pkt *rsp;
- struct sockaddr saddr;
u32 mac_addr_off = 0;
- int ret = 0;
/* Get the response header */
rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
- saddr.sa_family = ndev->type;
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
if (mfr_id == NCSI_OEM_MFR_BCM_ID)
mac_addr_off = BCM_MAC_ADDR_OFFSET;
@@ -646,22 +644,17 @@ static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id)
else if (mfr_id == NCSI_OEM_MFR_INTEL_ID)
mac_addr_off = INTEL_MAC_ADDR_OFFSET;
- memcpy(saddr.sa_data, &rsp->data[mac_addr_off], ETH_ALEN);
+ saddr->sa_family = ndev->type;
+ memcpy(saddr->sa_data, &rsp->data[mac_addr_off], ETH_ALEN);
if (mfr_id == NCSI_OEM_MFR_BCM_ID || mfr_id == NCSI_OEM_MFR_INTEL_ID)
- eth_addr_inc((u8 *)saddr.sa_data);
- if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+ eth_addr_inc((u8 *)saddr->sa_data);
+ if (!is_valid_ether_addr((const u8 *)saddr->sa_data))
return -ENXIO;
/* Set the flag for GMA command which should only be called once */
ndp->gma_flag = 1;
- rtnl_lock();
- ret = dev_set_mac_address(ndev, &saddr, NULL);
- rtnl_unlock();
- if (ret < 0)
- netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
-
- return ret;
+ return 0;
}
/* Response handler for Mellanox card */
@@ -782,6 +775,7 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
ncv->alpha1 = rsp->alpha1;
ncv->alpha2 = rsp->alpha2;
memcpy(ncv->fw_name, rsp->fw_name, 12);
+ ncv->fw_name[12] = '\0';
ncv->fw_version = ntohl(rsp->fw_version);
for (i = 0; i < ARRAY_SIZE(ncv->pci_ids); i++)
ncv->pci_ids[i] = ntohs(rsp->pci_ids[i]);
@@ -933,16 +927,15 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr)
/* Update HNC's statistics */
ncs = &nc->stats;
- ncs->hnc_cnt_hi = ntohl(rsp->cnt_hi);
- ncs->hnc_cnt_lo = ntohl(rsp->cnt_lo);
- ncs->hnc_rx_bytes = ntohl(rsp->rx_bytes);
- ncs->hnc_tx_bytes = ntohl(rsp->tx_bytes);
- ncs->hnc_rx_uc_pkts = ntohl(rsp->rx_uc_pkts);
- ncs->hnc_rx_mc_pkts = ntohl(rsp->rx_mc_pkts);
- ncs->hnc_rx_bc_pkts = ntohl(rsp->rx_bc_pkts);
- ncs->hnc_tx_uc_pkts = ntohl(rsp->tx_uc_pkts);
- ncs->hnc_tx_mc_pkts = ntohl(rsp->tx_mc_pkts);
- ncs->hnc_tx_bc_pkts = ntohl(rsp->tx_bc_pkts);
+ ncs->hnc_cnt = be64_to_cpu(rsp->cnt);
+ ncs->hnc_rx_bytes = be64_to_cpu(rsp->rx_bytes);
+ ncs->hnc_tx_bytes = be64_to_cpu(rsp->tx_bytes);
+ ncs->hnc_rx_uc_pkts = be64_to_cpu(rsp->rx_uc_pkts);
+ ncs->hnc_rx_mc_pkts = be64_to_cpu(rsp->rx_mc_pkts);
+ ncs->hnc_rx_bc_pkts = be64_to_cpu(rsp->rx_bc_pkts);
+ ncs->hnc_tx_uc_pkts = be64_to_cpu(rsp->tx_uc_pkts);
+ ncs->hnc_tx_mc_pkts = be64_to_cpu(rsp->tx_mc_pkts);
+ ncs->hnc_tx_bc_pkts = be64_to_cpu(rsp->tx_bc_pkts);
ncs->hnc_fcs_err = ntohl(rsp->fcs_err);
ncs->hnc_align_err = ntohl(rsp->align_err);
ncs->hnc_false_carrier = ntohl(rsp->false_carrier);
@@ -971,7 +964,7 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr)
ncs->hnc_tx_1023_frames = ntohl(rsp->tx_1023_frames);
ncs->hnc_tx_1522_frames = ntohl(rsp->tx_1522_frames);
ncs->hnc_tx_9022_frames = ntohl(rsp->tx_9022_frames);
- ncs->hnc_rx_valid_bytes = ntohl(rsp->rx_valid_bytes);
+ ncs->hnc_rx_valid_bytes = be64_to_cpu(rsp->rx_valid_bytes);
ncs->hnc_rx_runt_pkts = ntohl(rsp->rx_runt_pkts);
ncs->hnc_rx_jabber_pkts = ntohl(rsp->rx_jabber_pkts);
@@ -1096,14 +1089,12 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr)
static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr)
{
struct ncsi_dev_priv *ndp = nr->ndp;
+ struct sockaddr *saddr = &ndp->pending_mac;
struct net_device *ndev = ndp->ndev.dev;
struct ncsi_rsp_gmcma_pkt *rsp;
- struct sockaddr saddr;
- int ret = -1;
int i;
rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp);
- saddr.sa_family = ndev->type;
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n",
@@ -1115,20 +1106,20 @@ static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr)
rsp->addresses[i][4], rsp->addresses[i][5]);
}
+ saddr->sa_family = ndev->type;
for (i = 0; i < rsp->address_count; i++) {
- memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN);
- ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr);
- if (ret < 0) {
+ if (!is_valid_ether_addr(rsp->addresses[i])) {
netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n",
- saddr.sa_data);
+ rsp->addresses[i]);
continue;
}
- netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data);
+ memcpy(saddr->sa_data, rsp->addresses[i], ETH_ALEN);
+ netdev_warn(ndev, "NCSI: Will set MAC address to %pM\n", saddr->sa_data);
break;
}
- ndp->gma_flag = ret == 0;
- return ret;
+ ndp->gma_flag = 1;
+ return 0;
}
static struct ncsi_rsp_handler {
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index cf3ce72c3de6..5251524b96af 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -64,7 +64,7 @@ struct hbucket {
#define ahash_sizeof_regions(htable_bits) \
(ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
#define ahash_region(n, htable_bits) \
- ((n) % ahash_numof_locks(htable_bits))
+ ((n) / jhash_size(HTABLE_REGION_BITS))
#define ahash_bucket_start(h, htable_bits) \
((htable_bits) < HTABLE_REGION_BITS ? 0 \
: (h) * jhash_size(HTABLE_REGION_BITS))
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index dc6ddc4abbe2..3224f6e17e73 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3091,12 +3091,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
case IP_VS_SO_GET_SERVICES:
{
struct ip_vs_get_services *get;
- int size;
+ size_t size;
get = (struct ip_vs_get_services *)arg;
size = struct_size(get, entrytable, get->num_services);
if (*len != size) {
- pr_err("length: %u != %u\n", *len, size);
+ pr_err("length: %u != %zu\n", *len, size);
ret = -EINVAL;
goto out;
}
@@ -3132,12 +3132,12 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
case IP_VS_SO_GET_DESTS:
{
struct ip_vs_get_dests *get;
- int size;
+ size_t size;
get = (struct ip_vs_get_dests *)arg;
size = struct_size(get, entrytable, get->num_dests);
if (*len != size) {
- pr_err("length: %u != %u\n", *len, size);
+ pr_err("length: %u != %zu\n", *len, size);
ret = -EINVAL;
goto out;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3313bceb6cc9..014f07740369 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -119,13 +119,12 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
return false;
}
-/* Get route to daddr, update *saddr, optionally bind route to saddr */
+/* Get route to daddr, optionally bind route to saddr */
static struct rtable *do_output_route4(struct net *net, __be32 daddr,
- int rt_mode, __be32 *saddr)
+ int rt_mode, __be32 *ret_saddr)
{
struct flowi4 fl4;
struct rtable *rt;
- bool loop = false;
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
@@ -135,23 +134,17 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
retry:
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
- /* Invalid saddr ? */
- if (PTR_ERR(rt) == -EINVAL && *saddr &&
- rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
- *saddr = 0;
- flowi4_update_output(&fl4, 0, daddr, 0);
- goto retry;
- }
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
return NULL;
- } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
+ }
+ if (rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
ip_rt_put(rt);
- *saddr = fl4.saddr;
flowi4_update_output(&fl4, 0, daddr, fl4.saddr);
- loop = true;
+ rt_mode = 0;
goto retry;
}
- *saddr = fl4.saddr;
+ if (ret_saddr)
+ *ret_saddr = fl4.saddr;
return rt;
}
@@ -344,19 +337,15 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
if (ret_saddr)
*ret_saddr = dest_dst->dst_saddr.ip;
} else {
- __be32 saddr = htonl(INADDR_ANY);
-
noref = 0;
/* For such unconfigured boxes avoid many route lookups
* for performance reasons because we do not remember saddr
*/
rt_mode &= ~IP_VS_RT_MODE_CONNECT;
- rt = do_output_route4(net, daddr, rt_mode, &saddr);
+ rt = do_output_route4(net, daddr, rt_mode, ret_saddr);
if (!rt)
goto err_unreach;
- if (ret_saddr)
- *ret_saddr = saddr;
}
local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 3d64a4511fcf..b5e4ca9026a8 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -17,7 +17,7 @@ static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
.skb = skb,
};
- return bpf_prog_run(prog, &ctx);
+ return bpf_prog_run_pin_on_cpu(prog, &ctx);
}
struct bpf_nf_link {
@@ -295,6 +295,9 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type,
if (off < 0 || off >= sizeof(struct bpf_nf_ctx))
return false;
+ if (off % size != 0)
+ return false;
+
if (type == BPF_WRITE)
return false;
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 4890af4dc263..913ede2f57f9 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -132,7 +132,7 @@ static int __nf_conncount_add(struct net *net,
struct nf_conn *found_ct;
unsigned int collect = 0;
- if (time_is_after_eq_jiffies((unsigned long)list->last_gc))
+ if ((u32)jiffies == list->last_gc)
goto add_new_node;
/* check the saved connections */
@@ -234,7 +234,7 @@ bool nf_conncount_gc_list(struct net *net,
bool ret = false;
/* don't bother if we just did GC */
- if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc)))
+ if ((u32)jiffies == READ_ONCE(list->last_gc))
return false;
/* don't bother if other cpu is already doing GC */
@@ -377,6 +377,8 @@ restart:
conn->tuple = *tuple;
conn->zone = *zone;
+ conn->cpu = raw_smp_processor_id();
+ conn->jiffies32 = (u32)jiffies;
memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
nf_conncount_list_init(&rbconn->list);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 456446d7af20..f5bde4f13958 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1121,6 +1121,12 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
&nf_conntrack_hash[repl_idx]);
+ /* confirmed bit must be set after hlist add, not before:
+ * loser_ct can still be visible to other cpu due to
+ * SLAB_TYPESAFE_BY_RCU.
+ */
+ smp_mb__before_atomic();
+ set_bit(IPS_CONFIRMED_BIT, &loser_ct->status);
NF_CT_STAT_INC(net, clash_resolve);
return NF_ACCEPT;
@@ -1257,8 +1263,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* user context, else we insert an already 'dead' hash, blocking
* further use of that particular connection -JM.
*/
- ct->status |= IPS_CONFIRMED;
-
if (unlikely(nf_ct_is_dying(ct))) {
NF_CT_STAT_INC(net, insert_failed);
goto dying;
@@ -1290,7 +1294,7 @@ chaintoolong:
}
}
- /* Timer relative to confirmation time, not original
+ /* Timeout is relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
ct->timeout += nfct_time_stamp;
@@ -1298,11 +1302,21 @@ chaintoolong:
__nf_conntrack_insert_prepare(ct);
/* Since the lookup is lockless, hash insertion must be done after
- * starting the timer and setting the CONFIRMED bit. The RCU barriers
- * guarantee that no other CPU can find the conntrack before the above
- * stores are visible.
+ * setting ct->timeout. The RCU barriers guarantee that no other CPU
+ * can find the conntrack before the above stores are visible.
*/
__nf_conntrack_hash_insert(ct, hash, reply_hash);
+
+ /* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups
+ * skip entries that lack this bit. This happens when a CPU is looking
+ * at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU
+ * or when another CPU encounters this entry right after the insertion
+ * but before the set-confirm-bit below. This bit must not be set until
+ * after __nf_conntrack_hash_insert().
+ */
+ smp_mb__before_atomic();
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable();
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6a1239433830..18a91c031554 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -860,8 +860,6 @@ errout:
static int ctnetlink_done(struct netlink_callback *cb)
{
- if (cb->args[1])
- nf_ct_put((struct nf_conn *)cb->args[1]);
kfree(cb->data);
return 0;
}
@@ -1184,19 +1182,26 @@ ignore_entry:
return 0;
}
+static unsigned long ctnetlink_get_id(const struct nf_conn *ct)
+{
+ unsigned long id = nf_ct_get_id(ct);
+
+ return id ? id : 1;
+}
+
static int
ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0;
struct net *net = sock_net(skb->sk);
- struct nf_conn *ct, *last;
+ unsigned long last_id = cb->args[1];
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nf_conn *nf_ct_evict[8];
+ struct nf_conn *ct;
int res, i;
spinlock_t *lockp;
- last = (struct nf_conn *)cb->args[1];
i = 0;
local_bh_disable();
@@ -1233,7 +1238,7 @@ restart:
continue;
if (cb->args[1]) {
- if (ct != last)
+ if (ctnetlink_get_id(ct) != last_id)
continue;
cb->args[1] = 0;
}
@@ -1246,8 +1251,7 @@ restart:
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct, true, flags);
if (res < 0) {
- nf_conntrack_get(&ct->ct_general);
- cb->args[1] = (unsigned long)ct;
+ cb->args[1] = ctnetlink_get_id(ct);
spin_unlock(lockp);
goto out;
}
@@ -1260,12 +1264,10 @@ restart:
}
out:
local_bh_enable();
- if (last) {
+ if (last_id) {
/* nf ct hash resize happened, now clear the leftover. */
- if ((struct nf_conn *)cb->args[1] == last)
+ if (cb->args[1] == last_id)
cb->args[1] = 0;
-
- nf_ct_put(last);
}
while (i) {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 7d4f0fa8b609..3ea60ff7a6a4 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -619,7 +619,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
[NF_SYSCTL_CT_COUNT] = {
.procname = "nf_conntrack_count",
@@ -655,7 +657,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.data = &nf_ct_expect_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
[NF_SYSCTL_CT_ACCT] = {
.procname = "nf_conntrack_acct",
@@ -948,7 +952,9 @@ static struct ctl_table nf_ct_netfilter_table[] = {
.data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
};
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 4085c436e306..02f10a46fab7 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -248,7 +248,7 @@ static noinline bool
nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_ct)
{
- static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT;
+ static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST;
const struct nf_conntrack_tuple_hash *thash;
const struct nf_conntrack_zone *zone;
struct nf_conn *ct;
@@ -287,8 +287,14 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
zone = nf_ct_zone(ignored_ct);
thash = nf_conntrack_find_get(net, zone, tuple);
- if (unlikely(!thash)) /* clashing entry went away */
- return false;
+ if (unlikely(!thash)) {
+ struct nf_conntrack_tuple reply;
+
+ nf_ct_invert_tuple(&reply, tuple);
+ thash = nf_conntrack_find_get(net, zone, &reply);
+ if (!thash) /* clashing entry went away */
+ return false;
+ }
ct = nf_ct_tuplehash_to_ctrack(thash);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 42dc8cc721ff..3743e4249dc8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -31,7 +31,6 @@ unsigned int nf_tables_net_id __read_mostly;
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
-static LIST_HEAD(nf_tables_destroy_list);
static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
@@ -122,7 +121,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
table->validate_state = new_validate_state;
}
static void nf_tables_trans_destroy_work(struct work_struct *w);
-static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
static void nft_trans_gc_work(struct work_struct *work);
static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
@@ -1031,11 +1029,6 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
NFTA_TABLE_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELTABLE) {
- nlmsg_end(skb, nlh);
- return 0;
- }
-
if (nla_put_be32(skb, NFTA_TABLE_FLAGS,
htonl(table->flags & NFT_TABLE_F_MASK)))
goto nla_put_failure;
@@ -1891,11 +1884,6 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
NFTA_CHAIN_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELCHAIN && !hook_list) {
- nlmsg_end(skb, nlh);
- return 0;
- }
-
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
struct nft_stats __percpu *stats;
@@ -2734,11 +2722,11 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
err = nft_netdev_register_hooks(ctx->net, &hook.list);
if (err < 0)
goto err_hooks;
+
+ unregister = true;
}
}
- unregister = true;
-
if (nla[NFTA_CHAIN_COUNTERS]) {
if (!nft_is_base_chain(chain)) {
err = -EOPNOTSUPP;
@@ -3886,7 +3874,7 @@ void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
/* can only be used if rule is no longer visible to dumps */
static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
{
- lockdep_commit_lock_is_held(ctx->net);
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net));
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
@@ -4647,6 +4635,14 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
return 0;
}
+static u32 nft_set_userspace_size(const struct nft_set_ops *ops, u32 size)
+{
+ if (ops->usize)
+ return ops->usize(size);
+
+ return size;
+}
+
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
const struct nft_set *set, u16 event, u16 flags)
{
@@ -4672,11 +4668,6 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
NFTA_SET_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELSET) {
- nlmsg_end(skb, nlh);
- return 0;
- }
-
if (set->flags != 0)
if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
goto nla_put_failure;
@@ -4717,7 +4708,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
if (!nest)
goto nla_put_failure;
if (set->size &&
- nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
+ nla_put_be32(skb, NFTA_SET_DESC_SIZE,
+ htonl(nft_set_userspace_size(set->ops, set->size))))
goto nla_put_failure;
if (set->field_count > 1 &&
@@ -4959,7 +4951,7 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
static int nft_set_desc_concat(struct nft_set_desc *desc,
const struct nlattr *nla)
{
- u32 num_regs = 0, key_num_regs = 0;
+ u32 len = 0, num_regs;
struct nlattr *attr;
int rem, err, i;
@@ -4973,12 +4965,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
}
for (i = 0; i < desc->field_count; i++)
- num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
+ len += round_up(desc->field_len[i], sizeof(u32));
- key_num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32));
- if (key_num_regs != num_regs)
+ if (len != desc->klen)
return -EINVAL;
+ num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32));
if (num_regs > NFT_REG32_COUNT)
return -E2BIG;
@@ -5085,6 +5077,15 @@ static bool nft_set_is_same(const struct nft_set *set,
return true;
}
+static u32 nft_set_kernel_size(const struct nft_set_ops *ops,
+ const struct nft_set_desc *desc)
+{
+ if (ops->ksize)
+ return ops->ksize(desc->size);
+
+ return desc->size;
+}
+
static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
@@ -5267,6 +5268,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (err < 0)
return err;
+ if (desc.size)
+ desc.size = nft_set_kernel_size(set->ops, &desc);
+
err = 0;
if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
@@ -5289,6 +5293,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (IS_ERR(ops))
return PTR_ERR(ops);
+ if (desc.size)
+ desc.size = nft_set_kernel_size(ops, &desc);
+
udlen = 0;
if (nla[NFTA_SET_USERDATA])
udlen = nla_len(nla[NFTA_SET_USERDATA]);
@@ -5652,7 +5659,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding,
enum nft_trans_phase phase)
{
- lockdep_commit_lock_is_held(ctx->net);
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net));
switch (phase) {
case NFT_TRANS_PREPARE_ERROR:
@@ -6855,6 +6862,27 @@ static bool nft_setelem_valid_key_end(const struct nft_set *set,
return true;
}
+static u32 nft_set_maxsize(const struct nft_set *set)
+{
+ u32 maxsize, delta;
+
+ if (!set->size)
+ return UINT_MAX;
+
+ if (set->ops->adjust_maxsize)
+ delta = set->ops->adjust_maxsize(set);
+ else
+ delta = 0;
+
+ if (check_add_overflow(set->size, set->ndeact, &maxsize))
+ return UINT_MAX;
+
+ if (check_add_overflow(maxsize, delta, &maxsize))
+ return UINT_MAX;
+
+ return maxsize;
+}
+
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr, u32 nlmsg_flags)
{
@@ -7218,7 +7246,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
if (!(flags & NFT_SET_ELEM_CATCHALL)) {
- unsigned int max = set->size ? set->size + set->ndeact : UINT_MAX;
+ unsigned int max = nft_set_maxsize(set);
if (!atomic_add_unless(&set->nelems, 1, max)) {
err = -ENFILE;
@@ -7974,11 +8002,6 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
NFTA_OBJ_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELOBJ) {
- nlmsg_end(skb, nlh);
- return 0;
- }
-
if (nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
@@ -8997,11 +9020,6 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
NFTA_FLOWTABLE_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELFLOWTABLE && !hook_list) {
- nlmsg_end(skb, nlh);
- return 0;
- }
-
if (nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
goto nla_put_failure;
@@ -9703,11 +9721,12 @@ static void nft_commit_release(struct nft_trans *trans)
static void nf_tables_trans_destroy_work(struct work_struct *w)
{
+ struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work);
struct nft_trans *trans, *next;
LIST_HEAD(head);
spin_lock(&nf_tables_destroy_list_lock);
- list_splice_init(&nf_tables_destroy_list, &head);
+ list_splice_init(&nft_net->destroy_list, &head);
spin_unlock(&nf_tables_destroy_list_lock);
if (list_empty(&head))
@@ -9721,9 +9740,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
}
}
-void nf_tables_trans_destroy_flush_work(void)
+void nf_tables_trans_destroy_flush_work(struct net *net)
{
- flush_work(&trans_destroy_work);
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ flush_work(&nft_net->destroy_work);
}
EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
@@ -10181,11 +10202,11 @@ static void nf_tables_commit_release(struct net *net)
trans->put_net = true;
spin_lock(&nf_tables_destroy_list_lock);
- list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list);
+ list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list);
spin_unlock(&nf_tables_destroy_list_lock);
nf_tables_module_autoload_cleanup(net);
- schedule_work(&trans_destroy_work);
+ schedule_work(&nft_net->destroy_work);
mutex_unlock(&nft_net->commit_mutex);
}
@@ -11608,7 +11629,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
gc_seq = nft_gc_seq_begin(nft_net);
- nf_tables_trans_destroy_flush_work();
+ nf_tables_trans_destroy_flush_work(net);
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -11650,6 +11671,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->destroy_list);
INIT_LIST_HEAD(&nft_net->commit_set_list);
INIT_LIST_HEAD(&nft_net->binding_list);
INIT_LIST_HEAD(&nft_net->module_list);
@@ -11658,6 +11680,7 @@ static int __net_init nf_tables_init_net(struct net *net)
nft_net->base_seq = 1;
nft_net->gc_seq = 0;
nft_net->validate_state = NFT_VALIDATE_SKIP;
+ INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);
return 0;
}
@@ -11686,14 +11709,17 @@ static void __net_exit nf_tables_exit_net(struct net *net)
if (!list_empty(&nft_net->module_list))
nf_tables_module_autoload_cleanup(net);
+ cancel_work_sync(&nft_net->destroy_work);
__nft_release_tables(net);
nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
+
WARN_ON_ONCE(!list_empty(&nft_net->tables));
WARN_ON_ONCE(!list_empty(&nft_net->module_list));
WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
+ WARN_ON_ONCE(!list_empty(&nft_net->destroy_list));
}
static void nf_tables_exit_batch(struct list_head *net_exit_list)
@@ -11784,10 +11810,8 @@ static void __exit nf_tables_module_exit(void)
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
nft_chain_route_fini();
- nf_tables_trans_destroy_flush_work();
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_gc_work);
- cancel_work_sync(&trans_destroy_work);
rcu_barrier();
rhltable_destroy(&nft_objname_ht);
nf_tables_core_module_exit();
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 7ca4f0d21fe2..72711d62fddf 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
return 0;
}
-static void nft_compat_wait_for_destructors(void)
+static void nft_compat_wait_for_destructors(struct net *net)
{
/* xtables matches or targets can have side effects, e.g.
* creation/destruction of /proc files.
@@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void)
* work queue. If we have pending invocations we thus
* need to wait for those to finish.
*/
- nf_tables_trans_destroy_flush_work();
+ nf_tables_trans_destroy_flush_work(net);
}
static int
@@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
- nft_compat_wait_for_destructors();
+ nft_compat_wait_for_destructors(ctx->net);
ret = xt_check_target(&par, size, proto, inv);
if (ret < 0) {
@@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
- nft_compat_wait_for_destructors();
+ nft_compat_wait_for_destructors(ctx->net);
return xt_check_match(&par, size, proto, inv);
}
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 67a41cd2baaf..a1b373b99f7b 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -230,6 +230,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
enum ip_conntrack_info ctinfo;
u16 value = nft_reg_load16(&regs->data[priv->sreg]);
struct nf_conn *ct;
+ int oldcnt;
ct = nf_ct_get(skb, &ctinfo);
if (ct) /* already tracked */
@@ -250,10 +251,11 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
ct = this_cpu_read(nft_ct_pcpu_template);
- if (likely(refcount_read(&ct->ct_general.use) == 1)) {
- refcount_inc(&ct->ct_general.use);
+ __refcount_inc(&ct->ct_general.use, &oldcnt);
+ if (likely(oldcnt == 1)) {
nf_ct_zone_add(ct, &zone);
} else {
+ refcount_dec(&ct->ct_general.use);
/* previous skb got queued to userspace, allocate temporary
* one until percpu template can be reused.
*/
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index b8d03364566c..c74012c99125 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -85,7 +85,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
struct iphdr *iph, _iph;
- unsigned int start;
bool found = false;
__be32 info;
int optlen;
@@ -93,7 +92,6 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
if (!iph)
return -EBADMSG;
- start = sizeof(struct iphdr);
optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
if (optlen <= 0)
@@ -103,7 +101,7 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
/* Copy the options since __ip_options_compile() modifies
* the options.
*/
- if (skb_copy_bits(skb, start, opt->__data, optlen))
+ if (skb_copy_bits(skb, sizeof(struct iphdr), opt->__data, optlen))
return -EBADMSG;
opt->optlen = optlen;
@@ -118,18 +116,18 @@ static int ipv4_find_option(struct net *net, struct sk_buff *skb,
found = target == IPOPT_SSRR ? opt->is_strictroute :
!opt->is_strictroute;
if (found)
- *offset = opt->srr + start;
+ *offset = opt->srr;
break;
case IPOPT_RR:
if (!opt->rr)
break;
- *offset = opt->rr + start;
+ *offset = opt->rr;
found = true;
break;
case IPOPT_RA:
if (!opt->router_alert)
break;
- *offset = opt->router_alert + start;
+ *offset = opt->router_alert;
found = true;
break;
default:
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 2f732fae5a83..da9ebd00b198 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -289,6 +289,15 @@ static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
return false;
}
+static void flow_offload_ct_tcp(struct nf_conn *ct)
+{
+ /* conntrack will not see all packets, disable tcp window validation. */
+ spin_lock_bh(&ct->lock);
+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
+ spin_unlock_bh(&ct->lock);
+}
+
static void nft_flow_offload_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -356,11 +365,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
goto err_flow_alloc;
flow_offload_route_init(flow, &route);
-
- if (tcph) {
- ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
- ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
- }
+ if (tcph)
+ flow_offload_ct_tcp(ct);
__set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
ret = flow_offload_add(flowtable, flow);
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 9b2d7463d3d3..df0798da2329 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -19,10 +19,16 @@ struct nft_quota {
};
static inline bool nft_overquota(struct nft_quota *priv,
- const struct sk_buff *skb)
+ const struct sk_buff *skb,
+ bool *report)
{
- return atomic64_add_return(skb->len, priv->consumed) >=
- atomic64_read(&priv->quota);
+ u64 consumed = atomic64_add_return(skb->len, priv->consumed);
+ u64 quota = atomic64_read(&priv->quota);
+
+ if (report)
+ *report = consumed >= quota;
+
+ return consumed > quota;
}
static inline bool nft_quota_invert(struct nft_quota *priv)
@@ -34,7 +40,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv))
+ if (nft_overquota(priv, pkt->skb, NULL) ^ nft_quota_invert(priv))
regs->verdict.code = NFT_BREAK;
}
@@ -51,13 +57,13 @@ static void nft_quota_obj_eval(struct nft_object *obj,
const struct nft_pktinfo *pkt)
{
struct nft_quota *priv = nft_obj_data(obj);
- bool overquota;
+ bool overquota, report;
- overquota = nft_overquota(priv, pkt->skb);
+ overquota = nft_overquota(priv, pkt->skb, &report);
if (overquota ^ nft_quota_invert(priv))
regs->verdict.code = NFT_BREAK;
- if (overquota &&
+ if (report &&
!test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index b93f046ac7d1..4b3452dff2ec 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -309,7 +309,8 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
nft_setelem_expr_foreach(expr, elem_expr, size) {
if (expr->ops->gc &&
- expr->ops->gc(read_pnet(&set->net), expr))
+ expr->ops->gc(read_pnet(&set->net), expr) &&
+ set->flags & NFT_SET_EVAL)
return true;
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 7be342b495f5..9e4e25f2458f 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -663,6 +663,9 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f,
check_add_overflow(rules, extra, &rules_alloc))
return -EOVERFLOW;
+ if (rules_alloc > (INT_MAX / sizeof(*new_mt)))
+ return -ENOMEM;
+
new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL_ACCOUNT);
if (!new_mt)
return -ENOMEM;
@@ -683,6 +686,30 @@ out_free:
return 0;
}
+
+/**
+ * lt_calculate_size() - Get storage size for lookup table with overflow check
+ * @groups: Amount of bit groups
+ * @bb: Number of bits grouped together in lookup table buckets
+ * @bsize: Size of each bucket in lookup table, in longs
+ *
+ * Return: allocation size including alignment overhead, negative on overflow
+ */
+static ssize_t lt_calculate_size(unsigned int groups, unsigned int bb,
+ unsigned int bsize)
+{
+ ssize_t ret = groups * NFT_PIPAPO_BUCKETS(bb) * sizeof(long);
+
+ if (check_mul_overflow(ret, bsize, &ret))
+ return -1;
+ if (check_add_overflow(ret, NFT_PIPAPO_ALIGN_HEADROOM, &ret))
+ return -1;
+ if (ret > INT_MAX)
+ return -1;
+
+ return ret;
+}
+
/**
* pipapo_resize() - Resize lookup or mapping table, or both
* @f: Field containing lookup and mapping tables
@@ -701,6 +728,7 @@ static int pipapo_resize(struct nft_pipapo_field *f,
long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p;
unsigned int new_bucket_size, copy;
int group, bucket, err;
+ ssize_t lt_size;
if (rules >= NFT_PIPAPO_RULE0_MAX)
return -ENOSPC;
@@ -719,10 +747,11 @@ static int pipapo_resize(struct nft_pipapo_field *f,
else
copy = new_bucket_size;
- new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) *
- new_bucket_size * sizeof(*new_lt) +
- NFT_PIPAPO_ALIGN_HEADROOM,
- GFP_KERNEL);
+ lt_size = lt_calculate_size(f->groups, f->bb, new_bucket_size);
+ if (lt_size < 0)
+ return -ENOMEM;
+
+ new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT);
if (!new_lt)
return -ENOMEM;
@@ -907,7 +936,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
{
unsigned int groups, bb;
unsigned long *new_lt;
- size_t lt_size;
+ ssize_t lt_size;
lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize *
sizeof(*f->lt);
@@ -917,15 +946,17 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
groups = f->groups * 2;
bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET;
- lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
- sizeof(*f->lt);
+ lt_size = lt_calculate_size(groups, bb, f->bsize);
+ if (lt_size < 0)
+ return;
} else if (f->bb == NFT_PIPAPO_GROUP_BITS_LARGE_SET &&
lt_size < NFT_PIPAPO_LT_SIZE_LOW) {
groups = f->groups / 2;
bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET;
- lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize *
- sizeof(*f->lt);
+ lt_size = lt_calculate_size(groups, bb, f->bsize);
+ if (lt_size < 0)
+ return;
/* Don't increase group width if the resulting lookup table size
* would exceed the upper size threshold for a "small" set.
@@ -936,7 +967,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
return;
}
- new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT);
+ new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT);
if (!new_lt)
return;
@@ -1188,7 +1219,7 @@ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int c
mem = s;
mem -= s->align_off;
- kfree(mem);
+ kvfree(mem);
}
/**
@@ -1209,10 +1240,9 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
void *scratch_aligned;
u32 align_off;
#endif
- scratch = kzalloc_node(struct_size(scratch, map,
- bsize_max * 2) +
- NFT_PIPAPO_ALIGN_HEADROOM,
- GFP_KERNEL_ACCOUNT, cpu_to_node(i));
+ scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) +
+ NFT_PIPAPO_ALIGN_HEADROOM,
+ GFP_KERNEL_ACCOUNT, cpu_to_node(i));
if (!scratch) {
/* On failure, there's no need to undo previous
* allocations: this means that some scratch maps have
@@ -1451,13 +1481,15 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
for (i = 0; i < old->field_count; i++) {
unsigned long *new_lt;
+ ssize_t lt_size;
memcpy(dst, src, offsetof(struct nft_pipapo_field, lt));
- new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) *
- src->bsize * sizeof(*dst->lt) +
- NFT_PIPAPO_ALIGN_HEADROOM,
- GFP_KERNEL_ACCOUNT);
+ lt_size = lt_calculate_size(src->groups, src->bb, src->bsize);
+ if (lt_size < 0)
+ goto out_lt;
+
+ new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT);
if (!new_lt)
goto out_lt;
@@ -1469,6 +1501,9 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
src->groups * NFT_PIPAPO_BUCKETS(src->bb));
if (src->rules > 0) {
+ if (src->rules_alloc > (INT_MAX / sizeof(*src->mt)))
+ goto out_mt;
+
dst->mt = kvmalloc_array(src->rules_alloc,
sizeof(*src->mt),
GFP_KERNEL_ACCOUNT);
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index b8d3c3213efe..be7c16c79f71 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -994,8 +994,9 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 8, pkt[8], bsize);
NFT_PIPAPO_AVX2_AND(6, 2, 3);
+ NFT_PIPAPO_AVX2_AND(3, 4, 7);
NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 9, pkt[9], bsize);
- NFT_PIPAPO_AVX2_AND(0, 4, 5);
+ NFT_PIPAPO_AVX2_AND(0, 3, 5);
NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize);
NFT_PIPAPO_AVX2_AND(2, 6, 7);
NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize);
@@ -1113,6 +1114,25 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
}
/**
+ * pipapo_resmap_init_avx2() - Initialise result map before first use
+ * @m: Matching data, including mapping table
+ * @res_map: Result map
+ *
+ * Like pipapo_resmap_init() but do not set start map bits covered by the first field.
+ */
+static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, unsigned long *res_map)
+{
+ const struct nft_pipapo_field *f = m->f;
+ int i;
+
+ /* Starting map doesn't need to be set to all-ones for this implementation,
+ * but we do need to zero the remaining bits, if any.
+ */
+ for (i = f->bsize; i < m->bsize_max; i++)
+ res_map[i] = 0ul;
+}
+
+/**
* nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
* @net: Network namespace
* @set: nftables API set representation
@@ -1170,7 +1190,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
res = scratch->map + (map_index ? m->bsize_max : 0);
fill = scratch->map + (map_index ? 0 : m->bsize_max);
- /* Starting map doesn't need to be set for this implementation */
+ pipapo_resmap_init_avx2(m, res);
nft_pipapo_avx2_prepare();
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index b7ea21327549..2e8ef16ff191 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -750,6 +750,46 @@ static void nft_rbtree_gc_init(const struct nft_set *set)
priv->last_gc = jiffies;
}
+/* rbtree stores ranges as singleton elements, each range is composed of two
+ * elements ...
+ */
+static u32 nft_rbtree_ksize(u32 size)
+{
+ return size * 2;
+}
+
+/* ... hide this detail to userspace. */
+static u32 nft_rbtree_usize(u32 size)
+{
+ if (!size)
+ return 0;
+
+ return size / 2;
+}
+
+static u32 nft_rbtree_adjust_maxsize(const struct nft_set *set)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ struct nft_rbtree_elem *rbe;
+ struct rb_node *node;
+ const void *key;
+
+ node = rb_last(&priv->root);
+ if (!node)
+ return 0;
+
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (!nft_rbtree_interval_end(rbe))
+ return 0;
+
+ key = nft_set_ext_key(&rbe->ext);
+ if (memchr(key, 1, set->klen))
+ return 0;
+
+ /* this is the all-zero no-match element. */
+ return 1;
+}
+
const struct nft_set_type nft_set_rbtree_type = {
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
@@ -768,5 +808,8 @@ const struct nft_set_type nft_set_rbtree_type = {
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.get = nft_rbtree_get,
+ .ksize = nft_rbtree_ksize,
+ .usize = nft_rbtree_usize,
+ .adjust_maxsize = nft_rbtree_adjust_maxsize,
},
};
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 5c6ed68cc6e0..e18d322290fb 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -335,13 +335,13 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr,
static const struct nla_policy nft_tunnel_opts_geneve_policy[NFTA_TUNNEL_KEY_GENEVE_MAX + 1] = {
[NFTA_TUNNEL_KEY_GENEVE_CLASS] = { .type = NLA_U16 },
[NFTA_TUNNEL_KEY_GENEVE_TYPE] = { .type = NLA_U8 },
- [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 128 },
+ [NFTA_TUNNEL_KEY_GENEVE_DATA] = { .type = NLA_BINARY, .len = 127 },
};
static int nft_tunnel_obj_geneve_init(const struct nlattr *attr,
struct nft_tunnel_opts *opts)
{
- struct geneve_opt *opt = (struct geneve_opt *)opts->u.data + opts->len;
+ struct geneve_opt *opt = (struct geneve_opt *)(opts->u.data + opts->len);
struct nlattr *tb[NFTA_TUNNEL_KEY_GENEVE_MAX + 1];
int err, data_len;
@@ -624,11 +624,11 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
struct geneve_opt *opt;
int offset = 0;
- inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE);
- if (!inner)
- goto failure;
while (opts->len > offset) {
- opt = (struct geneve_opt *)opts->u.data + offset;
+ inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE);
+ if (!inner)
+ goto failure;
+ opt = (struct geneve_opt *)(opts->u.data + offset);
if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS,
opt->opt_class) ||
nla_put_u8(skb, NFTA_TUNNEL_KEY_GENEVE_TYPE,
@@ -637,8 +637,8 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
opt->length * 4, opt->opt_data))
goto inner_failure;
offset += sizeof(*opt) + opt->length * 4;
+ nla_nest_end(skb, inner);
}
- nla_nest_end(skb, inner);
}
nla_nest_end(skb, nest);
return 0;
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 30e99464171b..93f064306901 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -91,7 +91,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb));
}
-#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static unsigned int
tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -119,7 +119,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_tcpoptstrip_target_info),
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "TCPOPTSTRIP",
.family = NFPROTO_IPV6,
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 65b965ca40ea..59b9d04400ca 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -48,7 +48,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_mark_tginfo2),
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES)
+#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES) || IS_ENABLED(CONFIG_NFT_COMPAT_ARP)
{
.name = "MARK",
.revision = 2,
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 7c6bf1c16813..0ca1cdfc4095 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -38,8 +38,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
nfacct = nfnl_acct_find_get(par->net, info->name);
if (nfacct == NULL) {
- pr_info_ratelimited("accounting object `%s' does not exists\n",
- info->name);
+ pr_info_ratelimited("accounting object `%.*s' does not exist\n",
+ NFACCT_NAME_MAX, info->name);
return -ENOENT;
}
info->nfacct = nfacct;
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index cd9160bbc919..33b77084a4e5 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -1165,6 +1165,11 @@ int netlbl_conn_setattr(struct sock *sk,
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
+ if (sk->sk_family != AF_INET6) {
+ ret_val = -EAFNOSUPPORT;
+ goto conn_setattr_return;
+ }
+
addr6 = (struct sockaddr_in6 *)addr;
entry = netlbl_domhsh_getentry_af6(secattr->domain,
&addr6->sin6_addr);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 775d707ec708..8b060465a2be 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
WARN_ON(skb->sk != NULL);
skb->sk = sk;
skb->destructor = netlink_skb_destructor;
- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_charge(sk, skb->truesize);
}
@@ -795,16 +794,6 @@ static int netlink_release(struct socket *sock)
sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
- /* Because struct net might disappear soon, do not keep a pointer. */
- if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) {
- __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
- /* Because of deferred_put_nlk_sk and use of work queue,
- * it is possible netns will be freed before this socket.
- */
- sock_net_set(sk, &init_net);
- __netns_tracker_alloc(&init_net, &sk->ns_tracker,
- false, GFP_KERNEL);
- }
call_rcu(&nlk->rcu, deferred_put_nlk_sk);
return 0;
}
@@ -1216,41 +1205,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
long *timeo, struct sock *ssk)
{
+ DECLARE_WAITQUEUE(wait, current);
struct netlink_sock *nlk;
+ unsigned int rmem;
nlk = nlk_sk(sk);
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
- DECLARE_WAITQUEUE(wait, current);
- if (!*timeo) {
- if (!ssk || netlink_is_kernel(ssk))
- netlink_overrun(sk);
- sock_put(sk);
- kfree_skb(skb);
- return -EAGAIN;
- }
-
- __set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&nlk->wait, &wait);
+ if ((rmem == skb->truesize || rmem <= READ_ONCE(sk->sk_rcvbuf)) &&
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
+ netlink_skb_set_owner_r(skb, sk);
+ return 0;
+ }
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
- !sock_flag(sk, SOCK_DEAD))
- *timeo = schedule_timeout(*timeo);
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&nlk->wait, &wait);
+ if (!*timeo) {
+ if (!ssk || netlink_is_kernel(ssk))
+ netlink_overrun(sk);
sock_put(sk);
+ kfree_skb(skb);
+ return -EAGAIN;
+ }
- if (signal_pending(current)) {
- kfree_skb(skb);
- return sock_intr_errno(*timeo);
- }
- return 1;
+ __set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&nlk->wait, &wait);
+ rmem = atomic_read(&sk->sk_rmem_alloc);
+
+ if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
+ !sock_flag(sk, SOCK_DEAD))
+ *timeo = schedule_timeout(*timeo);
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&nlk->wait, &wait);
+ sock_put(sk);
+
+ if (signal_pending(current)) {
+ kfree_skb(skb);
+ return sock_intr_errno(*timeo);
}
- netlink_skb_set_owner_r(skb, sk);
- return 0;
+
+ return 1;
}
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
@@ -1310,6 +1306,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
ret = -ECONNREFUSED;
if (nlk->netlink_rcv != NULL) {
ret = skb->len;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
netlink_skb_set_owner_r(skb, sk);
NETLINK_CB(skb).sk = ssk;
netlink_deliver_tap_kernel(sk, ssk, skb);
@@ -1386,13 +1383,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
struct netlink_sock *nlk = nlk_sk(sk);
+ unsigned int rmem, rcvbuf;
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+
+ if ((rmem == skb->truesize || rmem <= rcvbuf) &&
!test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
- return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+ return rmem > (rcvbuf >> 1);
}
+
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
return -1;
}
@@ -2248,6 +2251,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
struct netlink_ext_ack extack = {};
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
+ unsigned int rmem, rcvbuf;
size_t max_recvmsg_len;
struct module *module;
int err = -ENOBUFS;
@@ -2261,9 +2265,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
goto errout_skb;
}
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- goto errout_skb;
-
/* NLMSG_GOODSIZE is small to avoid high order allocations being
* required, but it makes sense to _attempt_ a 16K bytes allocation
* to reduce number of system calls on dump operations, if user
@@ -2286,6 +2287,13 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
if (!skb)
goto errout_skb;
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ if (rmem != skb->truesize && rmem >= rcvbuf) {
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+ goto errout_skb;
+ }
+
/* Trim skb to allocated size. User is expected to provide buffer as
* large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at
* netlink_recvmsg())). dump will pack as many smaller messages as
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index de175318a3a0..082ab66f120b 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -542,6 +542,8 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host,
pr_debug("pipe created=%d\n", pipe);
+ if (pipe >= NCI_HCI_MAX_PIPES)
+ pipe = NCI_HCI_INVALID_PIPE;
return pipe;
}
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index ed1508a9e093..aab107727f18 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -119,22 +119,22 @@ static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver)
memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart));
nu->tty = tty;
- tty->disc_data = nu;
skb_queue_head_init(&nu->tx_q);
INIT_WORK(&nu->write_work, nci_uart_write_work);
spin_lock_init(&nu->rx_lock);
ret = nu->ops.open(nu);
if (ret) {
- tty->disc_data = NULL;
kfree(nu);
+ return ret;
} else if (!try_module_get(nu->owner)) {
nu->ops.close(nu);
- tty->disc_data = NULL;
kfree(nu);
return -ENOENT;
}
- return ret;
+ tty->disc_data = nu;
+
+ return 0;
}
/* ------ LDISC part ------ */
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 16e260014684..2f22ca59586f 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -934,7 +934,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
{
struct vport *vport = ovs_vport_rcu(dp, out_port);
- if (likely(vport && netif_carrier_ok(vport->dev))) {
+ if (likely(vport &&
+ netif_running(vport->dev) &&
+ netif_carrier_ok(vport->dev))) {
u16 mru = OVS_CB(skb)->mru;
u32 cutlen = OVS_CB(skb)->cutlen;
@@ -945,12 +947,6 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
pskb_trim(skb, ovs_mac_header_len(key));
}
- /* Need to set the pkt_type to involve the routing layer. The
- * packet movement through the OVS datapath doesn't generally
- * use routing, but this is needed for tunnel cases.
- */
- skb->pkt_type = PACKET_OUTGOING;
-
if (likely(!mru ||
(skb->len <= mru + vport->dev->hard_header_len))) {
ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
@@ -979,8 +975,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
upcall.cmd = OVS_PACKET_CMD_ACTION;
upcall.mru = OVS_CB(skb)->mru;
- for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
- a = nla_next(a, &rem)) {
+ nla_for_each_nested(a, attr, rem) {
switch (nla_type(a)) {
case OVS_USERSPACE_ATTR_USERDATA:
upcall.userdata = a;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3bb4810234aa..e573e9221302 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1368,8 +1368,11 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
attr == OVS_KEY_ATTR_CT_MARK)
return true;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- attr == OVS_KEY_ATTR_CT_LABELS)
- return true;
+ attr == OVS_KEY_ATTR_CT_LABELS) {
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ return ovs_net->xt_label;
+ }
return false;
}
@@ -1378,7 +1381,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
{
- unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
struct ovs_conntrack_info ct_info;
const char *helper = NULL;
u16 family;
@@ -1407,12 +1409,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
return -ENOMEM;
}
- if (nf_connlabels_get(net, n_bits - 1)) {
- nf_ct_tmpl_free(ct_info.ct);
- OVS_NLERR(log, "Failed to set connlabel length");
- return -EOPNOTSUPP;
- }
-
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
@@ -1581,7 +1577,6 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
if (ct_info->ct) {
if (ct_info->timeout[0])
nf_ct_destroy_timeout(ct_info->ct);
- nf_connlabels_put(nf_ct_net(ct_info->ct));
nf_ct_tmpl_free(ct_info->ct);
}
}
@@ -2006,9 +2001,17 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
int ovs_ct_init(struct net *net)
{
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
+ unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ if (nf_connlabels_get(net, n_bits - 1)) {
+ ovs_net->xt_label = false;
+ OVS_NLERR(true, "Failed to set connlabel length");
+ } else {
+ ovs_net->xt_label = true;
+ }
+
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
return ovs_ct_limit_init(net, ovs_net);
#else
return 0;
@@ -2017,9 +2020,12 @@ int ovs_ct_init(struct net *net)
void ovs_ct_exit(struct net *net)
{
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
ovs_ct_limit_exit(net, ovs_net);
#endif
+
+ if (ovs_net->xt_label)
+ nf_connlabels_put(net);
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 78d9961fcd44..8d3c01f0e2aa 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2102,6 +2102,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
+ struct net *net_vport;
int err;
ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
@@ -2118,12 +2119,15 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
goto nla_put_failure;
- if (!net_eq(net, dev_net(vport->dev))) {
- int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
+ rcu_read_lock();
+ net_vport = dev_net_rcu(vport->dev);
+ if (!net_eq(net, net_vport)) {
+ int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
- goto nla_put_failure;
+ goto nla_put_failure_unlock;
}
+ rcu_read_unlock();
ovs_vport_get_stats(vport, &vport_stats);
if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
@@ -2144,6 +2148,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
genlmsg_end(skb, ovs_header);
return 0;
+nla_put_failure_unlock:
+ rcu_read_unlock();
nla_put_failure:
err = -EMSGSIZE;
error:
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 365b9bb7f546..9ca6231ea647 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -160,6 +160,9 @@ struct ovs_net {
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
+
+ /* Module reference for configuring conntrack. */
+ bool xt_label;
};
/**
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8a848ce72e29..b80bd3a90773 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -788,7 +788,7 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (eth_p_mpls(key->eth.type)) {
- u8 label_count = 1;
+ size_t label_count = 1;
memset(&key->mpls, 0, sizeof(key->mpls));
skb_set_inner_network_header(skb, skb->mac_len);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 729ef582a3a8..305daf57a4f9 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2317,14 +2317,10 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
OVS_FLOW_ATTR_MASK, true, skb);
}
-#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-
static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
struct sw_flow_actions *sfa;
- WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
-
sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL);
if (!sfa)
return ERR_PTR(-ENOMEM);
@@ -2480,15 +2476,6 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
- if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
- OVS_NLERR(log, "Flow action size exceeds max %u",
- MAX_ACTIONS_BUFSIZE);
- return ERR_PTR(-EMSGSIZE);
- }
- new_acts_size = MAX_ACTIONS_BUFSIZE;
- }
-
acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
return ERR_CAST(acts);
@@ -2889,7 +2876,8 @@ static int validate_set(const struct nlattr *a,
size_t key_len;
/* There can be only one key in a action */
- if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
+ if (!nla_ok(ovs_key, nla_len(a)) ||
+ nla_total_size(nla_len(ovs_key)) != nla_len(a))
return -EINVAL;
key_len = nla_len(ovs_key);
@@ -3545,7 +3533,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
int err;
u32 mpls_label_count = 0;
- *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
+ *sfa = nla_alloc_flow_actions(nla_len(attr));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f3cecb3e4bcb..e8589fede4d4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2784,7 +2784,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
int hlen, tlen, copylen = 0;
- long timeo = 0;
+ long timeo;
mutex_lock(&po->pg_vec_lock);
@@ -2838,22 +2838,28 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
size_max = dev->mtu + reserve + VLAN_HLEN;
+ timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
reinit_completion(&po->skb_completion);
do {
ph = packet_current_frame(po, &po->tx_ring,
TP_STATUS_SEND_REQUEST);
if (unlikely(ph == NULL)) {
- if (need_wait && skb) {
- timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
+ /* Note: packet_read_pending() might be slow if we
+ * have to call it as it's per_cpu variable, but in
+ * fast-path we don't have to call it, only when ph
+ * is NULL, we need to check the pending_refcnt.
+ */
+ if (need_wait && packet_read_pending(&po->tx_ring)) {
timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
if (timeo <= 0) {
err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
goto out_put;
}
- }
- /* check for additional frames */
- continue;
+ /* check for additional frames */
+ continue;
+ } else
+ break;
}
skb = NULL;
@@ -2942,14 +2948,7 @@ tpacket_error:
}
packet_increment_head(&po->tx_ring);
len_sum += tp_len;
- } while (likely((ph != NULL) ||
- /* Note: packet_read_pending() might be slow if we have
- * to call it as it's per_cpu variable, but in fast-path
- * we already short-circuit the loop with the first
- * condition, and luckily don't have to go that path
- * anyway.
- */
- (need_wait && packet_read_pending(&po->tx_ring))));
+ } while (1);
err = len_sum;
goto out_put;
@@ -4563,10 +4562,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
spin_lock(&po->bind_lock);
was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING);
num = po->num;
- if (was_running) {
- WRITE_ONCE(po->num, 0);
+ WRITE_ONCE(po->num, 0);
+ if (was_running)
__unregister_prot_hook(sk, false);
- }
+
spin_unlock(&po->bind_lock);
synchronize_net();
@@ -4598,10 +4597,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
mutex_unlock(&po->pg_vec_lock);
spin_lock(&po->bind_lock);
- if (was_running) {
- WRITE_ONCE(po->num, num);
+ WRITE_ONCE(po->num, num);
+ if (was_running)
register_prot_hook(sk);
- }
+
spin_unlock(&po->bind_lock);
if (pg_vec && (po->tp_version > TPACKET_V2)) {
/* Because we don't support block-based V3 on tx-ring */
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 53a858478e22..62527e1ebb88 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -826,6 +826,7 @@ static struct sock *pep_sock_accept(struct sock *sk,
}
/* Check for duplicate pipe handle */
+ pn_skb_get_dst_sockaddr(skb, &dst);
newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
if (unlikely(newsk)) {
__sock_put(newsk);
@@ -850,7 +851,6 @@ static struct sock *pep_sock_accept(struct sock *sk,
newsk->sk_destruct = pipe_destruct;
newpn = pep_sk(newsk);
- pn_skb_get_dst_sockaddr(skb, &dst);
pn_skb_get_src_sockaddr(skb, &src);
newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
newpn->pn_sk.dobject = pn_sockaddr_get_object(&src);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 0581c53e6517..3cc2f303bf78 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -504,12 +504,8 @@ bool rds_tcp_tune(struct socket *sock)
release_sock(sk);
return false;
}
- /* Update ns_tracker to current stack trace and refcounted tracker */
- __netns_tracker_free(net, &sk->ns_tracker, false);
-
- sk->sk_net_refcnt = 1;
- netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
+ sk_net_refcnt_upgrade(sk);
+ put_net(net);
}
rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 59050caab65c..a4a668b88a8f 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -397,15 +397,15 @@ static int rose_setsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
struct rose_sock *rose = rose_sk(sk);
- int opt;
+ unsigned int opt;
if (level != SOL_ROSE)
return -ENOPROTOOPT;
- if (optlen < sizeof(int))
+ if (optlen < sizeof(unsigned int))
return -EINVAL;
- if (copy_from_sockptr(&opt, optval, sizeof(int)))
+ if (copy_from_sockptr(&opt, optval, sizeof(unsigned int)))
return -EFAULT;
switch (optname) {
@@ -414,31 +414,31 @@ static int rose_setsockopt(struct socket *sock, int level, int optname,
return 0;
case ROSE_T1:
- if (opt < 1)
+ if (opt < 1 || opt > UINT_MAX / HZ)
return -EINVAL;
rose->t1 = opt * HZ;
return 0;
case ROSE_T2:
- if (opt < 1)
+ if (opt < 1 || opt > UINT_MAX / HZ)
return -EINVAL;
rose->t2 = opt * HZ;
return 0;
case ROSE_T3:
- if (opt < 1)
+ if (opt < 1 || opt > UINT_MAX / HZ)
return -EINVAL;
rose->t3 = opt * HZ;
return 0;
case ROSE_HOLDBACK:
- if (opt < 1)
+ if (opt < 1 || opt > UINT_MAX / HZ)
return -EINVAL;
rose->hb = opt * HZ;
return 0;
case ROSE_IDLE:
- if (opt < 0)
+ if (opt > UINT_MAX / (60 * HZ))
return -EINVAL;
rose->idle = opt * 60 * HZ;
return 0;
@@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct net_device *dev;
ax25_address *source;
ax25_uid_assoc *user;
+ int err = -EINVAL;
int n;
- if (!sock_flag(sk, SOCK_ZAPPED))
- return -EINVAL;
-
if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose))
return -EINVAL;
@@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS)
return -EINVAL;
- if ((dev = rose_dev_get(&addr->srose_addr)) == NULL)
- return -EADDRNOTAVAIL;
+ lock_sock(sk);
+
+ if (!sock_flag(sk, SOCK_ZAPPED))
+ goto out_release;
+
+ err = -EADDRNOTAVAIL;
+ dev = rose_dev_get(&addr->srose_addr);
+ if (!dev)
+ goto out_release;
source = &addr->srose_call;
@@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
} else {
if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) {
dev_put(dev);
- return -EACCES;
+ err = -EACCES;
+ goto out_release;
}
rose->source_call = *source;
}
@@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
rose_insert_socket(sk);
sock_reset_flag(sk, SOCK_ZAPPED);
-
- return 0;
+ err = 0;
+out_release:
+ release_sock(sk);
+ return err;
}
static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index fee772b4637c..a7054546f52d 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -497,22 +497,15 @@ void rose_rt_device_down(struct net_device *dev)
t = rose_node;
rose_node = rose_node->next;
- for (i = 0; i < t->count; i++) {
+ for (i = t->count - 1; i >= 0; i--) {
if (t->neighbour[i] != s)
continue;
t->count--;
- switch (i) {
- case 0:
- t->neighbour[0] = t->neighbour[1];
- fallthrough;
- case 1:
- t->neighbour[1] = t->neighbour[2];
- break;
- case 2:
- break;
- }
+ memmove(&t->neighbour[i], &t->neighbour[i + 1],
+ sizeof(t->neighbour[0]) *
+ (t->count - i));
}
if (t->count <= 0)
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index f06ddbed3fed..1525773e94aa 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -122,6 +122,10 @@ static void rose_heartbeat_expiry(struct timer_list *t)
struct rose_sock *rose = rose_sk(sk);
bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ/20);
+ goto out;
+ }
switch (rose->state) {
case ROSE_STATE_0:
/* Magic here: If we listen() and a new link dies before it
@@ -152,6 +156,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
}
rose_start_heartbeat(sk);
+out:
bh_unlock_sock(sk);
sock_put(sk);
}
@@ -162,6 +167,10 @@ static void rose_timer_expiry(struct timer_list *t)
struct sock *sk = &rose->sock;
bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &rose->timer, jiffies + HZ/20);
+ goto out;
+ }
switch (rose->state) {
case ROSE_STATE_1: /* T1 */
case ROSE_STATE_4: /* T2 */
@@ -182,6 +191,7 @@ static void rose_timer_expiry(struct timer_list *t)
}
break;
}
+out:
bh_unlock_sock(sk);
sock_put(sk);
}
@@ -192,6 +202,10 @@ static void rose_idletimer_expiry(struct timer_list *t)
struct sock *sk = &rose->sock;
bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &rose->idletimer, jiffies + HZ/20);
+ goto out;
+ }
rose_clear_queues(sk);
rose_write_internal(sk, ROSE_CLEAR_REQUEST);
@@ -207,6 +221,7 @@ static void rose_idletimer_expiry(struct timer_list *t)
sk->sk_state_change(sk);
sock_set_flag(sk, SOCK_DEAD);
}
+out:
bh_unlock_sock(sk);
sock_put(sk);
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index d0fd37bdcfe9..6b036c0564c7 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -567,6 +567,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */
RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */
RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */
+ RXRPC_CALL_CONN_CHALLENGING, /* The connection is being challenged */
};
/*
@@ -587,7 +588,6 @@ enum rxrpc_call_state {
RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */
- RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 0f5a1d77b890..37ac8a665678 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -149,6 +149,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
id_in_use:
write_unlock(&rx->call_lock);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EBADSLT);
rxrpc_cleanup_call(call);
_leave(" = -EBADSLT");
return -EBADSLT;
@@ -218,6 +219,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->call_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_call *call = b->call_backlog[tail];
+ rxrpc_see_call(call, rxrpc_call_see_discard);
rcu_assign_pointer(call->socket, rx);
if (rx->discard_new_call) {
_debug("discard %lx", call->user_call_ID);
@@ -253,6 +255,9 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
unsigned short call_tail, conn_tail, peer_tail;
unsigned short call_count, conn_count;
+ if (!b)
+ return NULL;
+
/* #calls >= #conns >= #peers must hold true. */
call_head = smp_load_acquire(&b->call_backlog_head);
call_tail = b->call_backlog_tail;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f9e983a12c14..e379a2a9375a 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
[RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
[RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
[RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc",
- [RXRPC_CALL_SERVER_SECURING] = "SvSecure",
[RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq",
[RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq",
[RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl",
@@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
call->cong_tstamp = skb->tstamp;
__set_bit(RXRPC_CALL_EXPOSED, &call->flags);
- rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
spin_lock(&conn->state_lock);
switch (conn->state) {
case RXRPC_CONN_SERVICE_UNSECURED:
case RXRPC_CONN_SERVICE_CHALLENGING:
- rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
+ __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags);
break;
case RXRPC_CONN_SERVICE:
- rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
break;
case RXRPC_CONN_ABORTED:
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 598b4ee389fc..c4eb7986efdd 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -63,11 +63,12 @@ int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
/*
* Mark a connection as being remotely aborted.
*/
-static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn,
+static void rxrpc_input_conn_abort(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
- return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
- RXRPC_CALL_REMOTELY_ABORTED);
+ trace_rxrpc_rx_conn_abort(conn, skb);
+ rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
+ RXRPC_CALL_REMOTELY_ABORTED);
}
/*
@@ -202,11 +203,14 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn)
for (i = 0; i < RXRPC_MAXCALLS; i++) {
call = conn->channels[i].call;
- if (call)
+ if (call) {
+ rxrpc_see_call(call, rxrpc_call_see_conn_abort);
rxrpc_set_call_completion(call,
conn->completion,
conn->abort_code,
conn->error);
+ rxrpc_poke_call(call, rxrpc_call_poke_conn_abort);
+ }
}
_leave("");
@@ -218,10 +222,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn)
*/
static void rxrpc_call_is_secure(struct rxrpc_call *call)
{
- if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) {
- rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+ if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags))
rxrpc_notify_socket(call);
- }
}
/*
@@ -262,6 +264,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
* we've already received the packet, put it on the
* front of the queue.
*/
+ sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured);
skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
skb_queue_head(&conn->local->rx_queue, skb);
@@ -427,14 +430,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
rxrpc_abort_calls(conn);
- switch (skb->mark) {
- case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
- if (conn->state != RXRPC_CONN_SERVICE)
- break;
+ if (skb) {
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+ if (conn->state != RXRPC_CONN_SERVICE)
+ break;
- for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
- rxrpc_call_is_secure(conn->channels[loop].call);
- break;
+ for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+ rxrpc_call_is_secure(conn->channels[loop].call);
+ break;
+ }
}
/* Process delayed ACKs whose time has come. */
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 1539d315afe7..7bc68135966e 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
+ INIT_LIST_HEAD(&conn->attend_link);
mutex_init(&conn->security_lock);
mutex_init(&conn->tx_data_alloc_lock);
skb_queue_head_init(&conn->rx_queue);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 16d49a861dbb..6a075a7c190d 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -573,7 +573,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb
rxrpc_propose_delay_ACK(call, sp->hdr.serial,
rxrpc_propose_ack_input_data);
}
- if (notify) {
+ if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) {
trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
rxrpc_notify_socket(call);
}
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5ea9601efd05..ccfae607c9bb 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -590,6 +590,9 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
__be32 code;
int ret, ioc;
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
+ return; /* Never abort an abort. */
+
rxrpc_see_skb(skb, rxrpc_skb_see_reject);
iov[0].iov_base = &whdr;
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 552ba84a255c..5d0842efde69 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -238,7 +238,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
bool use;
int slot;
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
while (!list_empty(collector)) {
peer = list_entry(collector->next,
@@ -249,7 +249,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
continue;
use = __rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive);
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
if (use) {
keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
@@ -269,17 +269,17 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
*/
slot += cursor;
slot &= mask;
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
list_add_tail(&peer->keepalive_link,
&rxnet->peer_keepalive[slot & mask]);
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive);
}
rxrpc_put_peer(peer, rxrpc_peer_put_keepalive);
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
}
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
}
/*
@@ -309,7 +309,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work)
* second; the bucket at cursor + 1 goes at now + 1s and so
* on...
*/
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
list_splice_init(&rxnet->peer_keepalive_new, &collector);
stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
@@ -321,7 +321,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work)
}
base = now;
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
rxnet->peer_keepalive_base = base;
rxnet->peer_keepalive_cursor = cursor;
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 49dcda67a0d5..956fc7ea4b73 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -313,10 +313,10 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer)
hash_key = rxrpc_peer_hash_key(local, &peer->srx);
rxrpc_init_peer(local, peer, hash_key);
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new);
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
}
/*
@@ -348,7 +348,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
return NULL;
}
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
/* Need to check that we aren't racing with someone else */
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
@@ -361,7 +361,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
&rxnet->peer_keepalive_new);
}
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
if (peer)
rxrpc_free_peer(candidate);
@@ -411,10 +411,10 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
ASSERT(hlist_empty(&peer->error_targets));
- spin_lock(&rxnet->peer_hash_lock);
+ spin_lock_bh(&rxnet->peer_hash_lock);
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
- spin_unlock(&rxnet->peer_hash_lock);
+ spin_unlock_bh(&rxnet->peer_hash_lock);
rxrpc_free_peer(peer);
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index a482f88c5fc5..e24a44bae9a3 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -351,6 +351,16 @@ try_again:
goto try_again;
}
+ rxrpc_see_call(call, rxrpc_call_see_recvmsg);
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_see_call(call, rxrpc_call_see_already_released);
+ list_del_init(&call->recvmsg_link);
+ spin_unlock_irq(&rx->recvmsg_lock);
+ release_sock(&rx->sk);
+ trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ goto try_again;
+ }
if (!(flags & MSG_PEEK))
list_del_init(&call->recvmsg_link);
else
@@ -374,8 +384,13 @@ try_again:
release_sock(&rx->sk);
- if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
- BUG();
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_see_call(call, rxrpc_call_see_already_released);
+ mutex_unlock(&call->user_mutex);
+ if (!(flags & MSG_PEEK))
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ goto try_again;
+ }
if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
if (flags & MSG_CMSG_COMPAT) {
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
index 085e7892d310..b1536da2246b 100644
--- a/net/rxrpc/rxperf.c
+++ b/net/rxrpc/rxperf.c
@@ -478,6 +478,18 @@ static int rxperf_deliver_request(struct rxperf_call *call)
call->unmarshal++;
fallthrough;
case 2:
+ ret = rxperf_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ /* Deal with the terminal magic cookie. */
+ call->iov_len = 4;
+ call->kvec[0].iov_len = call->iov_len;
+ call->kvec[0].iov_base = call->tmp;
+ iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len);
+ call->unmarshal++;
+ fallthrough;
+ case 3:
ret = rxperf_extract_data(call, false);
if (ret < 0)
return ret;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 23d18fe5de9f..154f650efb0a 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -654,7 +654,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
} else {
switch (rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_AWAIT_CONN:
- case RXRPC_CALL_SERVER_SECURING:
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
if (p.command == RXRPC_CMD_SEND_ABORT)
break;
fallthrough;
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 5dd41a012110..ae571bfe84c7 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -44,9 +44,9 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
ipv4_change_dsfield(ip_hdr(skb),
INET_ECN_MASK,
newdscp);
- ca->stats_dscp_set++;
+ atomic64_inc(&ca->stats_dscp_set);
} else {
- ca->stats_dscp_error++;
+ atomic64_inc(&ca->stats_dscp_error);
}
}
break;
@@ -57,9 +57,9 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
ipv6_change_dsfield(ipv6_hdr(skb),
INET_ECN_MASK,
newdscp);
- ca->stats_dscp_set++;
+ atomic64_inc(&ca->stats_dscp_set);
} else {
- ca->stats_dscp_error++;
+ atomic64_inc(&ca->stats_dscp_error);
}
}
break;
@@ -72,7 +72,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
struct tcf_ctinfo_params *cp,
struct sk_buff *skb)
{
- ca->stats_cpmark_set++;
+ atomic64_inc(&ca->stats_cpmark_set);
skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask;
}
@@ -323,15 +323,18 @@ static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a,
}
if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET,
- ci->stats_dscp_set, TCA_CTINFO_PAD))
+ atomic64_read(&ci->stats_dscp_set),
+ TCA_CTINFO_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR,
- ci->stats_dscp_error, TCA_CTINFO_PAD))
+ atomic64_read(&ci->stats_dscp_error),
+ TCA_CTINFO_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET,
- ci->stats_cpmark_set, TCA_CTINFO_PAD))
+ atomic64_read(&ci->stats_cpmark_set),
+ TCA_CTINFO_PAD))
goto nla_put_failure;
spin_unlock_bh(&ci->tcf_lock);
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index af7c99845948..e296714803dc 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -68,7 +68,7 @@ geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY,
- .len = 128 },
+ .len = 127 },
};
static const struct nla_policy
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bbc778c233c8..a3bab5e27e71 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -97,7 +97,7 @@ tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base,
n, xa_limit_32b, &next, GFP_KERNEL);
- if (err)
+ if (err < 0)
goto err_xa_alloc;
exts->miss_cookie_node = n;
@@ -390,6 +390,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
tp->protocol = protocol;
tp->prio = prio;
tp->chain = chain;
+ tp->usesw = !tp->ops->reoffload;
spin_lock_init(&tp->lock);
refcount_set(&tp->refcnt, 1);
@@ -410,39 +411,31 @@ static void tcf_proto_get(struct tcf_proto *tp)
refcount_inc(&tp->refcnt);
}
-static void tcf_maintain_bypass(struct tcf_block *block)
+static void tcf_proto_count_usesw(struct tcf_proto *tp, bool add)
{
- int filtercnt = atomic_read(&block->filtercnt);
- int skipswcnt = atomic_read(&block->skipswcnt);
- bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt;
-
- if (bypass_wanted != block->bypass_wanted) {
#ifdef CONFIG_NET_CLS_ACT
- if (bypass_wanted)
- static_branch_inc(&tcf_bypass_check_needed_key);
- else
- static_branch_dec(&tcf_bypass_check_needed_key);
-#endif
- block->bypass_wanted = bypass_wanted;
+ struct tcf_block *block = tp->chain->block;
+ bool counted = false;
+
+ if (!add) {
+ if (tp->usesw && tp->counted) {
+ if (!atomic_dec_return(&block->useswcnt))
+ static_branch_dec(&tcf_sw_enabled_key);
+ tp->counted = false;
+ }
+ return;
}
-}
-
-static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add)
-{
- lockdep_assert_not_held(&block->cb_lock);
- down_write(&block->cb_lock);
- if (*counted != add) {
- if (add) {
- atomic_inc(&block->filtercnt);
- *counted = true;
- } else {
- atomic_dec(&block->filtercnt);
- *counted = false;
- }
+ spin_lock(&tp->lock);
+ if (tp->usesw && !tp->counted) {
+ counted = true;
+ tp->counted = true;
}
- tcf_maintain_bypass(block);
- up_write(&block->cb_lock);
+ spin_unlock(&tp->lock);
+
+ if (counted && atomic_inc_return(&block->useswcnt) == 1)
+ static_branch_inc(&tcf_sw_enabled_key);
+#endif
}
static void tcf_chain_put(struct tcf_chain *chain);
@@ -451,7 +444,7 @@ static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
bool sig_destroy, struct netlink_ext_ack *extack)
{
tp->ops->destroy(tp, rtnl_held, extack);
- tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false);
+ tcf_proto_count_usesw(tp, false);
if (sig_destroy)
tcf_proto_signal_destroyed(tp->chain, tp);
tcf_chain_put(tp->chain);
@@ -2058,6 +2051,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
+ int ret = -EMSGSIZE;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
@@ -2102,11 +2096,45 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
return skb->len;
+cls_op_not_supp:
+ ret = -EOPNOTSUPP;
out_nlmsg_trim:
nla_put_failure:
-cls_op_not_supp:
nlmsg_trim(skb, b);
- return -1;
+ return ret;
+}
+
+static struct sk_buff *tfilter_notify_prep(struct net *net,
+ struct sk_buff *oskb,
+ struct nlmsghdr *n,
+ struct tcf_proto *tp,
+ struct tcf_block *block,
+ struct Qdisc *q, u32 parent,
+ void *fh, int event,
+ u32 portid, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ unsigned int size = oskb ? max(NLMSG_GOODSIZE, oskb->len) : NLMSG_GOODSIZE;
+ struct sk_buff *skb;
+ int ret;
+
+retry:
+ skb = alloc_skb(size, GFP_KERNEL);
+ if (!skb)
+ return ERR_PTR(-ENOBUFS);
+
+ ret = tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
+ n->nlmsg_seq, n->nlmsg_flags, event, false,
+ rtnl_held, extack);
+ if (ret <= 0) {
+ kfree_skb(skb);
+ if (ret == -EMSGSIZE) {
+ size += NLMSG_GOODSIZE;
+ goto retry;
+ }
+ return ERR_PTR(-EINVAL);
+ }
+ return skb;
}
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
@@ -2122,16 +2150,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
return 0;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, event,
- false, rtnl_held, extack) <= 0) {
- kfree_skb(skb);
- return -EINVAL;
- }
+ skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, event,
+ portid, rtnl_held, extack);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
if (unicast)
err = rtnl_unicast(skb, net, portid);
@@ -2154,16 +2176,11 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
return tp->ops->delete(tp, fh, last, rtnl_held, extack);
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
- false, rtnl_held, extack) <= 0) {
+ skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh,
+ RTM_DELTFILTER, portid, rtnl_held, extack);
+ if (IS_ERR(skb)) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
- kfree_skb(skb);
- return -EINVAL;
+ return PTR_ERR(skb);
}
err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
@@ -2404,7 +2421,7 @@ replay:
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held, extack);
tfilter_put(tp, fh);
- tcf_block_filter_cnt_update(block, &tp->counted, true);
+ tcf_proto_count_usesw(tp, true);
/* q pointer is NULL for shared blocks */
if (q)
q->flags &= ~TCQ_F_CAN_BYPASS;
@@ -3521,8 +3538,6 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
if (*flags & TCA_CLS_FLAGS_IN_HW)
return;
*flags |= TCA_CLS_FLAGS_IN_HW;
- if (tc_skip_sw(*flags))
- atomic_inc(&block->skipswcnt);
atomic_inc(&block->offloadcnt);
}
@@ -3531,8 +3546,6 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
if (!(*flags & TCA_CLS_FLAGS_IN_HW))
return;
*flags &= ~TCA_CLS_FLAGS_IN_HW;
- if (tc_skip_sw(*flags))
- atomic_dec(&block->skipswcnt);
atomic_dec(&block->offloadcnt);
}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 1941ebec23ff..7fbe42f0e5c2 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -509,6 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(prog->gen_flags))
prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+ tcf_proto_update_usesw(tp, prog->gen_flags);
+
if (oldprog) {
idr_replace(&head->handle_idr, prog, handle);
list_replace_rcu(&oldprog->link, &prog->link);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1008ec8a464c..099ff6a3e1f5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -766,7 +766,7 @@ geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY,
- .len = 128 },
+ .len = 127 },
};
static const struct nla_policy
@@ -2503,6 +2503,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(fnew->flags))
fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+ tcf_proto_update_usesw(tp, fnew->flags);
+
spin_lock(&tp->lock);
/* tp was deleted concurrently. -EAGAIN will cause caller to lookup
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 9f1e62ca508d..f03bf5da39ee 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -228,6 +228,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(new->flags))
new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+ tcf_proto_update_usesw(tp, new->flags);
+
*arg = head;
rcu_assign_pointer(tp->root, new);
return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d3a03c57545b..2a1c00048fd6 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -951,6 +951,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(new->flags))
new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+ tcf_proto_update_usesw(tp, new->flags);
+
u32_replace_knode(tp, tp_c, new);
tcf_unbind_filter(tp, &n->res);
tcf_exts_get_net(&n->exts);
@@ -1164,6 +1166,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(n->flags))
n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+ tcf_proto_update_usesw(tp, n->flags);
+
ins = &ht->ht[TC_U32_HASH(handle)];
for (pins = rtnl_dereference(*ins); pins;
ins = &pins->next, pins = rtnl_dereference(*ins))
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a1d27bc039a3..c56a01992cb2 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -334,17 +334,22 @@ out:
return q;
}
-static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
+static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
+ struct netlink_ext_ack *extack)
{
unsigned long cl;
const struct Qdisc_class_ops *cops = p->ops->cl_ops;
- if (cops == NULL)
- return NULL;
+ if (cops == NULL) {
+ NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
cl = cops->find(p, classid);
- if (cl == 0)
- return NULL;
+ if (cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class not found");
+ return ERR_PTR(-ENOENT);
+ }
return cops->leaf(p, cl);
}
@@ -779,15 +784,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
- bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
bool notify;
int drops;
- if (n == 0 && len == 0)
- return;
drops = max_t(int, n, 0);
rcu_read_lock();
while ((parentid = sch->parent)) {
@@ -796,17 +798,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
if (sch->flags & TCQ_F_NOPARENT)
break;
- /* Notify parent qdisc only if child qdisc becomes empty.
- *
- * If child was empty even before update then backlog
- * counter is screwed and we skip notification because
- * parent class is already passive.
- *
- * If the original child was offloaded then it is allowed
- * to be seem as empty, so the parent is notified anyway.
- */
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
- !qdisc_is_offloaded);
+ /* Notify parent qdisc only if child qdisc becomes empty. */
+ notify = !sch->q.qlen;
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
@@ -815,6 +808,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
}
cops = sch->ops->cl_ops;
if (notify && cops->qlen_notify) {
+ /* Note that qlen_notify must be idempotent as it may get called
+ * multiple times.
+ */
cl = cops->find(sch, parentid);
cops->qlen_notify(sch, cl);
}
@@ -1535,7 +1531,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ q = qdisc_leaf(p, clid, extack);
} else if (dev_ingress_queue(dev)) {
q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
@@ -1546,6 +1542,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
return -ENOENT;
}
+ if (IS_ERR(q))
+ return PTR_ERR(q);
if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
NL_SET_ERR_MSG(extack, "Invalid handle");
@@ -1639,7 +1637,9 @@ replay:
NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ q = qdisc_leaf(p, clid, extack);
+ if (IS_ERR(q))
+ return PTR_ERR(q);
} else if (dev_ingress_queue_create(dev)) {
q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
@@ -1664,6 +1664,10 @@ replay:
q = qdisc_lookup(dev, tcm->tcm_handle);
if (!q)
goto create_n_graft;
+ if (q->parent != tcm->tcm_parent) {
+ NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
+ return -EINVAL;
+ }
if (n->nlmsg_flags & NLM_F_EXCL) {
NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
return -EEXIST;
@@ -2250,6 +2254,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
return -EOPNOTSUPP;
}
+ /* Prevent creation of traffic classes with classid TC_H_ROOT */
+ if (clid == TC_H_ROOT) {
+ NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
+ return -EINVAL;
+ }
+
new_cl = cl;
err = -EOPNOTSUPP;
if (cops->change)
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 3e8d4fe4d91e..afd9805cb68e 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -65,10 +65,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
&q->stats, qdisc_pkt_len, codel_get_enqueue_time,
drop_func, dequeue_func);
- /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
- * or HTB crashes. Defer it for next round.
- */
- if (q->stats.drop_count && sch->q.qlen) {
+ if (q->stats.drop_count) {
qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
q->stats.drop_count = 0;
q->stats.drop_len = 0;
@@ -146,7 +143,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index c69b999fae17..9b6d79bd8737 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -35,6 +35,11 @@ struct drr_sched {
struct Qdisc_class_hash clhash;
};
+static bool cl_is_active(struct drr_class *cl)
+{
+ return !list_empty(&cl->alist);
+}
+
static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
{
struct drr_sched *q = qdisc_priv(sch);
@@ -105,6 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return -ENOBUFS;
gnet_stats_basic_sync_init(&cl->bstats);
+ INIT_LIST_HEAD(&cl->alist);
cl->common.classid = classid;
cl->quantum = quantum;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -229,7 +235,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
{
struct drr_class *cl = (struct drr_class *)arg;
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
}
static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
@@ -336,7 +342,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
int err = 0;
- bool first;
cl = drr_classify(skb, sch, &err);
if (cl == NULL) {
@@ -346,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return err;
}
- first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
@@ -356,7 +360,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return err;
}
- if (first) {
+ if (!cl_is_active(cl)) {
list_add_tail(&cl->alist, &q->active);
cl->deficit = cl->quantum;
}
@@ -390,7 +394,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
if (unlikely(skb == NULL))
goto out;
if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
bstats_update(&cl->bstats, skb);
qdisc_bstats_update(sch, skb);
@@ -431,7 +435,7 @@ static void drr_reset_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
if (cl->qdisc->q.qlen)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
qdisc_reset(cl->qdisc);
}
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index f80bc05d4c5a..82635dd2cfa5 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -74,6 +74,11 @@ static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
};
+static bool cl_is_active(struct ets_class *cl)
+{
+ return !list_empty(&cl->alist);
+}
+
static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
unsigned int *quantum,
struct netlink_ext_ack *extack)
@@ -91,6 +96,8 @@ ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
{
struct ets_sched *q = qdisc_priv(sch);
+ if (arg == 0 || arg > q->nbands)
+ return NULL;
return &q->classes[arg - 1];
}
@@ -291,7 +298,7 @@ static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
* to remove them.
*/
if (!ets_class_is_strict(q, cl) && sch->q.qlen)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
}
static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
@@ -414,7 +421,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct ets_sched *q = qdisc_priv(sch);
struct ets_class *cl;
int err = 0;
- bool first;
cl = ets_classify(skb, sch, &err);
if (!cl) {
@@ -424,7 +430,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return err;
}
- first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
@@ -434,7 +439,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return err;
}
- if (first && !ets_class_is_strict(q, cl)) {
+ if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) {
list_add_tail(&cl->alist, &q->active);
cl->deficit = cl->quantum;
}
@@ -486,7 +491,7 @@ static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
if (unlikely(!skb))
goto out;
if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
return ets_qdisc_dequeue_skb(sch, skb);
}
@@ -646,6 +651,12 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
+ for (i = nbands; i < oldbands; i++) {
+ if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
+ list_del_init(&q->classes[i].alist);
+ qdisc_purge_queue(q->classes[i].qdisc);
+ }
+
WRITE_ONCE(q->nbands, nbands);
for (i = nstrict; i < q->nstrict; i++) {
if (q->classes[i].qdisc->q.qlen) {
@@ -653,11 +664,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
q->classes[i].deficit = quanta[i];
}
}
- for (i = q->nbands; i < oldbands; i++) {
- if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
- list_del(&q->classes[i].alist);
- qdisc_tree_flush_backlog(q->classes[i].qdisc);
- }
WRITE_ONCE(q->nstrict, nstrict);
memcpy(q->prio2band, priomap, sizeof(priomap));
@@ -711,7 +717,7 @@ static void ets_qdisc_reset(struct Qdisc *sch)
for (band = q->nstrict; band < q->nbands; band++) {
if (q->classes[band].qdisc->q.qlen)
- list_del(&q->classes[band].alist);
+ list_del_init(&q->classes[band].alist);
}
for (band = 0; band < q->nbands; band++)
qdisc_reset(q->classes[band].qdisc);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index b50b2c2cc09b..e6bfd39ff339 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
unsigned int prev_backlog;
+ if (unlikely(READ_ONCE(sch->limit) == 0))
+ return qdisc_drop(skb, sch, to_free);
+
if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index afefe124d903..1af9768cd8ff 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -1113,7 +1113,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
}
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = fq_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
if (!skb)
break;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 4f908c11ba95..551b7cbdae90 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -314,10 +314,8 @@ begin:
}
qdisc_bstats_update(sch, skb);
flow->deficit -= qdisc_pkt_len(skb);
- /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
- * or HTB crashes. Defer it for next round.
- */
- if (q->cstats.drop_count && sch->q.qlen) {
+
+ if (q->cstats.drop_count) {
qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
q->cstats.drop_len);
q->cstats.drop_count = 0;
@@ -442,7 +440,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
while (sch->q.qlen > sch->limit ||
q->memory_usage > q->memory_limit) {
- struct sk_buff *skb = fq_codel_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
q->cstats.drop_len += qdisc_pkt_len(skb);
rtnl_kfree_skbs(skb, skb);
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index c38f33ff80bd..6ed08b705f8a 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -364,7 +364,7 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
/* Drop excess packets if new limit is lower */
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
len_dropped += qdisc_pkt_len(skb);
num_dropped += 1;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 38ec18f73de4..8874ae668095 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -911,8 +911,8 @@ static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
bands[prio] = q;
}
- return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
- GFP_KERNEL);
+ return skb_array_resize_multiple_bh(bands, PFIFO_FAST_BANDS, new_len,
+ GFP_KERNEL);
}
struct Qdisc_ops pfifo_fast_ops __read_mostly = {
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 79ba9dc70254..43b0343a7cd0 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -913,7 +913,8 @@ static void gred_destroy(struct Qdisc *sch)
for (i = 0; i < table->DPs; i++)
gred_destroy_vq(table->tab[i]);
- gred_offload(sch, TC_GRED_DESTROY);
+ if (table->opt)
+ gred_offload(sch, TC_GRED_DESTROY);
kfree(table->opt);
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c287bf8423b4..5a7745170e84 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -175,6 +175,11 @@ struct hfsc_sched {
#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
+static bool cl_in_el_or_vttree(struct hfsc_class *cl)
+{
+ return ((cl->cl_flags & HFSC_FSC) && cl->cl_nactive) ||
+ ((cl->cl_flags & HFSC_RSC) && !RB_EMPTY_NODE(&cl->el_node));
+}
/*
* eligible tree holds backlogged classes being sorted by their eligible times.
@@ -203,7 +208,10 @@ eltree_insert(struct hfsc_class *cl)
static inline void
eltree_remove(struct hfsc_class *cl)
{
- rb_erase(&cl->el_node, &cl->sched->eligible);
+ if (!RB_EMPTY_NODE(&cl->el_node)) {
+ rb_erase(&cl->el_node, &cl->sched->eligible);
+ RB_CLEAR_NODE(&cl->el_node);
+ }
}
static inline void
@@ -958,6 +966,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl != NULL) {
int old_flags;
+ int len = 0;
if (parentid) {
if (cl->cl_parent &&
@@ -988,9 +997,13 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (usc != NULL)
hfsc_change_usc(cl, usc, cur_time);
+ if (cl->qdisc->q.qlen != 0)
+ len = qdisc_peek_len(cl->qdisc);
+ /* Check queue length again since some qdisc implementations
+ * (e.g., netem/codel) might empty the queue during the peek
+ * operation.
+ */
if (cl->qdisc->q.qlen != 0) {
- int len = qdisc_peek_len(cl->qdisc);
-
if (cl->cl_flags & HFSC_RSC) {
if (old_flags & HFSC_RSC)
update_ed(cl, len);
@@ -1032,6 +1045,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ RB_CLEAR_NODE(&cl->el_node);
+
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
@@ -1220,7 +1235,8 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
/* vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
* needs to be called explicitly to remove a class from vttree.
*/
- update_vf(cl, 0, 0);
+ if (cl->cl_nactive)
+ update_vf(cl, 0, 0);
if (cl->cl_flags & HFSC_RSC)
eltree_remove(cl);
}
@@ -1560,7 +1576,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
return err;
}
- if (first) {
+ sch->qstats.backlog += len;
+ sch->q.qlen++;
+
+ if (first && !cl_in_el_or_vttree(cl)) {
if (cl->cl_flags & HFSC_RSC)
init_ed(cl, len);
if (cl->cl_flags & HFSC_FSC)
@@ -1575,9 +1594,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
- sch->qstats.backlog += len;
- sch->q.qlen++;
-
return NET_XMIT_SUCCESS;
}
@@ -1632,10 +1648,16 @@ hfsc_dequeue(struct Qdisc *sch)
if (cl->qdisc->q.qlen != 0) {
/* update ed */
next_len = qdisc_peek_len(cl->qdisc);
- if (realtime)
- update_ed(cl, next_len);
- else
- update_d(cl, next_len);
+ /* Check queue length again since some qdisc implementations
+ * (e.g., netem/codel) might empty the queue during the peek
+ * operation.
+ */
+ if (cl->qdisc->q.qlen != 0) {
+ if (realtime)
+ update_ed(cl, next_len);
+ else
+ update_d(cl, next_len);
+ }
} else {
/* the class becomes passive */
eltree_remove(cl);
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 44d9efe1a96a..5aa434b46707 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -564,7 +564,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
qlen = sch->q.qlen;
prev_backlog = sch->qstats.backlog;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = hhf_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
rtnl_kfree_skbs(skb, skb);
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index ff3de37874e4..1021681a5718 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
*/
static inline void htb_next_rb_node(struct rb_node **n)
{
- *n = rb_next(*n);
+ if (*n)
+ *n = rb_next(*n);
}
/**
@@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
*/
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
- WARN_ON(!cl->prio_activity);
-
+ if (!cl->prio_activity)
+ return;
htb_deactivate_prios(q, cl);
cl->prio_activity = 0;
}
@@ -820,7 +821,9 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
u32 *pid;
} stk[TC_HTB_MAXDEPTH], *sp = stk;
- BUG_ON(!hprio->row.rb_node);
+ if (unlikely(!hprio->row.rb_node))
+ return NULL;
+
sp->root = hprio->row.rb_node;
sp->pptr = &hprio->ptr;
sp->pid = &hprio->last_ptr_id;
@@ -1738,8 +1741,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
if (cl->parent)
cl->parent->children--;
- if (cl->prio_activity)
- htb_deactivate(q, cl);
+ htb_deactivate(q, cl);
if (cl->cmode != HTB_CAN_SEND)
htb_safe_rb_erase(&cl->pq_node,
@@ -1947,8 +1949,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* turn parent into inner node */
qdisc_purge_queue(parent->leaf.q);
parent_qdisc = parent->leaf.q;
- if (parent->prio_activity)
- htb_deactivate(q, parent);
+ htb_deactivate(q, parent);
/* remove from evt list because of level change */
if (parent->cmode != HTB_CAN_SEND) {
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 51d4013b6121..f3e5ef9a9592 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -152,7 +152,7 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
static const struct
nla_policy mqprio_tc_entry_policy[TCA_MQPRIO_TC_ENTRY_MAX + 1] = {
[TCA_MQPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32,
- TC_QOPT_MAX_QUEUE),
+ TC_QOPT_MAX_QUEUE - 1),
[TCA_MQPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
TC_FP_EXPRESS,
TC_FP_PREEMPTIBLE),
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3b519adc0125..2270547b51df 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -748,9 +748,9 @@ deliver:
if (err != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(err))
qdisc_qstats_drop(sch);
- qdisc_tree_reduce_backlog(sch, 1, pkt_len);
sch->qstats.backlog -= pkt_len;
sch->q.qlen--;
+ qdisc_tree_reduce_backlog(sch, 1, pkt_len);
}
goto tfifo_dequeue;
}
@@ -972,6 +972,41 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
return 0;
}
+static const struct Qdisc_class_ops netem_class_ops;
+
+static int check_netem_in_tree(struct Qdisc *sch, bool duplicates,
+ struct netlink_ext_ack *extack)
+{
+ struct Qdisc *root, *q;
+ unsigned int i;
+
+ root = qdisc_root_sleeping(sch);
+
+ if (sch != root && root->ops->cl_ops == &netem_class_ops) {
+ if (duplicates ||
+ ((struct netem_sched_data *)qdisc_priv(root))->duplicate)
+ goto err;
+ }
+
+ if (!qdisc_dev(root))
+ return 0;
+
+ hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) {
+ if (sch != q && q->ops->cl_ops == &netem_class_ops) {
+ if (duplicates ||
+ ((struct netem_sched_data *)qdisc_priv(q))->duplicate)
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ NL_SET_ERR_MSG(extack,
+ "netem: cannot mix duplicating netems with other netems in tree");
+ return -EINVAL;
+}
+
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
@@ -1030,6 +1065,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
q->gap = qopt->gap;
q->counter = 0;
q->loss = qopt->loss;
+
+ ret = check_netem_in_tree(sch, qopt->duplicate, extack);
+ if (ret)
+ goto unlock;
+
q->duplicate = qopt->duplicate;
/* for compatibility with earlier versions.
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index b3dcb845b327..db61cbc21b13 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -192,7 +192,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
/* Drop excess packets if new limit is lower */
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index cc30f7a32f1a..9e2b9a490db2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
for (i = q->bands; i < oldbands; i++)
- qdisc_tree_flush_backlog(q->queues[i]);
+ qdisc_purge_queue(q->queues[i]);
for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d584c0c25899..5a345ef35c9f 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -202,6 +202,11 @@ struct qfq_sched {
*/
enum update_reason {enqueue, requeue};
+static bool cl_is_active(struct qfq_class *cl)
+{
+ return !list_empty(&cl->alist);
+}
+
static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
{
struct qfq_sched *q = qdisc_priv(sch);
@@ -347,7 +352,7 @@ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
struct qfq_aggregate *agg = cl->agg;
- list_del(&cl->alist); /* remove from RR queue of the aggregate */
+ list_del_init(&cl->alist); /* remove from RR queue of the aggregate */
if (list_empty(&agg->active)) /* agg is now inactive */
qfq_deactivate_agg(q, agg);
}
@@ -407,7 +412,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
bool existing = false;
struct nlattr *tb[TCA_QFQ_MAX + 1];
struct qfq_aggregate *new_agg = NULL;
- u32 weight, lmax, inv_w;
+ u32 weight, lmax, inv_w, old_weight, old_lmax;
int err;
int delta_w;
@@ -441,12 +446,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
- if (cl != NULL &&
- lmax == cl->agg->lmax &&
- weight == cl->agg->class_weight)
- return 0; /* nothing to change */
+ if (cl != NULL) {
+ sch_tree_lock(sch);
+ old_weight = cl->agg->class_weight;
+ old_lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
+ if (lmax == old_lmax && weight == old_weight)
+ return 0; /* nothing to change */
+ }
- delta_w = weight - (cl ? cl->agg->class_weight : 0);
+ delta_w = weight - (cl ? old_weight : 0);
if (q->wsum + delta_w > QFQ_MAX_WSUM) {
NL_SET_ERR_MSG_FMT_MOD(extack,
@@ -477,6 +486,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
gnet_stats_basic_sync_init(&cl->bstats);
cl->common.classid = classid;
cl->deficit = lmax;
+ INIT_LIST_HEAD(&cl->alist);
cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
classid, NULL);
@@ -529,9 +539,6 @@ destroy_class:
static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
{
- struct qfq_sched *q = qdisc_priv(sch);
-
- qfq_rm_from_agg(q, cl);
gen_kill_estimator(&cl->rate_est);
qdisc_put(cl->qdisc);
kfree(cl);
@@ -552,6 +559,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->common);
+ qfq_rm_from_agg(q, cl);
sch_tree_unlock(sch);
@@ -622,6 +630,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
{
struct qfq_class *cl = (struct qfq_class *)arg;
struct nlattr *nest;
+ u32 class_weight, lmax;
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
@@ -630,8 +639,13 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
- nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
+
+ sch_tree_lock(sch);
+ class_weight = cl->agg->class_weight;
+ lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
+ if (nla_put_u32(skb, TCA_QFQ_WEIGHT, class_weight) ||
+ nla_put_u32(skb, TCA_QFQ_LMAX, lmax))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -648,8 +662,10 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
memset(&xstats, 0, sizeof(xstats));
+ sch_tree_lock(sch);
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
@@ -985,7 +1001,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
cl->deficit -= (int) len;
if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
- list_del(&cl->alist);
+ list_del_init(&cl->alist);
else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
@@ -1217,7 +1233,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct qfq_class *cl;
struct qfq_aggregate *agg;
int err = 0;
- bool first;
cl = qfq_classify(skb, sch, &err);
if (cl == NULL) {
@@ -1239,7 +1254,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
- first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
pr_debug("qfq_enqueue: enqueue failed %d\n", err);
@@ -1255,8 +1269,8 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
++sch->q.qlen;
agg = cl->agg;
- /* if the queue was not empty, then done here */
- if (!first) {
+ /* if the class is active, then done here */
+ if (cl_is_active(cl)) {
if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
list_first_entry(&agg->active, struct qfq_class, alist)
== cl && cl->deficit < len)
@@ -1418,6 +1432,8 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
+ if (list_empty(&cl->alist))
+ return;
qfq_deactivate_class(q, cl);
}
@@ -1488,6 +1504,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
common.hnode) {
+ qfq_rm_from_agg(q, cl);
qfq_destroy_class(sch, cl);
}
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index b5f096588fae..0f0701ed397e 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -283,7 +283,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
q->userbits = userbits;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old_child = q->qdisc;
q->qdisc = child;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3b9245a3c767..11a7d5a25d6b 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -77,12 +77,6 @@
#define SFQ_EMPTY_SLOT 0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024
-/* We use 16 bits to store allot, and want to handle packets up to 64K
- * Scale allot by 8 (1<<3) so that no overflow occurs.
- */
-#define SFQ_ALLOT_SHIFT 3
-#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
-
/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;
@@ -104,7 +98,7 @@ struct sfq_slot {
sfq_index next; /* next slot in sfq RR chain */
struct sfq_head dep; /* anchor in dep[] chains */
unsigned short hash; /* hash value (index in ht[]) */
- short allot; /* credit for this slot */
+ int allot; /* credit for this slot */
unsigned int backlog;
struct red_vars vars;
@@ -120,7 +114,6 @@ struct sfq_sched_data {
siphash_key_t perturbation;
u8 cur_depth; /* depth of longest slot */
u8 flags;
- unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
struct tcf_proto __rcu *filter_list;
struct tcf_block *block;
sfq_index *ht; /* Hash table ('divisor' slots) */
@@ -317,7 +310,10 @@ drop:
/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
x = q->tail->next;
slot = &q->slots[x];
- q->tail->next = slot->next;
+ if (slot->next == x)
+ q->tail = NULL; /* no more active slots */
+ else
+ q->tail->next = slot->next;
q->ht[slot->hash] = SFQ_EMPTY_SLOT;
goto drop;
}
@@ -456,7 +452,7 @@ enqueue:
*/
q->tail = slot;
/* We could use a bigger initial quantum for new flows */
- slot->allot = q->scaled_quantum;
+ slot->allot = q->quantum;
}
if (++sch->q.qlen <= q->limit)
return NET_XMIT_SUCCESS;
@@ -493,7 +489,7 @@ next_slot:
slot = &q->slots[a];
if (slot->allot <= 0) {
q->tail = slot;
- slot->allot += q->scaled_quantum;
+ slot->allot += q->quantum;
goto next_slot;
}
skb = slot_dequeue_head(slot);
@@ -512,7 +508,7 @@ next_slot:
}
q->tail->next = next_a;
} else {
- slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
+ slot->allot -= qdisc_pkt_len(skb);
}
return skb;
}
@@ -595,7 +591,7 @@ drop:
q->tail->next = x;
}
q->tail = slot;
- slot->allot = q->scaled_quantum;
+ slot->allot = q->quantum;
}
}
sch->q.qlen -= dropped;
@@ -628,7 +624,8 @@ static void sfq_perturbation(struct timer_list *t)
rcu_read_unlock();
}
-static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
+static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
{
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt);
@@ -637,6 +634,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
struct red_parms *p = NULL;
struct sk_buff *to_free = NULL;
struct sk_buff *tail = NULL;
+ unsigned int maxflows;
+ unsigned int quantum;
+ unsigned int divisor;
+ int perturb_period;
+ u8 headdrop;
+ u8 maxdepth;
+ int limit;
+ u8 flags;
+
if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
return -EINVAL;
@@ -646,13 +652,17 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
(!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
return -EINVAL;
- /* slot->allot is a short, make sure quantum is not too big. */
- if (ctl->quantum) {
- unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+ if ((int)ctl->quantum < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
+ return -EINVAL;
+ }
- if (scaled <= 0 || scaled > SHRT_MAX)
- return -EINVAL;
+ if (ctl->perturb_period < 0 ||
+ ctl->perturb_period > INT_MAX / HZ) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period");
+ return -EINVAL;
}
+ perturb_period = ctl->perturb_period * HZ;
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
@@ -662,37 +672,62 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
if (!p)
return -ENOMEM;
}
+
sch_tree_lock(sch);
- if (ctl->quantum) {
- q->quantum = ctl->quantum;
- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
- }
- WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ);
+
+ limit = q->limit;
+ divisor = q->divisor;
+ headdrop = q->headdrop;
+ maxdepth = q->maxdepth;
+ maxflows = q->maxflows;
+ quantum = q->quantum;
+ flags = q->flags;
+
+ /* update and validate configuration */
+ if (ctl->quantum)
+ quantum = ctl->quantum;
if (ctl->flows)
- q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+ maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
- q->divisor = ctl->divisor;
- q->maxflows = min_t(u32, q->maxflows, q->divisor);
+ divisor = ctl->divisor;
+ maxflows = min_t(u32, maxflows, divisor);
}
if (ctl_v1) {
if (ctl_v1->depth)
- q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+ maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
if (p) {
- swap(q->red_parms, p);
- red_set_parms(q->red_parms,
+ red_set_parms(p,
ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog,
ctl_v1->Plog, ctl_v1->Scell_log,
NULL,
ctl_v1->max_P);
}
- q->flags = ctl_v1->flags;
- q->headdrop = ctl_v1->headdrop;
+ flags = ctl_v1->flags;
+ headdrop = ctl_v1->headdrop;
}
if (ctl->limit) {
- q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
- q->maxflows = min_t(u32, q->maxflows, q->limit);
+ limit = min_t(u32, ctl->limit, maxdepth * maxflows);
+ maxflows = min_t(u32, maxflows, limit);
}
+ if (limit == 1) {
+ sch_tree_unlock(sch);
+ kfree(p);
+ NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+ return -EINVAL;
+ }
+
+ /* commit configuration */
+ q->limit = limit;
+ q->divisor = divisor;
+ q->headdrop = headdrop;
+ q->maxdepth = maxdepth;
+ q->maxflows = maxflows;
+ WRITE_ONCE(q->perturb_period, perturb_period);
+ q->quantum = quantum;
+ q->flags = flags;
+ if (p)
+ swap(q->red_parms, p);
qlen = sch->q.qlen;
while (sch->q.qlen > q->limit) {
@@ -762,12 +797,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
q->maxflows = SFQ_DEFAULT_FLOWS;
q->quantum = psched_mtu(qdisc_dev(sch));
- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
q->perturb_period = 0;
get_random_bytes(&q->perturbation, sizeof(q->perturbation));
if (opt) {
- int err = sfq_change(sch, opt);
+ int err = sfq_change(sch, opt, extack);
if (err)
return err;
}
@@ -878,7 +912,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
if (idx != SFQ_EMPTY_SLOT) {
const struct sfq_slot *slot = &q->slots[idx];
- xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
+ xstats.allot = slot->allot;
qs.qlen = slot->qlen;
qs.backlog = slot->backlog;
}
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index 20ff7386b74b..f485f62ab721 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -123,8 +123,6 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* Check to update highest and lowest priorities. */
if (skb_queue_empty(lp_qdisc)) {
if (q->lowest_prio == q->highest_prio) {
- /* The incoming packet is the only packet in queue. */
- BUG_ON(sch->q.qlen != 1);
q->lowest_prio = prio;
q->highest_prio = prio;
} else {
@@ -156,7 +154,6 @@ static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
/* Update highest priority field. */
if (skb_queue_empty(hpq)) {
if (q->lowest_prio == q->highest_prio) {
- BUG_ON(sch->q.qlen);
q->highest_prio = 0;
q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
} else {
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8623dc0bafc0..1620f0fd78ce 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -43,6 +43,11 @@ static struct static_key_false taprio_have_working_mqprio;
#define TAPRIO_SUPPORTED_FLAGS \
(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
#define TAPRIO_FLAGS_INVALID U32_MAX
+/* Minimum value for picos_per_byte to ensure non-zero duration
+ * for minimum-sized Ethernet frames (ETH_ZLEN = 60).
+ * 60 * 17 > PSEC_PER_NSEC (1000)
+ */
+#define TAPRIO_PICOS_PER_BYTE_MIN 17
struct sched_entry {
/* Durations between this GCL entry and the GCL entry where the
@@ -1284,7 +1289,8 @@ static void taprio_start_sched(struct Qdisc *sch,
}
static void taprio_set_picos_per_byte(struct net_device *dev,
- struct taprio_sched *q)
+ struct taprio_sched *q,
+ struct netlink_ext_ack *extack)
{
struct ethtool_link_ksettings ecmd;
int speed = SPEED_10;
@@ -1300,6 +1306,15 @@ static void taprio_set_picos_per_byte(struct net_device *dev,
skip:
picos_per_byte = (USEC_PER_SEC * 8) / speed;
+ if (picos_per_byte < TAPRIO_PICOS_PER_BYTE_MIN) {
+ if (!extack)
+ pr_warn("Link speed %d is too high. Schedule may be inaccurate.\n",
+ speed);
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Link speed %d is too high. Schedule may be inaccurate.",
+ speed);
+ picos_per_byte = TAPRIO_PICOS_PER_BYTE_MIN;
+ }
atomic64_set(&q->picos_per_byte, picos_per_byte);
netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
@@ -1324,17 +1339,19 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
if (dev != qdisc_dev(q->root))
continue;
- taprio_set_picos_per_byte(dev, q);
+ taprio_set_picos_per_byte(dev, q, NULL);
stab = rtnl_dereference(q->root->stab);
- oper = rtnl_dereference(q->oper_sched);
+ rcu_read_lock();
+ oper = rcu_dereference(q->oper_sched);
if (oper)
taprio_update_queue_max_sdu(q, oper, stab);
- admin = rtnl_dereference(q->admin_sched);
+ admin = rcu_dereference(q->admin_sched);
if (admin)
taprio_update_queue_max_sdu(q, admin, stab);
+ rcu_read_unlock();
break;
}
@@ -1846,7 +1863,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
q->flags = taprio_flags;
/* Needed for length_to_duration() during netlink attribute parsing */
- taprio_set_picos_per_byte(dev, q);
+ taprio_set_picos_per_byte(dev, q, extack);
err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
if (err < 0)
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index dc26b22d53c7..4c977f049670 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old = q->qdisc;
q->qdisc = child;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a8a254a5008e..032a10d82302 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -117,7 +117,7 @@ int sctp_rcv(struct sk_buff *skb)
* it's better to just linearize it otherwise crc computing
* takes longer.
*/
- if ((!is_gso && skb_linearize(skb)) ||
+ if (((!is_gso || skb_cloned(skb)) && skb_linearize(skb)) ||
!pskb_may_pull(skb, sizeof(struct sctphdr)))
goto discard_it;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 36ee34f483d7..b301d64d9d80 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -72,8 +72,9 @@
/* Forward declarations for internal helper functions. */
static bool sctp_writeable(const struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len);
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc,
+ struct sctp_transport *transport,
+ long *timeo_p, size_t msg_len);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -1828,7 +1829,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) {
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+ err = sctp_wait_for_sndbuf(asoc, transport, &timeo, msg_len);
if (err)
goto err;
if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) {
@@ -9099,7 +9100,8 @@ static void __sctp_write_space(struct sctp_association *asoc)
wq = rcu_dereference(sk->sk_wq);
if (wq) {
if (waitqueue_active(&wq->wait))
- wake_up_interruptible(&wq->wait);
+ wake_up_interruptible_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
/* Note that we try to include the Async I/O support
* here by modeling from the current TCP/UDP code.
@@ -9214,8 +9216,9 @@ void sctp_sock_rfree(struct sk_buff *skb)
/* Helper function to wait for space in the sndbuf. */
-static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len)
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc,
+ struct sctp_transport *transport,
+ long *timeo_p, size_t msg_len)
{
struct sock *sk = asoc->base.sk;
long current_timeo = *timeo_p;
@@ -9225,7 +9228,9 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
*timeo_p, msg_len);
- /* Increment the association's refcnt. */
+ /* Increment the transport and association's refcnt. */
+ if (transport)
+ sctp_transport_hold(transport);
sctp_association_hold(asoc);
/* Wait on the association specific sndbuf space. */
@@ -9234,7 +9239,7 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
TASK_INTERRUPTIBLE);
if (asoc->base.dead)
goto do_dead;
- if (!*timeo_p)
+ if ((!*timeo_p) || (transport && transport->dead))
goto do_nonblock;
if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
goto do_error;
@@ -9259,7 +9264,9 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
out:
finish_wait(&asoc->wait, &wait);
- /* Release the association's refcnt. */
+ /* Release the transport and association's refcnt. */
+ if (transport)
+ sctp_transport_put(transport);
sctp_association_put(asoc);
return err;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index c241cc552e8d..bfcff6d6a438 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -735,7 +735,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
* value SHOULD be the smallest TSN not acknowledged by the
* receiver of the request plus 2^31.
*/
- init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31);
+ init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1U << 31);
sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
init_tsn, GFP_ATOMIC);
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 8e1e97be4df7..ee3eac338a9d 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -525,6 +525,8 @@ static int proc_sctp_do_auth(const struct ctl_table *ctl, int write,
return ret;
}
+static DEFINE_MUTEX(sctp_sysctl_mutex);
+
static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -549,6 +551,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write,
if (new_value > max || new_value < min)
return -EINVAL;
+ mutex_lock(&sctp_sysctl_mutex);
net->sctp.udp_port = new_value;
sctp_udp_sock_stop(net);
if (new_value) {
@@ -561,6 +564,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write,
lock_sock(sk);
sctp_sk(sk)->udp_port = htons(net->sctp.udp_port);
release_sock(sk);
+ mutex_unlock(&sctp_sysctl_mutex);
}
return ret;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 2abe45af98e7..31eca29b6cfb 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -117,6 +117,8 @@ fail:
*/
void sctp_transport_free(struct sctp_transport *transport)
{
+ transport->dead = 1;
+
/* Try to delete the heartbeat timer. */
if (del_timer(&transport->hb_timer))
sctp_transport_put(transport);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6cc7b846cff1..cdd445d40b94 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -30,6 +30,10 @@
#include <linux/splice.h>
#include <net/sock.h>
+#include <net/inet_common.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#endif
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>
@@ -360,8 +364,21 @@ static void smc_destruct(struct sock *sk)
return;
if (!sock_flag(sk, SOCK_DEAD))
return;
+ switch (sk->sk_family) {
+ case AF_INET:
+ inet_sock_destruct(sk);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ inet6_sock_destruct(sk);
+ break;
+#endif
+ }
}
+static struct lock_class_key smc_key;
+static struct lock_class_key smc_slock_key;
+
void smc_sk_init(struct net *net, struct sock *sk, int protocol)
{
struct smc_sock *smc = smc_sk(sk);
@@ -375,6 +392,8 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol)
INIT_WORK(&smc->connect_work, smc_connect_work);
INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
INIT_LIST_HEAD(&smc->accept_q);
+ sock_lock_init_class_and_name(sk, "slock-AF_SMC", &smc_slock_key,
+ "sk_lock-AF_SMC", &smc_key);
spin_lock_init(&smc->accept_q_lock);
spin_lock_init(&smc->conn.send_lock);
sk->sk_prot->hash(sk);
@@ -2738,7 +2757,7 @@ int smc_accept(struct socket *sock, struct socket *new_sock,
release_sock(clcsk);
} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
lock_sock(nsk);
- smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
+ smc_rx_wait(smc_sk(nsk), &timeo, 0, smc_rx_data_available);
release_sock(nsk);
}
}
@@ -3334,10 +3353,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family)
* which need net ref.
*/
sk = smc->clcsock->sk;
- __netns_tracker_free(net, &sk->ns_tracker, false);
- sk->sk_net_refcnt = 1;
- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
+ sk_net_refcnt_upgrade(sk);
return 0;
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index ad77d6b6b8d3..7579f9622e01 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -283,10 +283,10 @@ struct smc_connection {
};
struct smc_sock { /* smc sock container */
- struct sock sk;
-#if IS_ENABLED(CONFIG_IPV6)
- struct ipv6_pinfo *pinet6;
-#endif
+ union {
+ struct sock sk;
+ struct inet_sock icsk_inet;
+ };
struct socket *clcsock; /* internal tcp socket */
void (*clcsk_state_change)(struct sock *sk);
/* original stat_change fct. */
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 716808f374a8..b391c2ef463f 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -1079,14 +1079,16 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+ struct net_device *base_ndev;
struct net *net;
- ndev = pnet_find_base_ndev(ndev);
+ base_ndev = pnet_find_base_ndev(ndev);
net = dev_net(ndev);
- if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+ if (smc_pnetid_by_dev_port(base_ndev->dev.parent, base_ndev->dev_port,
ndev_pnetid) &&
+ smc_pnet_find_ndev_pnetid_by_table(base_ndev, ndev_pnetid) &&
smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
- smc_pnet_find_rdma_dev(ndev, ini);
+ smc_pnet_find_rdma_dev(base_ndev, ini);
return; /* pnetid could not be determined */
}
_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net);
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index f0cbe77a80b4..79047721df51 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -238,22 +238,23 @@ out:
return -ENOMEM;
}
-static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn)
+static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn, size_t peeked)
{
- return atomic_read(&conn->bytes_to_rcv) &&
+ return smc_rx_data_available(conn, peeked) &&
!atomic_read(&conn->splice_pending);
}
/* blocks rcvbuf consumer until >=len bytes available or timeout or interrupted
* @smc smc socket
* @timeo pointer to max seconds to wait, pointer to value 0 for no timeout
+ * @peeked number of bytes already peeked
* @fcrit add'l criterion to evaluate as function pointer
* Returns:
* 1 if at least 1 byte available in rcvbuf or if socket error/shutdown.
* 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted).
*/
-int smc_rx_wait(struct smc_sock *smc, long *timeo,
- int (*fcrit)(struct smc_connection *conn))
+int smc_rx_wait(struct smc_sock *smc, long *timeo, size_t peeked,
+ int (*fcrit)(struct smc_connection *conn, size_t baseline))
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct smc_connection *conn = &smc->conn;
@@ -262,7 +263,7 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
struct sock *sk = &smc->sk;
int rc;
- if (fcrit(conn))
+ if (fcrit(conn, peeked))
return 1;
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
add_wait_queue(sk_sleep(sk), &wait);
@@ -271,7 +272,7 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
cflags->peer_conn_abort ||
READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN ||
conn->killed ||
- fcrit(conn),
+ fcrit(conn, peeked),
&wait);
remove_wait_queue(sk_sleep(sk), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
@@ -322,11 +323,11 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
return -EAGAIN;
}
-static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
+static bool smc_rx_recvmsg_data_available(struct smc_sock *smc, size_t peeked)
{
struct smc_connection *conn = &smc->conn;
- if (smc_rx_data_available(conn))
+ if (smc_rx_data_available(conn, peeked))
return true;
else if (conn->urg_state == SMC_URG_VALID)
/* we received a single urgent Byte - skip */
@@ -344,10 +345,10 @@ static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
struct pipe_inode_info *pipe, size_t len, int flags)
{
- size_t copylen, read_done = 0, read_remaining = len;
+ size_t copylen, read_done = 0, read_remaining = len, peeked_bytes = 0;
size_t chunk_len, chunk_off, chunk_len_sum;
struct smc_connection *conn = &smc->conn;
- int (*func)(struct smc_connection *conn);
+ int (*func)(struct smc_connection *conn, size_t baseline);
union smc_host_cursor cons;
int readable, chunk;
char *rcvbuf_base;
@@ -384,14 +385,14 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
if (conn->killed)
break;
- if (smc_rx_recvmsg_data_available(smc))
+ if (smc_rx_recvmsg_data_available(smc, peeked_bytes))
goto copy;
if (sk->sk_shutdown & RCV_SHUTDOWN) {
/* smc_cdc_msg_recv_action() could have run after
* above smc_rx_recvmsg_data_available()
*/
- if (smc_rx_recvmsg_data_available(smc))
+ if (smc_rx_recvmsg_data_available(smc, peeked_bytes))
goto copy;
break;
}
@@ -425,26 +426,28 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
}
}
- if (!smc_rx_data_available(conn)) {
- smc_rx_wait(smc, &timeo, smc_rx_data_available);
+ if (!smc_rx_data_available(conn, peeked_bytes)) {
+ smc_rx_wait(smc, &timeo, peeked_bytes, smc_rx_data_available);
continue;
}
copy:
/* initialize variables for 1st iteration of subsequent loop */
/* could be just 1 byte, even after waiting on data above */
- readable = atomic_read(&conn->bytes_to_rcv);
+ readable = smc_rx_data_available(conn, peeked_bytes);
splbytes = atomic_read(&conn->splice_pending);
if (!readable || (msg && splbytes)) {
if (splbytes)
func = smc_rx_data_available_and_no_splice_pend;
else
func = smc_rx_data_available;
- smc_rx_wait(smc, &timeo, func);
+ smc_rx_wait(smc, &timeo, peeked_bytes, func);
continue;
}
smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
+ if ((flags & MSG_PEEK) && peeked_bytes)
+ smc_curs_add(conn->rmb_desc->len, &cons, peeked_bytes);
/* subsequent splice() calls pick up where previous left */
if (splbytes)
smc_curs_add(conn->rmb_desc->len, &cons, splbytes);
@@ -480,6 +483,8 @@ copy:
}
read_remaining -= chunk_len;
read_done += chunk_len;
+ if (flags & MSG_PEEK)
+ peeked_bytes += chunk_len;
if (chunk_len_sum == copylen)
break; /* either on 1st or 2nd iteration */
diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h
index db823c97d824..994f5e42d1ba 100644
--- a/net/smc/smc_rx.h
+++ b/net/smc/smc_rx.h
@@ -21,11 +21,11 @@ void smc_rx_init(struct smc_sock *smc);
int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
struct pipe_inode_info *pipe, size_t len, int flags);
-int smc_rx_wait(struct smc_sock *smc, long *timeo,
- int (*fcrit)(struct smc_connection *conn));
-static inline int smc_rx_data_available(struct smc_connection *conn)
+int smc_rx_wait(struct smc_sock *smc, long *timeo, size_t peeked,
+ int (*fcrit)(struct smc_connection *conn, size_t baseline));
+static inline int smc_rx_data_available(struct smc_connection *conn, size_t peeked)
{
- return atomic_read(&conn->bytes_to_rcv);
+ return atomic_read(&conn->bytes_to_rcv) - peeked;
}
#endif /* SMC_RX_H */
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 8299ceb3e373..95696f42647e 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -347,7 +347,10 @@ static int strp_read_sock(struct strparser *strp)
struct socket *sock = strp->sk->sk_socket;
read_descriptor_t desc;
- if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+ if (unlikely(!sock || !sock->ops))
+ return -EBUSY;
+
+ if (unlikely(!strp->cb.read_sock && !sock->ops->read_sock))
return -EBUSY;
desc.arg.data = strp;
@@ -355,7 +358,10 @@ static int strp_read_sock(struct strparser *strp)
desc.count = 1; /* give more than one skb per call */
/* sk should be locked here, so okay to do read_sock */
- sock->ops->read_sock(strp->sk, &desc, strp_recv);
+ if (strp->cb.read_sock)
+ strp->cb.read_sock(strp, &desc, strp_recv);
+ else
+ sock->ops->read_sock(strp->sk, &desc, strp_recv);
desc.error = strp->cb.read_sock_done(strp, desc.error);
@@ -468,6 +474,7 @@ int strp_init(struct strparser *strp, struct sock *sk,
strp->cb.unlock = cb->unlock ? : strp_sock_unlock;
strp->cb.rcv_msg = cb->rcv_msg;
strp->cb.parse_msg = cb->parse_msg;
+ strp->cb.read_sock = cb->read_sock;
strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 059f6ef1ad18..7fcb0574fc79 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1669,12 +1669,14 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
}
}
-#ifdef CONFIG_PROC_FS
static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
{
struct proc_dir_entry *p;
struct sunrpc_net *sn;
+ if (!IS_ENABLED(CONFIG_PROC_FS))
+ return 0;
+
sn = net_generic(net, sunrpc_net_id);
cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc);
if (cd->procfs == NULL)
@@ -1702,12 +1704,6 @@ out_nomem:
remove_cache_proc_entries(cd);
return -ENOMEM;
}
-#else /* CONFIG_PROC_FS */
-static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
-{
- return 0;
-}
-#endif
void __init cache_initialize(void)
{
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0090162ee8c3..e492655cb221 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -270,9 +270,6 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
old = rcu_dereference_protected(clnt->cl_xprt,
lockdep_is_held(&clnt->cl_lock));
- if (!xprt_bound(xprt))
- clnt->cl_autobind = 1;
-
clnt->cl_timeout = timeout;
rcu_assign_pointer(clnt->cl_xprt, xprt);
spin_unlock(&clnt->cl_lock);
@@ -2752,8 +2749,13 @@ out_verifier:
case -EPROTONOSUPPORT:
goto out_err;
case -EACCES:
- /* Re-encode with a fresh cred */
- fallthrough;
+ /* possible RPCSEC_GSS out-of-sequence event (RFC2203),
+ * reset recv state and keep waiting, don't retransmit
+ */
+ task->tk_rqstp->rq_reply_bytes_recvd = 0;
+ task->tk_status = xprt_request_enqueue_receive(task);
+ task->tk_action = call_transmit_status;
+ return -EBADMSG;
default:
goto out_garbage;
}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 7ce3721c06ca..eadc00410ebc 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -630,7 +630,7 @@ static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
const char *name)
{
- struct qstr q = QSTR_INIT(name, strlen(name));
+ struct qstr q = QSTR(name);
struct dentry *dentry = d_hash_and_lookup(parent, &q);
if (!dentry) {
dentry = d_alloc(parent, &q);
@@ -1190,8 +1190,7 @@ static const struct rpc_filelist files[] = {
struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
const unsigned char *dir_name)
{
- struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name));
- return d_hash_and_lookup(sb->s_root, &dir);
+ return d_hash_and_lookup(sb->s_root, &QSTR(dir_name));
}
EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
@@ -1300,11 +1299,9 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
struct dentry *gssd_dentry;
struct dentry *clnt_dentry = NULL;
struct dentry *pipe_dentry = NULL;
- struct qstr q = QSTR_INIT(files[RPCAUTH_gssd].name,
- strlen(files[RPCAUTH_gssd].name));
/* We should never get this far if "gssd" doesn't exist */
- gssd_dentry = d_hash_and_lookup(root, &q);
+ gssd_dentry = d_hash_and_lookup(root, &QSTR(files[RPCAUTH_gssd].name));
if (!gssd_dentry)
return ERR_PTR(-ENOENT);
@@ -1314,9 +1311,8 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
goto out;
}
- q.name = gssd_dummy_clnt_dir[0].name;
- q.len = strlen(gssd_dummy_clnt_dir[0].name);
- clnt_dentry = d_hash_and_lookup(gssd_dentry, &q);
+ clnt_dentry = d_hash_and_lookup(gssd_dentry,
+ &QSTR(gssd_dummy_clnt_dir[0].name));
if (!clnt_dentry) {
__rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
pipe_dentry = ERR_PTR(-ENOENT);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 102c3818bc54..53bcca365fb1 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -820,9 +820,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
}
trace_rpcb_setport(child, map->r_status, map->r_port);
- xprt->ops->set_port(xprt, map->r_port);
- if (map->r_port)
+ if (map->r_port) {
+ xprt->ops->set_port(xprt, map->r_port);
xprt_set_bound(xprt);
+ }
}
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cef623ea1506..73bc39281ef5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -276,6 +276,8 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
+ if (unlikely(current->flags & PF_EXITING))
+ return -EINTR;
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
@@ -864,8 +866,6 @@ void rpc_signal_task(struct rpc_task *task)
if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
return;
trace_rpc_task_signalled(task, task->tk_action);
- set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
- smp_mb__after_atomic();
queue = READ_ONCE(task->tk_waitqueue);
if (queue)
rpc_wake_up_queued_task(queue, task);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 79879b7d39cb..46a95877d2de 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1369,7 +1369,8 @@ svc_process_common(struct svc_rqst *rqstp)
case SVC_OK:
break;
case SVC_GARBAGE:
- goto err_garbage_args;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ goto err_bad_auth;
case SVC_SYSERR:
goto err_system_err;
case SVC_DENIED:
@@ -1510,14 +1511,6 @@ err_bad_proc:
*rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit;
-err_garbage_args:
- svc_printk(rqstp, "failed to decode RPC header\n");
-
- if (serv->sv_stats)
- serv->sv_stats->rpcbadfmt++;
- *rqstp->rq_accept_statp = rpc_garbage_args;
- goto sendit;
-
err_system_err:
if (serv->sv_stats)
serv->sv_stats->rpcbadfmt++;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 59e2c46240f5..e61e94576058 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -257,20 +257,47 @@ svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
}
static int
-svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg)
+svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags)
{
union {
struct cmsghdr cmsg;
u8 buf[CMSG_SPACE(sizeof(u8))];
} u;
- struct socket *sock = svsk->sk_sock;
+ u8 alert[2];
+ struct kvec alert_kvec = {
+ .iov_base = alert,
+ .iov_len = sizeof(alert),
+ };
+ struct msghdr msg = {
+ .msg_flags = *msg_flags,
+ .msg_control = &u,
+ .msg_controllen = sizeof(u),
+ };
+ int ret;
+
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1,
+ alert_kvec.iov_len);
+ ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
+ if (ret > 0 &&
+ tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) {
+ iov_iter_revert(&msg.msg_iter, ret);
+ ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN);
+ }
+ return ret;
+}
+
+static int
+svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg)
+{
int ret;
+ struct socket *sock = svsk->sk_sock;
- msg->msg_control = &u;
- msg->msg_controllen = sizeof(u);
ret = sock_recvmsg(sock, msg, MSG_DONTWAIT);
- if (unlikely(msg->msg_controllen != sizeof(u)))
- ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret);
+ if (msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR);
+ if (ret == 0 || ret == -EIO)
+ ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags);
+ }
return ret;
}
@@ -321,7 +348,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
iov_iter_advance(&msg.msg_iter, seek);
buflen -= seek;
}
- len = svc_tcp_sock_recv_cmsg(svsk, &msg);
+ len = svc_tcp_sock_recvmsg(svsk, &msg);
if (len > 0)
svc_flush_bvec(bvec, len, seek);
@@ -1019,7 +1046,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
iov.iov_len = want;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want);
- len = svc_tcp_sock_recv_cmsg(svsk, &msg);
+ len = svc_tcp_sock_recvmsg(svsk, &msg);
if (len < 0)
return len;
svsk->sk_tcplen += len;
@@ -1083,9 +1110,6 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk)
/* If we have more data, signal svc_xprt_enqueue() to try again */
svsk->sk_tcplen = 0;
svsk->sk_marker = xdr_zero;
-
- smp_wmb();
- tcp_set_rcvlowat(svsk->sk_sk, 1);
}
/**
@@ -1175,17 +1199,10 @@ err_incomplete:
goto err_delete;
if (len == want)
svc_tcp_fragment_received(svsk);
- else {
- /* Avoid more ->sk_data_ready() calls until the rest
- * of the message has arrived. This reduces service
- * thread wake-ups on large incoming messages. */
- tcp_set_rcvlowat(svsk->sk_sk,
- svc_sock_reclen(svsk) - svsk->sk_tcplen);
-
+ else
trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
svc_sock_reclen(svsk),
svsk->sk_tcplen - sizeof(rpc_fraghdr));
- }
goto err_noclose;
error:
if (len != -EAGAIN)
@@ -1551,10 +1568,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
newlen = error;
if (protocol == IPPROTO_TCP) {
- __netns_tracker_free(net, &sock->sk->ns_tracker, false);
- sock->sk->sk_net_refcnt = 1;
- get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
+ sk_net_refcnt_upgrade(sock->sk);
if ((error = kernel_listen(sock, 64)) < 0)
goto bummer;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index c3fbf0779d4a..3d7f1413df02 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -406,12 +406,12 @@ static void svc_rdma_xprt_done(struct rpcrdma_notification *rn)
*/
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
{
+ unsigned int ctxts, rq_depth, maxpayload;
struct svcxprt_rdma *listen_rdma;
struct svcxprt_rdma *newxprt = NULL;
struct rdma_conn_param conn_param;
struct rpcrdma_connect_private pmsg;
struct ib_qp_init_attr qp_attr;
- unsigned int ctxts, rq_depth;
struct ib_device *dev;
int ret = 0;
RPC_IFDEBUG(struct sockaddr *sap);
@@ -462,12 +462,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_bc_requests = 2;
}
- /* Arbitrarily estimate the number of rw_ctxs needed for
- * this transport. This is enough rw_ctxs to make forward
- * progress even if the client is using one rkey per page
- * in each Read chunk.
+ /* Arbitrary estimate of the needed number of rdma_rw contexts.
*/
- ctxts = 3 * RPCSVC_MAXPAGES;
+ maxpayload = min(xprt->xpt_server->sv_max_payload,
+ RPCSVC_MAXPAYLOAD_RDMA);
+ ctxts = newxprt->sc_max_requests * 3 *
+ rdma_rw_mr_factor(dev, newxprt->sc_port_num,
+ maxpayload >> PAGE_SHIFT);
+
newxprt->sc_sq_depth = rq_depth + ctxts;
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
@@ -575,6 +577,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
ib_destroy_qp(newxprt->sc_qp);
rdma_destroy_id(newxprt->sc_cm_id);
+ rpcrdma_rn_unregister(dev, &newxprt->sc_rn);
/* This call to put will destroy the transport */
svc_xprt_put(&newxprt->sc_xprt);
return NULL;
@@ -621,7 +624,8 @@ static void __svc_rdma_free(struct work_struct *work)
/* Destroy the CM ID */
rdma_destroy_id(rdma->sc_cm_id);
- rpcrdma_rn_unregister(device, &rdma->sc_rn);
+ if (!test_bit(XPT_LISTENER, &rdma->sc_xprt.xpt_flags))
+ rpcrdma_rn_unregister(device, &rdma->sc_rn);
kfree(rdma);
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b69e6290acfa..92cec227215a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -358,7 +358,7 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
static int
xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
- struct cmsghdr *cmsg, int ret)
+ unsigned int *msg_flags, struct cmsghdr *cmsg, int ret)
{
u8 content_type = tls_get_record_type(sock->sk, cmsg);
u8 level, description;
@@ -371,7 +371,7 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
* record, even though there might be more frames
* waiting to be decrypted.
*/
- msg->msg_flags &= ~MSG_EOR;
+ *msg_flags &= ~MSG_EOR;
break;
case TLS_RECORD_TYPE_ALERT:
tls_alert_recv(sock->sk, msg, &level, &description);
@@ -386,19 +386,33 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
}
static int
-xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags)
+xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags)
{
union {
struct cmsghdr cmsg;
u8 buf[CMSG_SPACE(sizeof(u8))];
} u;
+ u8 alert[2];
+ struct kvec alert_kvec = {
+ .iov_base = alert,
+ .iov_len = sizeof(alert),
+ };
+ struct msghdr msg = {
+ .msg_flags = *msg_flags,
+ .msg_control = &u,
+ .msg_controllen = sizeof(u),
+ };
int ret;
- msg->msg_control = &u;
- msg->msg_controllen = sizeof(u);
- ret = sock_recvmsg(sock, msg, flags);
- if (msg->msg_controllen != sizeof(u))
- ret = xs_sock_process_cmsg(sock, msg, &u.cmsg, ret);
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1,
+ alert_kvec.iov_len);
+ ret = sock_recvmsg(sock, &msg, flags);
+ if (ret > 0 &&
+ tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) {
+ iov_iter_revert(&msg.msg_iter, ret);
+ ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg,
+ -EAGAIN);
+ }
return ret;
}
@@ -408,7 +422,13 @@ xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
ssize_t ret;
if (seek != 0)
iov_iter_advance(&msg->msg_iter, seek);
- ret = xs_sock_recv_cmsg(sock, msg, flags);
+ ret = sock_recvmsg(sock, msg, flags);
+ /* Handle TLS inband control message lazily */
+ if (msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR);
+ if (ret == 0 || ret == -EIO)
+ ret = xs_sock_recv_cmsg(sock, &msg->msg_flags, flags);
+ }
return ret > 0 ? ret + seek : ret;
}
@@ -434,7 +454,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
size_t count)
{
iov_iter_discard(&msg->msg_iter, ITER_DEST, count);
- return xs_sock_recv_cmsg(sock, msg, flags);
+ return xs_sock_recvmsg(sock, msg, flags, 0);
}
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
@@ -1940,12 +1960,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
- if (protocol == IPPROTO_TCP) {
- __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false);
- sock->sk->sk_net_refcnt = 1;
- get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(xprt->xprt_net, 1);
- }
+ if (protocol == IPPROTO_TCP)
+ sk_net_refcnt_upgrade(sock->sk);
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp))
@@ -2580,7 +2596,15 @@ static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid)
struct sock_xprt *lower_transport =
container_of(lower_xprt, struct sock_xprt, xprt);
- lower_transport->xprt_err = status ? -EACCES : 0;
+ switch (status) {
+ case 0:
+ case -EACCES:
+ case -ETIMEDOUT:
+ lower_transport->xprt_err = status;
+ break;
+ default:
+ lower_transport->xprt_err = -EACCES;
+ }
complete(&lower_transport->handshake_done);
xprt_put(lower_xprt);
}
@@ -2735,6 +2759,11 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work)
}
rpc_shutdown_client(lower_clnt);
+ /* Check for ingress data that arrived before the socket's
+ * ->data_ready callback was set up.
+ */
+ xs_poll_check_readable(upper_transport);
+
out_unlock:
current_restore_flags(pflags, PF_MEMALLOC);
upper_transport->clnt = NULL;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 6488ead9e464..4d5fbacef496 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -472,7 +472,7 @@ bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
-static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
+static RAW_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
/**
* register_switchdev_notifier - Register notifier
@@ -518,17 +518,27 @@ EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
int register_switchdev_blocking_notifier(struct notifier_block *nb)
{
- struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
+ struct raw_notifier_head *chain = &switchdev_blocking_notif_chain;
+ int err;
+
+ rtnl_lock();
+ err = raw_notifier_chain_register(chain, nb);
+ rtnl_unlock();
- return blocking_notifier_chain_register(chain, nb);
+ return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
{
- struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
+ struct raw_notifier_head *chain = &switchdev_blocking_notif_chain;
+ int err;
- return blocking_notifier_chain_unregister(chain, nb);
+ rtnl_lock();
+ err = raw_notifier_chain_unregister(chain, nb);
+ rtnl_unlock();
+
+ return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
@@ -536,10 +546,11 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info,
struct netlink_ext_ack *extack)
{
+ ASSERT_RTNL();
info->dev = dev;
info->extack = extack;
- return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
- val, info);
+ return raw_notifier_call_chain(&switchdev_blocking_notif_chain,
+ val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 43c3f1c971b8..ea5bb131ebd0 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -425,7 +425,7 @@ static void tipc_aead_free(struct rcu_head *rp)
}
free_percpu(aead->tfm_entry);
kfree_sensitive(aead->key);
- kfree(aead);
+ kfree_sensitive(aead);
}
static int tipc_aead_users(struct tipc_aead __rcu *aead)
@@ -817,12 +817,20 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
goto exit;
}
+ /* Get net to avoid freed tipc_crypto when delete namespace */
+ if (!maybe_get_net(aead->crypto->net)) {
+ tipc_bearer_put(b);
+ rc = -ENODEV;
+ goto exit;
+ }
+
/* Now, do encrypt */
rc = crypto_aead_encrypt(req);
if (rc == -EINPROGRESS || rc == -EBUSY)
return rc;
tipc_bearer_put(b);
+ put_net(aead->crypto->net);
exit:
kfree(ctx);
@@ -860,6 +868,7 @@ static void tipc_aead_encrypt_done(void *data, int err)
kfree(tx_ctx);
tipc_bearer_put(b);
tipc_aead_put(aead);
+ put_net(net);
}
/**
@@ -2293,8 +2302,8 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
/* Verify the supplied size values */
- if (unlikely(size != keylen + sizeof(struct tipc_aead_key) ||
- keylen > TIPC_AEAD_KEY_SIZE_MAX)) {
+ if (unlikely(keylen > TIPC_AEAD_KEY_SIZE_MAX ||
+ size != keylen + sizeof(struct tipc_aead_key))) {
pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name);
goto exit;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 5c2088a469ce..5689e1f48547 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1046,6 +1046,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) {
if (imp == TIPC_SYSTEM_IMPORTANCE) {
pr_warn("%s<%s>, link overflow", link_rst_msg, l->name);
+ __skb_queue_purge(list);
return -ENOBUFS;
}
rc = link_schedule_user(l, hdr);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index e2f19627e43d..b45c5b91bc7a 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -716,7 +716,8 @@ void tipc_mon_reinit_self(struct net *net)
if (!mon)
continue;
write_lock_bh(&mon->lock);
- mon->self->addr = tipc_own_addr(net);
+ if (mon->self)
+ mon->self->addr = tipc_own_addr(net);
write_unlock_bh(&mon->lock);
}
}
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 8ee0c07d00e9..ffe577bf6b51 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -704,8 +704,10 @@ static void tipc_topsrv_stop(struct net *net)
for (id = 0; srv->idr_in_use; id++) {
con = idr_find(&srv->conn_idr, id);
if (con) {
+ conn_get(con);
spin_unlock_bh(&srv->idr_lock);
tipc_conn_close(con);
+ conn_put(con);
spin_lock_bh(&srv->idr_lock);
}
}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 108a4cc2e001..258d6aa4f21a 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -489,7 +489,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
b = tipc_bearer_find(net, bname);
- if (!b) {
+ if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) {
rtnl_unlock();
return -EINVAL;
}
@@ -500,7 +500,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
b = rtnl_dereference(tn->bearer_list[bid]);
- if (!b) {
+ if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) {
rtnl_unlock();
return -EINVAL;
}
diff --git a/net/tls/tls.h b/net/tls/tls.h
index e5e47452308a..e1eaf12b3742 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -195,7 +195,7 @@ void tls_strp_msg_done(struct tls_strparser *strp);
int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb);
void tls_rx_msg_ready(struct tls_strparser *strp);
-void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh);
+bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh);
int tls_strp_msg_cow(struct tls_sw_context_rx *ctx);
struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx);
int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 6b4b9f2749a6..0acf313deb01 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -809,6 +809,11 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
return do_tls_setsockopt(sk, optname, optval, optlen);
}
+static int tls_disconnect(struct sock *sk, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
struct tls_context *tls_ctx_create(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -904,6 +909,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
prot[TLS_BASE][TLS_BASE] = *base;
prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt;
prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt;
+ prot[TLS_BASE][TLS_BASE].disconnect = tls_disconnect;
prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close;
prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 77e33e1e340e..d71643b494a1 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -396,7 +396,6 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
return 0;
shinfo = skb_shinfo(strp->anchor);
- shinfo->frag_list = NULL;
/* If we don't know the length go max plus page for cipher overhead */
need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
@@ -412,6 +411,8 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
page, 0, 0);
}
+ shinfo->frag_list = NULL;
+
strp->copy_mode = 1;
strp->stm.offset = 0;
@@ -474,7 +475,7 @@ static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len)
strp->stm.offset = offset;
}
-void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh)
+bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh)
{
struct strp_msg *rxm;
struct tls_msg *tlm;
@@ -483,8 +484,11 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh)
DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len);
if (!strp->copy_mode && force_refresh) {
- if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len))
- return;
+ if (unlikely(tcp_inq(strp->sk) < strp->stm.full_len)) {
+ WRITE_ONCE(strp->msg_ready, 0);
+ memset(&strp->stm, 0, sizeof(strp->stm));
+ return false;
+ }
tls_strp_load_anchor_with_queue(strp, strp->stm.full_len);
}
@@ -494,6 +498,8 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh)
rxm->offset = strp->stm.offset;
tlm = tls_msg(strp->anchor);
tlm->control = strp->mark;
+
+ return true;
}
/* Called with lock held on lower socket */
@@ -511,9 +517,8 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
if (inq < strp->stm.full_len)
return tls_strp_read_copy(strp, true);
+ tls_strp_load_anchor_with_queue(strp, inq);
if (!strp->stm.full_len) {
- tls_strp_load_anchor_with_queue(strp, inq);
-
sz = tls_rx_msg_size(strp, strp->anchor);
if (sz < 0) {
tls_strp_abort_strp(strp, sz);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 7bcc9b4408a2..6385329ef98d 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -872,6 +872,19 @@ more_data:
delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
delta -= msg->sg.size;
+
+ if ((s32)delta > 0) {
+ /* It indicates that we executed bpf_msg_pop_data(),
+ * causing the plaintext data size to decrease.
+ * Therefore the encrypted data size also needs to
+ * correspondingly decrease. We only need to subtract
+ * delta to calculate the new ciphertext length since
+ * ktls does not support block encryption.
+ */
+ struct sk_msg *enc = &ctx->open_rec->msg_encrypted;
+
+ sk_msg_trim(sk, enc, enc->sg.size - delta);
+ }
}
if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
!enospc && !full_record) {
@@ -908,6 +921,13 @@ more_data:
&msg_redir, send, flags);
lock_sock(sk);
if (err < 0) {
+ /* Regardless of whether the data represented by
+ * msg_redir is sent successfully, we have already
+ * uncharged it via sk_msg_return_zero(). The
+ * msg->sg.size represents the remaining unprocessed
+ * data, which needs to be uncharged here.
+ */
+ sk_mem_uncharge(sk, msg->sg.size);
*copied -= sk_msg_free_nocharge(sk, &msg_redir);
msg->sg.size = 0;
}
@@ -1120,9 +1140,13 @@ alloc_encrypted:
num_async++;
else if (ret == -ENOMEM)
goto wait_for_memory;
- else if (ctx->open_rec && ret == -ENOSPC)
+ else if (ctx->open_rec && ret == -ENOSPC) {
+ if (msg_pl->cork_bytes) {
+ ret = 0;
+ goto send_end;
+ }
goto rollback_iter;
- else if (ret != -EAGAIN)
+ } else if (ret != -EAGAIN)
goto send_end;
}
continue;
@@ -1356,7 +1380,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock,
return sock_intr_errno(timeo);
}
- tls_strp_msg_load(&ctx->strp, released);
+ if (unlikely(!tls_strp_msg_load(&ctx->strp, released)))
+ return tls_rx_rec_wait(sk, psock, nonblock, false);
return 1;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6b1762300443..45f8e21829ec 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -666,6 +666,11 @@ static void unix_sock_destructor(struct sock *sk)
#endif
}
+static unsigned int unix_skb_len(const struct sk_buff *skb)
+{
+ return skb->len - UNIXCB(skb).consumed;
+}
+
static void unix_release_sock(struct sock *sk, int embrion)
{
struct unix_sock *u = unix_sk(sk);
@@ -700,10 +705,16 @@ static void unix_release_sock(struct sock *sk, int embrion)
if (skpair != NULL) {
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (skb && !unix_skb_len(skb))
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+#endif
unix_state_lock(skpair);
/* No more writes */
WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
- if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
+ if (skb || embrion)
WRITE_ONCE(skpair->sk_err, ECONNRESET);
unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
@@ -2594,11 +2605,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
return timeo;
}
-static unsigned int unix_skb_len(const struct sk_buff *skb)
-{
- return skb->len - UNIXCB(skb).consumed;
-}
-
struct unix_stream_read_state {
int (*recv_actor)(struct sk_buff *, int, int,
struct unix_stream_read_state *);
@@ -2613,11 +2619,11 @@ struct unix_stream_read_state {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
static int unix_stream_recv_urg(struct unix_stream_read_state *state)
{
+ struct sk_buff *oob_skb, *read_skb = NULL;
struct socket *sock = state->socket;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
int chunk = 1;
- struct sk_buff *oob_skb;
mutex_lock(&u->iolock);
unix_state_lock(sk);
@@ -2632,9 +2638,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
oob_skb = u->oob_skb;
- if (!(state->flags & MSG_PEEK))
+ if (!(state->flags & MSG_PEEK)) {
WRITE_ONCE(u->oob_skb, NULL);
+ if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
+ !unix_skb_len(oob_skb->prev)) {
+ read_skb = oob_skb->prev;
+ __skb_unlink(read_skb, &sk->sk_receive_queue);
+ }
+ }
+
spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
@@ -2645,6 +2658,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
mutex_unlock(&u->iolock);
+ consume_skb(read_skb);
+
if (chunk < 0)
return -EFAULT;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b52b798aa4c2..ef519b55a3d9 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -337,7 +337,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
void vsock_remove_sock(struct vsock_sock *vsk)
{
- vsock_remove_bound(vsk);
+ /* Transport reassignment must not remove the binding. */
+ if (sock_flag(sk_vsock(vsk), SOCK_DEAD))
+ vsock_remove_bound(vsk);
+
vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
@@ -404,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
static bool vsock_use_local_transport(unsigned int remote_cid)
{
+ lockdep_assert_held(&vsock_register_mutex);
+
if (!transport_local)
return false;
@@ -461,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
remote_flags = vsk->remote_addr.svm_flags;
+ mutex_lock(&vsock_register_mutex);
+
switch (sk->sk_type) {
case SOCK_DGRAM:
new_transport = transport_dgram;
@@ -476,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
new_transport = transport_h2g;
break;
default:
- return -ESOCKTNOSUPPORT;
+ ret = -ESOCKTNOSUPPORT;
+ goto err;
}
if (vsk->transport) {
- if (vsk->transport == new_transport)
- return 0;
+ if (vsk->transport == new_transport) {
+ ret = 0;
+ goto err;
+ }
/* transport->release() must be called with sock lock acquired.
* This path can only be taken during vsock_connect(), where we
@@ -491,13 +501,30 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
*/
vsk->transport->release(vsk);
vsock_deassign_transport(vsk);
+
+ /* transport's release() and destruct() can touch some socket
+ * state, since we are reassigning the socket to a new transport
+ * during vsock_connect(), let's reset these fields to have a
+ * clean state.
+ */
+ sock_reset_flag(sk, SOCK_DONE);
+ sk->sk_state = TCP_CLOSE;
+ vsk->peer_shutdown = 0;
}
/* We increase the module refcnt to prevent the transport unloading
* while there are open sockets assigned to it.
*/
- if (!new_transport || !try_module_get(new_transport->module))
- return -ENODEV;
+ if (!new_transport || !try_module_get(new_transport->module)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ /* It's safe to release the mutex after a successful try_module_get().
+ * Whichever transport `new_transport` points at, it won't go away until
+ * the last module_put() below or in vsock_deassign_transport().
+ */
+ mutex_unlock(&vsock_register_mutex);
if (sk->sk_type == SOCK_SEQPACKET) {
if (!new_transport->seqpacket_allow ||
@@ -516,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
vsk->transport = new_transport;
return 0;
+err:
+ mutex_unlock(&vsock_register_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(vsock_assign_transport);
+/*
+ * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks.
+ * Otherwise we may race with module removal. Do not use on `vsk->transport`.
+ */
+static u32 vsock_registered_transport_cid(const struct vsock_transport **transport)
+{
+ u32 cid = VMADDR_CID_ANY;
+
+ mutex_lock(&vsock_register_mutex);
+ if (*transport)
+ cid = (*transport)->get_local_cid();
+ mutex_unlock(&vsock_register_mutex);
+
+ return cid;
+}
+
bool vsock_find_cid(unsigned int cid)
{
- if (transport_g2h && cid == transport_g2h->get_local_cid())
+ if (cid == vsock_registered_transport_cid(&transport_g2h))
return true;
if (transport_h2g && cid == VMADDR_CID_HOST)
@@ -643,7 +689,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
unsigned int i;
for (i = 0; i < MAX_PORT_RETRIES; i++) {
- if (port <= LAST_RESERVED_PORT)
+ if (port == VMADDR_PORT_ANY ||
+ port <= LAST_RESERVED_PORT)
port = LAST_RESERVED_PORT + 1;
new_addr.svm_port = port++;
@@ -812,6 +859,13 @@ static void __vsock_release(struct sock *sk, int level)
*/
lock_sock_nested(sk, level);
+ /* Indicate to vsock_remove_sock() that the socket is being released and
+ * can be removed from the bound_table. Unlike transport reassignment
+ * case, where the socket must remain bound despite vsock_remove_sock()
+ * being called from the transport release() callback.
+ */
+ sock_set_flag(sk, SOCK_DEAD);
+
if (vsk->transport)
vsk->transport->release(vsk);
else if (sock_type_connectible(sk->sk_type))
@@ -870,6 +924,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected);
s64 vsock_stream_has_data(struct vsock_sock *vsk)
{
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
return vsk->transport->stream_has_data(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);
@@ -878,6 +935,9 @@ s64 vsock_connectible_has_data(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
if (sk->sk_type == SOCK_SEQPACKET)
return vsk->transport->seqpacket_has_data(vsk);
else
@@ -887,6 +947,9 @@ EXPORT_SYMBOL_GPL(vsock_connectible_has_data);
s64 vsock_stream_has_space(struct vsock_sock *vsk)
{
+ if (WARN_ON(!vsk->transport))
+ return 0;
+
return vsk->transport->stream_has_space(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_space);
@@ -1161,6 +1224,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ if (WARN_ON_ONCE(!vsk->transport))
+ return -ENODEV;
+
return vsk->transport->read_skb(vsk, read_actor);
}
@@ -1501,6 +1567,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
if (err < 0)
goto out;
+ /* sk_err might have been set as a result of an earlier
+ * (failed) connect attempt.
+ */
+ sk->sk_err = 0;
+
/* Mark sock as connecting and set the error code to in
* progress in case this is a non-blocking connect.
*/
@@ -1515,7 +1586,11 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr,
timeout = vsk->connect_timeout;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
+ /* If the socket is already closing or it is in an error state, there
+ * is no point in waiting.
+ */
+ while (sk->sk_state != TCP_ESTABLISHED &&
+ sk->sk_state != TCP_CLOSING && sk->sk_err == 0) {
if (flags & O_NONBLOCK) {
/* If we're not going to block, we schedule a timeout
* function to generate a timeout on the connection
@@ -2462,18 +2537,19 @@ static long vsock_dev_do_ioctl(struct file *filp,
unsigned int cmd, void __user *ptr)
{
u32 __user *p = ptr;
- u32 cid = VMADDR_CID_ANY;
int retval = 0;
+ u32 cid;
switch (cmd) {
case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
/* To be compatible with the VMCI behavior, we prioritize the
* guest CID instead of well-know host CID (VMADDR_CID_HOST).
*/
- if (transport_g2h)
- cid = transport_g2h->get_local_cid();
- else if (transport_h2g)
- cid = transport_h2g->get_local_cid();
+ cid = vsock_registered_transport_cid(&transport_g2h);
+ if (cid == VMADDR_CID_ANY)
+ cid = vsock_registered_transport_cid(&transport_h2g);
+ if (cid == VMADDR_CID_ANY)
+ cid = vsock_registered_transport_cid(&transport_local);
if (put_user(cid, p) != 0)
retval = -EFAULT;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index b58c3818f284..f01f9e878106 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -307,7 +307,7 @@ out_rcu:
static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
{
- int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM;
+ int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
struct scatterlist pkt, *p;
struct virtqueue *vq;
struct sk_buff *skb;
@@ -670,6 +670,13 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
};
int ret;
+ mutex_lock(&vsock->rx_lock);
+ vsock->rx_buf_nr = 0;
+ vsock->rx_buf_max_nr = 0;
+ mutex_unlock(&vsock->rx_lock);
+
+ atomic_set(&vsock->queued_replies, 0);
+
ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL);
if (ret < 0)
return ret;
@@ -779,9 +786,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->vdev = vdev;
- vsock->rx_buf_nr = 0;
- vsock->rx_buf_max_nr = 0;
- atomic_set(&vsock->queued_replies, 0);
mutex_init(&vsock->tx_lock);
mutex_init(&vsock->rx_lock);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 9acc13ab3f82..2c9b1011cdcc 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -26,6 +26,9 @@
/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN 128
+static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
+ bool cancel_timeout);
+
static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
@@ -438,18 +441,20 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
u32 len)
{
- if (vvs->rx_bytes + len > vvs->buf_alloc)
+ if (vvs->buf_used + len > vvs->buf_alloc)
return false;
vvs->rx_bytes += len;
+ vvs->buf_used += len;
return true;
}
static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
- u32 len)
+ u32 bytes_read, u32 bytes_dequeued)
{
- vvs->rx_bytes -= len;
- vvs->fwd_cnt += len;
+ vvs->rx_bytes -= bytes_read;
+ vvs->buf_used -= bytes_dequeued;
+ vvs->fwd_cnt += bytes_dequeued;
}
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
@@ -578,11 +583,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- size_t bytes, total = 0;
struct sk_buff *skb;
u32 fwd_cnt_delta;
bool low_rx_bytes;
int err = -EFAULT;
+ size_t total = 0;
u32 free_space;
spin_lock_bh(&vvs->rx_lock);
@@ -594,6 +599,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
}
while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
+ size_t bytes, dequeued = 0;
+
skb = skb_peek(&vvs->rx_queue);
bytes = min_t(size_t, len - total,
@@ -617,12 +624,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;
if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
- u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
-
- virtio_transport_dec_rx_pkt(vvs, pkt_len);
+ dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len);
__skb_unlink(skb, &vvs->rx_queue);
consume_skb(skb);
}
+
+ virtio_transport_dec_rx_pkt(vvs, bytes, dequeued);
}
fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
@@ -778,7 +785,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
msg->msg_flags |= MSG_EOR;
}
- virtio_transport_dec_rx_pkt(vvs, pkt_len);
+ virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
kfree_skb(skb);
}
@@ -1109,6 +1116,8 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
{
struct virtio_vsock_sock *vvs = vsk->trans;
+ virtio_transport_cancel_close_work(vsk, true);
+
kfree(vvs);
vsk->trans = NULL;
}
@@ -1204,17 +1213,11 @@ static void virtio_transport_wait_close(struct sock *sk, long timeout)
}
}
-static void virtio_transport_do_close(struct vsock_sock *vsk,
- bool cancel_timeout)
+static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
+ bool cancel_timeout)
{
struct sock *sk = sk_vsock(vsk);
- sock_set_flag(sk, SOCK_DONE);
- vsk->peer_shutdown = SHUTDOWN_MASK;
- if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = TCP_CLOSING;
- sk->sk_state_change(sk);
-
if (vsk->close_work_scheduled &&
(!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
vsk->close_work_scheduled = false;
@@ -1226,6 +1229,20 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
}
}
+static void virtio_transport_do_close(struct vsock_sock *vsk,
+ bool cancel_timeout)
+{
+ struct sock *sk = sk_vsock(vsk);
+
+ sock_set_flag(sk, SOCK_DONE);
+ vsk->peer_shutdown = SHUTDOWN_MASK;
+ if (vsock_stream_has_data(vsk) <= 0)
+ sk->sk_state = TCP_CLOSING;
+ sk->sk_state_change(sk);
+
+ virtio_transport_cancel_close_work(vsk, cancel_timeout);
+}
+
static void virtio_transport_close_timeout(struct work_struct *work)
{
struct vsock_sock *vsk =
@@ -1628,8 +1645,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
lock_sock(sk);
- /* Check if sk has been closed before lock_sock */
- if (sock_flag(sk, SOCK_DONE)) {
+ /* Check if sk has been closed or assigned to another transport before
+ * lock_sock (note: listener sockets are not assigned to any transport)
+ */
+ if (sock_flag(sk, SOCK_DONE) ||
+ (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) {
(void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk);
sock_put(sk);
@@ -1719,6 +1739,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
struct sock *sk = sk_vsock(vsk);
struct virtio_vsock_hdr *hdr;
struct sk_buff *skb;
+ u32 pkt_len;
int off = 0;
int err;
@@ -1736,7 +1757,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count--;
- virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len));
+ pkt_len = le32_to_cpu(hdr->len);
+ virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
spin_unlock_bh(&vvs->rx_lock);
virtio_transport_send_credit_update(vsk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index b370070194fa..7eccd6708d66 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
u16 proto,
struct vmci_handle handle)
{
+ memset(pkt, 0, sizeof(*pkt));
+
/* We register the stream control handler as an any cid handle so we
* must always send from a source address of VMADDR_CID_ANY
*/
@@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
pkt->type = type;
pkt->src_port = src->svm_port;
pkt->dst_port = dst->svm_port;
- memset(&pkt->proto, 0, sizeof(pkt->proto));
- memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
switch (pkt->type) {
case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
index 4aa6e74ec295..07b96d56f3a5 100644
--- a/net/vmw_vsock/vsock_bpf.c
+++ b/net/vmw_vsock/vsock_bpf.c
@@ -77,6 +77,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
size_t len, int flags, int *addr_len)
{
struct sk_psock *psock;
+ struct vsock_sock *vsk;
int copied;
psock = sk_psock_get(sk);
@@ -84,6 +85,13 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
return __vsock_recvmsg(sk, msg, len, flags);
lock_sock(sk);
+ vsk = vsock_sk(sk);
+
+ if (WARN_ON_ONCE(!vsk->transport)) {
+ copied = -ENODEV;
+ goto out;
+ }
+
if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) {
release_sock(sk);
sk_psock_put(sk, psock);
@@ -108,6 +116,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
}
+out:
release_sock(sk);
sk_psock_put(sk, psock);
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index e579d7e1425f..c4f3fefeb354 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2018-2024 Intel Corporation
+ * Copyright 2018-2025 Intel Corporation
*/
#include <linux/export.h>
@@ -1621,6 +1621,12 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy,
if (cfg->reg_power == IEEE80211_REG_VLP_AP)
permitting_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP;
+ if ((cfg->iftype == NL80211_IFTYPE_P2P_GO ||
+ cfg->iftype == NL80211_IFTYPE_AP) &&
+ (chandef->width == NL80211_CHAN_WIDTH_20_NOHT ||
+ chandef->width == NL80211_CHAN_WIDTH_20))
+ permitting_flags |= IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY;
+
return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype,
check_no_ir ? IEEE80211_CHAN_NO_IR : 0,
permitting_flags);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7d313fb66d76..586e50678ed8 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -553,6 +553,9 @@ use_default_name:
INIT_WORK(&rdev->mgmt_registrations_update_wk,
cfg80211_mgmt_registrations_update_wk);
spin_lock_init(&rdev->mgmt_registrations_lock);
+ INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work);
+ INIT_LIST_HEAD(&rdev->wiphy_work_list);
+ spin_lock_init(&rdev->wiphy_work_lock);
#ifdef CONFIG_CFG80211_DEFAULT_PS
rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -570,9 +573,6 @@ use_default_name:
return NULL;
}
- INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work);
- INIT_LIST_HEAD(&rdev->wiphy_work_list);
- spin_lock_init(&rdev->wiphy_work_lock);
INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work);
INIT_WORK(&rdev->conn_work, cfg80211_conn_work);
INIT_WORK(&rdev->event_work, cfg80211_event_work);
@@ -1198,6 +1198,13 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
{
struct cfg80211_internal_bss *scan, *tmp;
struct cfg80211_beacon_registration *reg, *treg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+ WARN_ON(!list_empty(&rdev->wiphy_work_list));
+ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+ cancel_work_sync(&rdev->wiphy_work);
+
rfkill_destroy(rdev->wiphy.rfkill);
list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) {
list_del(&reg->list);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index a5eb92d93074..d1a66410b9c5 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -843,7 +843,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
mgmt = (const struct ieee80211_mgmt *)params->buf;
- if (!ieee80211_is_mgmt(mgmt->frame_control))
+ if (!ieee80211_is_mgmt(mgmt->frame_control) ||
+ ieee80211_has_order(mgmt->frame_control))
return -EINVAL;
stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1e78f575fb56..ec8265f2d568 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -229,6 +229,7 @@ static int validate_beacon_head(const struct nlattr *attr,
unsigned int len = nla_len(attr);
const struct element *elem;
const struct ieee80211_mgmt *mgmt = (void *)data;
+ const struct ieee80211_ext *ext;
unsigned int fixedlen, hdrlen;
bool s1g_bcn;
@@ -237,8 +238,10 @@ static int validate_beacon_head(const struct nlattr *attr,
s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control);
if (s1g_bcn) {
- fixedlen = offsetof(struct ieee80211_ext,
- u.s1g_beacon.variable);
+ ext = (struct ieee80211_ext *)mgmt;
+ fixedlen =
+ offsetof(struct ieee80211_ext, u.s1g_beacon.variable) +
+ ieee80211_s1g_optional_len(ext->frame_control);
hdrlen = offsetof(struct ieee80211_ext, u.s1g_beacon);
} else {
fixedlen = offsetof(struct ieee80211_mgmt,
@@ -1213,6 +1216,10 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
if ((chan->flags & IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP))
goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY) &&
+ nla_put_flag(msg,
+ NL80211_FREQUENCY_ATTR_ALLOW_20MHZ_ACTIVITY))
+ goto nla_put_failure;
}
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
@@ -4218,6 +4225,11 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags)
if (flags[flag])
*mntrflags |= (1<<flag);
+ /* cooked monitor mode is incompatible with other modes */
+ if (*mntrflags & MONITOR_FLAG_COOK_FRAMES &&
+ *mntrflags != MONITOR_FLAG_COOK_FRAMES)
+ return -EOPNOTSUPP;
+
*mntrflags |= MONITOR_FLAG_CHANGED;
return 0;
@@ -16777,6 +16789,7 @@ static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info)
if (!sar_spec)
return -ENOMEM;
+ sar_spec->num_sub_specs = specs;
sar_spec->type = type;
specs = 0;
nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) {
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 6489ba943a63..f6846eb0f4b8 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -5,7 +5,7 @@
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2024 Intel Corporation
+ * Copyright (C) 2018 - 2025 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -407,7 +407,8 @@ static bool is_an_alpha2(const char *alpha2)
{
if (!alpha2)
return false;
- return isalpha(alpha2[0]) && isalpha(alpha2[1]);
+ return isascii(alpha2[0]) && isalpha(alpha2[0]) &&
+ isascii(alpha2[1]) && isalpha(alpha2[1]);
}
static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y)
@@ -1602,6 +1603,8 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_PSD;
if (rd_flags & NL80211_RRF_ALLOW_6GHZ_VLP_AP)
channel_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP;
+ if (rd_flags & NL80211_RRF_ALLOW_20MHZ_ACTIVITY)
+ channel_flags |= IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY;
return channel_flags;
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index d0aed41ded2f..d80ab1725f28 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -763,12 +763,11 @@ static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request,
}
}
+ request->n_channels++;
request->channels[n_channels] = chan;
if (add_to_6ghz)
request->scan_6ghz_params[request->n_6ghz_params].channel_idx =
n_channels;
-
- request->n_channels++;
}
static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap,
@@ -858,9 +857,7 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
if (ret)
continue;
- entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN,
- GFP_ATOMIC);
-
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
continue;
@@ -2647,7 +2644,7 @@ cfg80211_defrag_mle(const struct element *mle, const u8 *ie, size_t ielen,
/* Required length for first defragmentation */
buf_len = mle->datalen - 1;
for_each_element(elem, mle->data + mle->datalen,
- ielen - sizeof(*mle) + mle->datalen) {
+ ie + ielen - mle->data - mle->datalen) {
if (elem->id != WLAN_EID_FRAGMENT)
break;
@@ -3216,6 +3213,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
const u8 *ie;
size_t ielen;
u64 tsf;
+ size_t s1g_optional_len;
if (WARN_ON(!mgmt))
return NULL;
@@ -3230,12 +3228,11 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
ext = (void *) mgmt;
- if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
- min_hdr_len = offsetof(struct ieee80211_ext,
- u.s1g_short_beacon.variable);
- else
- min_hdr_len = offsetof(struct ieee80211_ext,
- u.s1g_beacon.variable);
+ s1g_optional_len =
+ ieee80211_s1g_optional_len(ext->frame_control);
+ min_hdr_len =
+ offsetof(struct ieee80211_ext, u.s1g_beacon.variable) +
+ s1g_optional_len;
} else {
/* same for beacons */
min_hdr_len = offsetof(struct ieee80211_mgmt,
@@ -3251,11 +3248,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
const struct ieee80211_s1g_bcn_compat_ie *compat;
const struct element *elem;
- if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
- ie = ext->u.s1g_short_beacon.variable;
- else
- ie = ext->u.s1g_beacon.variable;
-
+ ie = ext->u.s1g_beacon.variable + s1g_optional_len;
elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, ie, ielen);
if (!elem)
return NULL;
diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c
index 9f458be71659..79a99cf5e892 100644
--- a/net/wireless/tests/scan.c
+++ b/net/wireless/tests/scan.c
@@ -810,6 +810,8 @@ static void test_cfg80211_parse_colocated_ap(struct kunit *test)
skb_put_data(input, "123", 3);
ies = kunit_kzalloc(test, struct_size(ies, data, input->len), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, ies);
+
ies->len = input->len;
memcpy(ies->data, input->data, input->len);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 18585b1416c6..b115489a846f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -820,6 +820,52 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr)
}
EXPORT_SYMBOL(ieee80211_is_valid_amsdu);
+
+/*
+ * Detects if an MSDU frame was maliciously converted into an A-MSDU
+ * frame by an adversary. This is done by parsing the received frame
+ * as if it were a regular MSDU, even though the A-MSDU flag is set.
+ *
+ * For non-mesh interfaces, detection involves checking whether the
+ * payload, when interpreted as an MSDU, begins with a valid RFC1042
+ * header. This is done by comparing the A-MSDU subheader's destination
+ * address to the start of the RFC1042 header.
+ *
+ * For mesh interfaces, the MSDU includes a 6-byte Mesh Control field
+ * and an optional variable-length Mesh Address Extension field before
+ * the RFC1042 header. The position of the RFC1042 header must therefore
+ * be calculated based on the mesh header length.
+ *
+ * Since this function intentionally parses an A-MSDU frame as an MSDU,
+ * it only assumes that the A-MSDU subframe header is present, and
+ * beyond this it performs its own bounds checks under the assumption
+ * that the frame is instead parsed as a non-aggregated MSDU.
+ */
+static bool
+is_amsdu_aggregation_attack(struct ethhdr *eth, struct sk_buff *skb,
+ enum nl80211_iftype iftype)
+{
+ int offset;
+
+ /* Non-mesh case can be directly compared */
+ if (iftype != NL80211_IFTYPE_MESH_POINT)
+ return ether_addr_equal(eth->h_dest, rfc1042_header);
+
+ offset = __ieee80211_get_mesh_hdrlen(eth->h_dest[0]);
+ if (offset == 6) {
+ /* Mesh case with empty address extension field */
+ return ether_addr_equal(eth->h_source, rfc1042_header);
+ } else if (offset + ETH_ALEN <= skb->len) {
+ /* Mesh case with non-empty address extension field */
+ u8 temp[ETH_ALEN];
+
+ skb_copy_bits(skb, offset, temp, ETH_ALEN);
+ return ether_addr_equal(temp, rfc1042_header);
+ }
+
+ return false;
+}
+
void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
const u8 *addr, enum nl80211_iftype iftype,
const unsigned int extra_headroom,
@@ -861,8 +907,10 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
/* the last MSDU has no padding */
if (subframe_len > remaining)
goto purge;
- /* mitigate A-MSDU aggregation injection attacks */
- if (ether_addr_equal(hdr.eth.h_dest, rfc1042_header))
+ /* mitigate A-MSDU aggregation injection attacks, to be
+ * checked when processing first subframe (offset == 0).
+ */
+ if (offset == 0 && is_amsdu_aggregation_attack(&hdr.eth, skb, iftype))
goto purge;
offset += sizeof(struct ethhdr);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b57d5d2904eb..f031b07baa57 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -338,13 +338,14 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
u32 len = xdp_get_buff_len(xdp);
int err;
- spin_lock_bh(&xs->rx_lock);
err = xsk_rcv_check(xs, xdp, len);
if (!err) {
+ spin_lock_bh(&xs->pool->rx_lock);
err = __xsk_rcv(xs, xdp, len);
xsk_flush(xs);
+ spin_unlock_bh(&xs->pool->rx_lock);
}
- spin_unlock_bh(&xs->rx_lock);
+
return err;
}
@@ -1720,7 +1721,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs = xdp_sk(sk);
xs->state = XSK_READY;
mutex_init(&xs->mutex);
- spin_lock_init(&xs->rx_lock);
INIT_LIST_HEAD(&xs->map_list);
spin_lock_init(&xs->map_list_lock);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 0662d34b09ee..b69dbd8615fc 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -87,6 +87,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
pool->addrs = umem->addrs;
pool->tx_metadata_len = umem->tx_metadata_len;
pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
+ spin_lock_init(&pool->rx_lock);
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
@@ -106,7 +107,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
if (pool->unaligned)
pool->free_heads[i] = xskb;
else
- xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
+ xp_init_xskb_addr(xskb, pool, (u64)i * pool->chunk_size);
}
return pool;
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index fe82e2d07300..fc7a603b04f1 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -171,8 +171,10 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
struct espintcp_ctx *ctx = espintcp_getctx(sk);
if (skb_queue_len(&ctx->out_queue) >=
- READ_ONCE(net_hotdata.max_backlog))
+ READ_ONCE(net_hotdata.max_backlog)) {
+ kfree_skb(skb);
return -ENOBUFS;
+ }
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 91357ccaf4af..5b9ee63e30b6 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -132,6 +132,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
+ [XFRMA_SA_PCPU] = { .type = NLA_U32 },
};
static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb,
@@ -282,9 +283,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
case XFRMA_MTIMER_THRESH:
case XFRMA_SA_DIR:
case XFRMA_NAT_KEEPALIVE_INTERVAL:
+ case XFRMA_SA_PCPU:
return xfrm_nla_cpy(dst, src, nla_len(src));
default:
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
return -EOPNOTSUPP;
}
@@ -439,7 +441,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
int err;
if (type > XFRMA_MAX) {
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index b33c4591e09a..32ad8f3fc81e 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -373,7 +373,6 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
xdo->dev = dev;
netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC);
- xdo->real_dev = dev;
xdo->type = XFRM_DEV_OFFLOAD_PACKET;
switch (dir) {
case XFRM_POLICY_IN:
@@ -395,7 +394,6 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack);
if (err) {
xdo->dev = NULL;
- xdo->real_dev = NULL;
xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
xdo->dir = 0;
netdev_put(dev, &xdo->dev_tracker);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 749e7eea99e4..841a60a6fbfe 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -572,7 +572,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
- x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
+ x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family);
if (x == NULL) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 98f1e2b67c76..b95d882f9dbc 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -874,7 +874,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
return -EINVAL;
}
- if (p.collect_md) {
+ if (p.collect_md || xi->p.collect_md) {
NL_SET_ERR_MSG(extack, "collect_md can't be changed");
return -EINVAL;
}
@@ -885,11 +885,6 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
} else {
if (xi->dev != dev)
return -EEXIST;
- if (xi->p.collect_md) {
- NL_SET_ERR_MSG(extack,
- "device can't be changed to collect_md");
- return -EINVAL;
- }
}
return xfrmi_update(xi, &p);
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e5722c95b8bb..a30538a980cc 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -610,6 +610,40 @@ out:
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);
+static int xfrm_dev_direct_output(struct sock *sk, struct xfrm_state *x,
+ struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct net *net = xs_net(x);
+ int err;
+
+ dst = skb_dst_pop(skb);
+ if (!dst) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
+ kfree_skb(skb);
+ return -EHOSTUNREACH;
+ }
+ skb_dst_set(skb, dst);
+ nf_reset_ct(skb);
+
+ err = skb_dst(skb)->ops->local_out(net, sk, skb);
+ if (unlikely(err != 1)) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ /* In transport mode, network destination is
+ * directly reachable, while in tunnel mode,
+ * inner packet network may not be. In packet
+ * offload type, HW is responsible for hard
+ * header packet mangling so directly xmit skb
+ * to netdevice.
+ */
+ skb->dev = x->xso.dev;
+ __skb_push(skb, skb->dev->hard_header_len);
+ return dev_queue_xmit(skb);
+}
+
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return xfrm_output_resume(sk, skb, 1);
@@ -729,6 +763,13 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
return -EHOSTUNREACH;
}
+ /* Exclusive direct xmit for tunnel mode, as
+ * some filtering or matching rules may apply
+ * in transport mode.
+ */
+ if (x->props.mode == XFRM_MODE_TUNNEL)
+ return xfrm_dev_direct_output(sk, x, skb);
+
return xfrm_output_resume(sk, skb, 0);
}
@@ -752,7 +793,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
skb->encapsulation = 1;
if (skb_is_gso(skb)) {
- if (skb->inner_protocol)
+ if (skb->inner_protocol && x->props.mode == XFRM_MODE_TUNNEL)
return xfrm_output_gso(net, sk, skb);
skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index a2ea9dbac90b..2c42d83fbaa2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -434,6 +434,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
+ INIT_HLIST_HEAD(&policy->state_cache_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@@ -475,6 +476,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ struct net *net = xp_net(policy);
+ struct xfrm_state *x;
+
xfrm_dev_policy_delete(policy);
write_lock_bh(&policy->lock);
@@ -490,6 +494,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
if (del_timer(&policy->timer))
xfrm_pol_put(policy);
+ /* XXX: Flush state cache */
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
+ hlist_del_init_rcu(&x->state_cache);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
xfrm_pol_put(policy);
}
@@ -1570,6 +1581,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct xfrm_policy *delpol;
struct hlist_head *chain;
+ /* Sanitize mark before store */
+ policy->mark.v &= policy->mark.m;
+
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
if (chain)
@@ -3275,6 +3289,7 @@ no_transform:
dst_release(dst);
dst = dst_orig;
}
+
ok:
xfrm_pols_put(pols, drop_pols);
if (dst && dst->xfrm &&
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index bc56c6305725..235bbefc2aba 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -714,10 +714,12 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
oseq += skb_shinfo(skb)->gso_segs;
}
- if (unlikely(xo->seq.low < replay_esn->oseq)) {
- XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi;
- xo->seq.hi = oseq_hi;
- replay_esn->oseq_hi = oseq_hi;
+ if (unlikely(oseq < replay_esn->oseq)) {
+ replay_esn->oseq_hi = ++oseq_hi;
+ if (xo->seq.low < replay_esn->oseq) {
+ XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi;
+ xo->seq.hi = oseq_hi;
+ }
if (replay_esn->oseq_hi == 0) {
replay_esn->oseq--;
replay_esn->oseq_hi--;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 37478d36a8df..6f99fd2d966c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -34,6 +34,8 @@
#define xfrm_state_deref_prot(table, net) \
rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+#define xfrm_state_deref_check(table, net) \
+ rcu_dereference_check((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
static void xfrm_state_gc_task(struct work_struct *work);
@@ -62,6 +64,8 @@ static inline unsigned int xfrm_dst_hash(struct net *net,
u32 reqid,
unsigned short family)
{
+ lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
}
@@ -70,6 +74,8 @@ static inline unsigned int xfrm_src_hash(struct net *net,
const xfrm_address_t *saddr,
unsigned short family)
{
+ lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
}
@@ -77,11 +83,15 @@ static inline unsigned int
xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
__be32 spi, u8 proto, unsigned short family)
{
+ lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
}
static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
{
+ lockdep_assert_held(&net->xfrm.xfrm_state_lock);
+
return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
}
@@ -665,6 +675,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
refcount_set(&x->refcnt, 1);
atomic_set(&x->tunnel_users, 0);
INIT_LIST_HEAD(&x->km.all);
+ INIT_HLIST_NODE(&x->state_cache);
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
@@ -679,6 +690,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
x->lft.hard_packet_limit = XFRM_INF;
x->replay_maxage = 0;
x->replay_maxdiff = 0;
+ x->pcpu_num = UINT_MAX;
spin_lock_init(&x->lock);
}
return x;
@@ -743,21 +755,24 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->km.state != XFRM_STATE_DEAD) {
x->km.state = XFRM_STATE_DEAD;
+
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
hlist_del_rcu(&x->bydst);
hlist_del_rcu(&x->bysrc);
if (x->km.seq)
hlist_del_rcu(&x->byseq);
+ if (!hlist_unhashed(&x->state_cache))
+ hlist_del_rcu(&x->state_cache);
+ if (!hlist_unhashed(&x->state_cache_input))
+ hlist_del_rcu(&x->state_cache_input);
+
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
xfrm_nat_keepalive_state_updated(x);
spin_unlock(&net->xfrm.xfrm_state_lock);
- if (x->encap_sk)
- sock_put(rcu_dereference_raw(x->encap_sk));
-
xfrm_dev_state_delete(x);
/* All xfrm_state objects are created by xfrm_state_alloc.
@@ -1033,16 +1048,38 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
x->props.family = tmpl->encap_family;
}
-static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
+struct xfrm_hash_state_ptrs {
+ const struct hlist_head *bydst;
+ const struct hlist_head *bysrc;
+ const struct hlist_head *byspi;
+ unsigned int hmask;
+};
+
+static void xfrm_hash_ptrs_get(const struct net *net, struct xfrm_hash_state_ptrs *ptrs)
+{
+ unsigned int sequence;
+
+ do {
+ sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
+
+ ptrs->bydst = xfrm_state_deref_check(net->xfrm.state_bydst, net);
+ ptrs->bysrc = xfrm_state_deref_check(net->xfrm.state_bysrc, net);
+ ptrs->byspi = xfrm_state_deref_check(net->xfrm.state_byspi, net);
+ ptrs->hmask = net->xfrm.state_hmask;
+ } while (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence));
+}
+
+static struct xfrm_state *__xfrm_state_lookup_all(const struct xfrm_hash_state_ptrs *state_ptrs,
+ u32 mark,
const xfrm_address_t *daddr,
__be32 spi, u8 proto,
unsigned short family,
struct xfrm_dev_offload *xdo)
{
- unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
+ unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask);
struct xfrm_state *x;
- hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
+ hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) {
#ifdef CONFIG_XFRM_OFFLOAD
if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
@@ -1076,15 +1113,16 @@ static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
return NULL;
}
-static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
+static struct xfrm_state *__xfrm_state_lookup(const struct xfrm_hash_state_ptrs *state_ptrs,
+ u32 mark,
const xfrm_address_t *daddr,
__be32 spi, u8 proto,
unsigned short family)
{
- unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
+ unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask);
struct xfrm_state *x;
- hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
+ hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) {
if (x->props.family != family ||
x->id.spi != spi ||
x->id.proto != proto ||
@@ -1101,15 +1139,63 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
return NULL;
}
-static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+ const xfrm_address_t *daddr,
+ __be32 spi, u8 proto,
+ unsigned short family)
+{
+ struct xfrm_hash_state_ptrs state_ptrs;
+ struct hlist_head *state_cache_input;
+ struct xfrm_state *x = NULL;
+
+ state_cache_input = raw_cpu_ptr(net->xfrm.state_cache_input);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
+ if (x->props.family != family ||
+ x->id.spi != spi ||
+ x->id.proto != proto ||
+ !xfrm_addr_equal(&x->id.daddr, daddr, family))
+ continue;
+
+ if ((mark & x->mark.m) != x->mark.v)
+ continue;
+ if (!xfrm_state_hold_rcu(x))
+ continue;
+ goto out;
+ }
+
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
+ x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family);
+
+ if (x && x->km.state == XFRM_STATE_VALID) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ if (hlist_unhashed(&x->state_cache_input)) {
+ hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+ } else {
+ hlist_del_rcu(&x->state_cache_input);
+ hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+
+out:
+ rcu_read_unlock();
+ return x;
+}
+EXPORT_SYMBOL(xfrm_input_state_lookup);
+
+static struct xfrm_state *__xfrm_state_lookup_byaddr(const struct xfrm_hash_state_ptrs *state_ptrs,
+ u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
u8 proto, unsigned short family)
{
- unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
+ unsigned int h = __xfrm_src_hash(daddr, saddr, family, state_ptrs->hmask);
struct xfrm_state *x;
- hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
+ hlist_for_each_entry_rcu(x, state_ptrs->bysrc + h, bysrc) {
if (x->props.family != family ||
x->id.proto != proto ||
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -1129,14 +1215,17 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
+ struct xfrm_hash_state_ptrs state_ptrs;
struct net *net = xs_net(x);
u32 mark = x->mark.v & x->mark.m;
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
if (use_spi)
- return __xfrm_state_lookup(net, mark, &x->id.daddr,
+ return __xfrm_state_lookup(&state_ptrs, mark, &x->id.daddr,
x->id.spi, x->id.proto, family);
else
- return __xfrm_state_lookup_byaddr(net, mark,
+ return __xfrm_state_lookup_byaddr(&state_ptrs, mark,
&x->id.daddr,
&x->props.saddr,
x->id.proto, family);
@@ -1153,7 +1242,7 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
const struct flowi *fl, unsigned short family,
struct xfrm_state **best, int *acq_in_progress,
- int *error)
+ int *error, unsigned int pcpu_id)
{
/* Resolution logic:
* 1. There is a valid state with matching selector. Done.
@@ -1174,13 +1263,18 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
&fl->u.__fl_common))
return;
+ if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
+ return;
+
if (!*best ||
+ ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
(*best)->km.dying > x->km.dying ||
((*best)->km.dying == x->km.dying &&
(*best)->curlft.add_time < x->curlft.add_time))
*best = x;
} else if (x->km.state == XFRM_STATE_ACQ) {
- *acq_in_progress = 1;
+ if (!*best || x->pcpu_num == pcpu_id)
+ *acq_in_progress = 1;
} else if (x->km.state == XFRM_STATE_ERROR ||
x->km.state == XFRM_STATE_EXPIRED) {
if ((!x->sel.family ||
@@ -1199,6 +1293,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short family, u32 if_id)
{
static xfrm_address_t saddr_wildcard = { };
+ struct xfrm_hash_state_ptrs state_ptrs;
struct net *net = xp_net(pol);
unsigned int h, h_wildcard;
struct xfrm_state *x, *x0, *to_put;
@@ -1209,14 +1304,63 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short encap_family = tmpl->encap_family;
unsigned int sequence;
struct km_event c;
+ unsigned int pcpu_id;
+ bool cached = false;
+
+ /* We need the cpu id just as a lookup key,
+ * we don't require it to be stable.
+ */
+ pcpu_id = raw_smp_processor_id();
to_put = NULL;
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
- h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
- hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, encap_family,
+ &best, &acquire_in_progress, &error, pcpu_id);
+ }
+
+ if (best)
+ goto cached;
+
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, family,
+ &best, &acquire_in_progress, &error, pcpu_id);
+ }
+
+cached:
+ cached = true;
+ if (best)
+ goto found;
+ else if (error)
+ best = NULL;
+ else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
+ WARN_ON(1);
+
+ h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask);
+ hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
@@ -1244,13 +1388,14 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
if (best || acquire_in_progress)
goto found;
- h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
- hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
+ h_wildcard = __xfrm_dst_hash(daddr, &saddr_wildcard, tmpl->reqid,
+ encap_family, state_ptrs.hmask);
+ hlist_for_each_entry_rcu(x, state_ptrs.bydst + h_wildcard, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
@@ -1278,14 +1423,17 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
found:
- x = best;
+ if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) ||
+ (best && (best->pcpu_num == pcpu_id)))
+ x = best;
+
if (!x && !error && !acquire_in_progress) {
if (tmpl->id.spi &&
- (x0 = __xfrm_state_lookup_all(net, mark, daddr,
+ (x0 = __xfrm_state_lookup_all(&state_ptrs, mark, daddr,
tmpl->id.spi, tmpl->id.proto,
encap_family,
&pol->xdo)) != NULL) {
@@ -1314,6 +1462,8 @@ found:
xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
memcpy(&x->mark, &pol->mark, sizeof(x->mark));
x->if_id = if_id;
+ if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best)
+ x->pcpu_num = pcpu_id;
error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
if (error) {
@@ -1330,7 +1480,6 @@ found:
xso->type = XFRM_DEV_OFFLOAD_PACKET;
xso->dir = xdo->dir;
xso->dev = xdo->dev;
- xso->real_dev = xdo->real_dev;
xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ;
netdev_hold(xso->dev, &xso->dev_tracker, GFP_ATOMIC);
error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL);
@@ -1338,7 +1487,6 @@ found:
xso->dir = 0;
netdev_put(xso->dev, &xso->dev_tracker);
xso->dev = NULL;
- xso->real_dev = NULL;
xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
x->km.state = XFRM_STATE_DEAD;
to_put = x;
@@ -1352,6 +1500,7 @@ found:
x->km.state = XFRM_STATE_ACQ;
x->dir = XFRM_SA_DIR_OUT;
list_add(&x->km.all, &net->xfrm.state_all);
+ h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
XFRM_STATE_INSERT(bydst, &x->bydst,
net->xfrm.state_bydst + h,
x->xso.type);
@@ -1359,6 +1508,7 @@ found:
XFRM_STATE_INSERT(bysrc, &x->bysrc,
net->xfrm.state_bysrc + h,
x->xso.type);
+ INIT_HLIST_NODE(&x->state_cache);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
XFRM_STATE_INSERT(byspi, &x->byspi,
@@ -1392,6 +1542,11 @@ found:
x = NULL;
error = -ESRCH;
}
+
+ /* Use the already installed 'fallback' while the CPU-specific
+ * SA acquire is handled*/
+ if (best)
+ x = best;
}
out:
if (x) {
@@ -1402,6 +1557,15 @@ out:
} else {
*err = acquire_in_progress ? -EAGAIN : error;
}
+
+ if (x && x->km.state == XFRM_STATE_VALID && !cached &&
+ (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ if (hlist_unhashed(&x->state_cache))
+ hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+
rcu_read_unlock();
if (to_put)
xfrm_state_put(to_put);
@@ -1473,6 +1637,26 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
}
EXPORT_SYMBOL(xfrm_state_lookup_byspi);
+static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 spi, u8 proto)
+{
+ struct xfrm_state *x;
+ unsigned int i;
+
+ rcu_read_lock();
+ for (i = 0; i <= net->xfrm.state_hmask; i++) {
+ hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) {
+ if (x->id.spi == spi && x->id.proto == proto) {
+ if (!xfrm_state_hold_rcu(x))
+ continue;
+ rcu_read_unlock();
+ return x;
+ }
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+}
+
static void __xfrm_state_insert(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -1480,6 +1664,9 @@ static void __xfrm_state_insert(struct xfrm_state *x)
list_add(&x->km.all, &net->xfrm.state_all);
+ /* Sanitize mark before store */
+ x->mark.v &= x->mark.m;
+
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family);
XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
@@ -1524,12 +1711,14 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
unsigned int h;
u32 mark = xnew->mark.v & xnew->mark.m;
u32 if_id = xnew->if_id;
+ u32 cpu_id = xnew->pcpu_num;
h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
if (x->props.family == family &&
x->props.reqid == reqid &&
x->if_id == if_id &&
+ x->pcpu_num == cpu_id &&
(mark & x->mark.m) == x->mark.v &&
xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@@ -1552,7 +1741,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
static struct xfrm_state *__find_acq_core(struct net *net,
const struct xfrm_mark *m,
unsigned short family, u8 mode,
- u32 reqid, u32 if_id, u8 proto,
+ u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
int create)
@@ -1569,6 +1758,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
x->id.spi != 0 ||
x->id.proto != proto ||
(mark & x->mark.m) != x->mark.v ||
+ x->pcpu_num != pcpu_num ||
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
!xfrm_addr_equal(&x->props.saddr, saddr, family))
continue;
@@ -1602,6 +1792,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
break;
}
+ x->pcpu_num = pcpu_num;
x->km.state = XFRM_STATE_ACQ;
x->id.proto = proto;
x->props.family = family;
@@ -1630,7 +1821,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
return x;
}
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);
int xfrm_state_add(struct xfrm_state *x)
{
@@ -1656,7 +1847,7 @@ int xfrm_state_add(struct xfrm_state *x)
}
if (use_spi && x->km.seq) {
- x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
+ x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num);
if (x1 && ((x1->id.proto != x->id.proto) ||
!xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
to_put = x1;
@@ -1666,7 +1857,7 @@ int xfrm_state_add(struct xfrm_state *x)
if (use_spi && !x1)
x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
- x->props.reqid, x->if_id, x->id.proto,
+ x->props.reqid, x->if_id, x->pcpu_num, x->id.proto,
&x->id.daddr, &x->props.saddr, 0);
__xfrm_state_bump_genids(x);
@@ -1791,6 +1982,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
+ x->pcpu_num = orig->pcpu_num;
x->if_id = orig->if_id;
x->tfcpad = orig->tfcpad;
x->replay_maxdiff = orig->replay_maxdiff;
@@ -2041,10 +2233,13 @@ struct xfrm_state *
xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family)
{
+ struct xfrm_hash_state_ptrs state_ptrs;
struct xfrm_state *x;
rcu_read_lock();
- x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
+ x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family);
rcu_read_unlock();
return x;
}
@@ -2055,10 +2250,14 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr, const xfrm_address_t *saddr,
u8 proto, unsigned short family)
{
+ struct xfrm_hash_state_ptrs state_ptrs;
struct xfrm_state *x;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
+
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
+ x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return x;
}
@@ -2066,13 +2265,14 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
- u32 if_id, u8 proto, const xfrm_address_t *daddr,
+ u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
const xfrm_address_t *saddr, int create, unsigned short family)
{
struct xfrm_state *x;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
+ x = __find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num,
+ proto, daddr, saddr, create);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return x;
@@ -2207,7 +2407,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
/* Silly enough, but I'm lazy to build resolution list */
-static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
{
unsigned int h = xfrm_seq_hash(net, seq);
struct xfrm_state *x;
@@ -2215,6 +2415,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
if (x->km.seq == seq &&
(mark & x->mark.m) == x->mark.v &&
+ x->pcpu_num == pcpu_num &&
x->km.state == XFRM_STATE_ACQ) {
xfrm_state_hold(x);
return x;
@@ -2224,12 +2425,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
return NULL;
}
-struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
+struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
{
struct xfrm_state *x;
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x = __xfrm_find_acq_byseq(net, mark, seq);
+ x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return x;
}
@@ -2284,10 +2485,8 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
unsigned int h;
struct xfrm_state *x0;
int err = -ENOENT;
- __be32 minspi = htonl(low);
- __be32 maxspi = htonl(high);
+ u32 range = high - low + 1;
__be32 newspi = 0;
- u32 mark = x->mark.v & x->mark.m;
spin_lock_bh(&x->lock);
if (x->km.state == XFRM_STATE_DEAD) {
@@ -2301,38 +2500,34 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
err = -ENOENT;
- if (minspi == maxspi) {
- x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
- if (x0) {
- NL_SET_ERR_MSG(extack, "Requested SPI is already in use");
- xfrm_state_put(x0);
+ for (h = 0; h < range; h++) {
+ u32 spi = (low == high) ? low : get_random_u32_inclusive(low, high);
+ newspi = htonl(spi);
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ x0 = xfrm_state_lookup_spi_proto(net, newspi, x->id.proto);
+ if (!x0) {
+ x->id.spi = newspi;
+ h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family);
+ XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ err = 0;
goto unlock;
}
- newspi = minspi;
- } else {
- u32 spi = 0;
- for (h = 0; h < high-low+1; h++) {
- spi = get_random_u32_inclusive(low, high);
- x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
- if (x0 == NULL) {
- newspi = htonl(spi);
- break;
- }
- xfrm_state_put(x0);
+ xfrm_state_put(x0);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ if (signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto unlock;
}
+
+ if (low == high)
+ break;
}
- if (newspi) {
- spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x->id.spi = newspi;
- h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
- XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
- x->xso.type);
- spin_unlock_bh(&net->xfrm.xfrm_state_lock);
- err = 0;
- } else {
+ if (err)
NL_SET_ERR_MSG(extack, "No SPI available in the requested range");
- }
unlock:
spin_unlock_bh(&x->lock);
@@ -2988,6 +3183,11 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_byseq = xfrm_hash_alloc(sz);
if (!net->xfrm.state_byseq)
goto out_byseq;
+
+ net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
+ if (!net->xfrm.state_cache_input)
+ goto out_state_cache_input;
+
net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
net->xfrm.state_num = 0;
@@ -2997,6 +3197,8 @@ int __net_init xfrm_state_init(struct net *net)
&net->xfrm.xfrm_state_lock);
return 0;
+out_state_cache_input:
+ xfrm_hash_free(net->xfrm.state_byseq, sz);
out_byseq:
xfrm_hash_free(net->xfrm.state_byspi, sz);
out_byspi:
@@ -3026,6 +3228,7 @@ void xfrm_state_fini(struct net *net)
xfrm_hash_free(net->xfrm.state_bysrc, sz);
WARN_ON(!hlist_empty(net->xfrm.state_bydst));
xfrm_hash_free(net->xfrm.state_bydst, sz);
+ free_percpu(net->xfrm.state_cache_input);
}
#ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e3b8ce89831a..d41e5642625e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -178,6 +178,28 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
"Replay seq and seq_hi should be 0 for output SA");
return -EINVAL;
}
+
+ if (!(p->flags & XFRM_STATE_ESN)) {
+ if (rs->oseq_hi) {
+ NL_SET_ERR_MSG(
+ extack,
+ "Replay oseq_hi should be 0 in non-ESN mode for output SA");
+ return -EINVAL;
+ }
+ if (rs->oseq == U32_MAX) {
+ NL_SET_ERR_MSG(
+ extack,
+ "Replay oseq should be less than 0xFFFFFFFF in non-ESN mode for output SA");
+ return -EINVAL;
+ }
+ } else {
+ if (rs->oseq == U32_MAX && rs->oseq_hi == U32_MAX) {
+ NL_SET_ERR_MSG(
+ extack,
+ "Replay oseq and oseq_hi should be less than 0xFFFFFFFF for output SA");
+ return -EINVAL;
+ }
+ }
if (rs->bmp_len) {
NL_SET_ERR_MSG(extack, "Replay bmp_len should 0 for output SA");
return -EINVAL;
@@ -190,6 +212,28 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
"Replay oseq and oseq_hi should be 0 for input SA");
return -EINVAL;
}
+ if (!(p->flags & XFRM_STATE_ESN)) {
+ if (rs->seq_hi) {
+ NL_SET_ERR_MSG(
+ extack,
+ "Replay seq_hi should be 0 in non-ESN mode for input SA");
+ return -EINVAL;
+ }
+
+ if (rs->seq == U32_MAX) {
+ NL_SET_ERR_MSG(
+ extack,
+ "Replay seq should be less than 0xFFFFFFFF in non-ESN mode for input SA");
+ return -EINVAL;
+ }
+ } else {
+ if (rs->seq == U32_MAX && rs->seq_hi == U32_MAX) {
+ NL_SET_ERR_MSG(
+ extack,
+ "Replay seq and seq_hi should be less than 0xFFFFFFFF for input SA");
+ return -EINVAL;
+ }
+ }
}
return 0;
@@ -460,6 +504,12 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
}
}
+ if (!sa_dir && attrs[XFRMA_SA_PCPU]) {
+ NL_SET_ERR_MSG(extack, "SA_PCPU only supported with SA_DIR");
+ err = -EINVAL;
+ goto out;
+ }
+
out:
return err;
}
@@ -841,6 +891,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
x->nat_keepalive_interval =
nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]);
+ if (attrs[XFRMA_SA_PCPU]) {
+ x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+ if (x->pcpu_num >= num_possible_cpus())
+ goto error;
+ }
+
err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack);
if (err)
goto error;
@@ -1296,6 +1352,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
if (ret)
goto out;
}
+ if (x->pcpu_num != UINT_MAX) {
+ ret = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+ if (ret)
+ goto out;
+ }
if (x->dir)
ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -1700,6 +1761,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 mark;
struct xfrm_mark m;
u32 if_id = 0;
+ u32 pcpu_num = UINT_MAX;
p = nlmsg_data(nlh);
err = verify_spi_info(p->info.id.proto, p->min, p->max, extack);
@@ -1716,8 +1778,16 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ if (attrs[XFRMA_SA_PCPU]) {
+ pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+ if (pcpu_num >= num_possible_cpus()) {
+ err = -EINVAL;
+ goto out_noput;
+ }
+ }
+
if (p->info.seq) {
- x = xfrm_find_acq_byseq(net, mark, p->info.seq);
+ x = xfrm_find_acq_byseq(net, mark, p->info.seq, pcpu_num);
if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
xfrm_state_put(x);
x = NULL;
@@ -1726,7 +1796,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!x)
x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
- if_id, p->info.id.proto, daddr,
+ if_id, pcpu_num, p->info.id.proto, daddr,
&p->info.saddr, 1,
family);
err = -ENOENT;
@@ -2526,7 +2596,8 @@ static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
+ nla_total_size(sizeof(struct xfrm_mark))
+ nla_total_size(4) /* XFRM_AE_RTHR */
+ nla_total_size(4) /* XFRM_AE_ETHR */
- + nla_total_size(sizeof(x->dir)); /* XFRMA_SA_DIR */
+ + nla_total_size(sizeof(x->dir)) /* XFRMA_SA_DIR */
+ + nla_total_size(4); /* XFRMA_SA_PCPU */
}
static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -2582,6 +2653,11 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
err = xfrm_if_id_put(skb, x->if_id);
if (err)
goto out_cancel;
+ if (x->pcpu_num != UINT_MAX) {
+ err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+ if (err)
+ goto out_cancel;
+ }
if (x->dir) {
err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -2852,6 +2928,13 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
xfrm_mark_get(attrs, &mark);
+ if (attrs[XFRMA_SA_PCPU]) {
+ x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]);
+ err = -EINVAL;
+ if (x->pcpu_num >= num_possible_cpus())
+ goto free_state;
+ }
+
err = verify_newpolicy_info(&ua->policy, extack);
if (err)
goto free_state;
@@ -3182,6 +3265,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
+ [XFRMA_SA_PCPU] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(xfrma_policy);
@@ -3348,7 +3432,8 @@ static inline unsigned int xfrm_expire_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) +
nla_total_size(sizeof(struct xfrm_mark)) +
- nla_total_size(sizeof_field(struct xfrm_state, dir));
+ nla_total_size(sizeof_field(struct xfrm_state, dir)) +
+ nla_total_size(4); /* XFRMA_SA_PCPU */
}
static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c)
@@ -3374,6 +3459,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
err = xfrm_if_id_put(skb, x->if_id);
if (err)
return err;
+ if (x->pcpu_num != UINT_MAX) {
+ err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
+ if (err)
+ return err;
+ }
if (x->dir) {
err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir);
@@ -3481,6 +3571,8 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
}
if (x->if_id)
l += nla_total_size(sizeof(x->if_id));
+ if (x->pcpu_num)
+ l += nla_total_size(sizeof(x->pcpu_num));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size_64bit(sizeof(u64));
@@ -3587,6 +3679,7 @@ static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x,
+ nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
+ nla_total_size(sizeof(struct xfrm_mark))
+ nla_total_size(xfrm_user_sec_ctx_size(x->security))
+ + nla_total_size(4) /* XFRMA_SA_PCPU */
+ userpolicy_type_attrsize();
}
@@ -3623,6 +3716,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
err = xfrm_if_id_put(skb, xp->if_id);
if (!err && xp->xdo.dev)
err = copy_user_offload(&xp->xdo, skb);
+ if (!err && x->pcpu_num != UINT_MAX)
+ err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num);
if (err) {
nlmsg_cancel(skb, nlh);
return err;