summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/8021q/vlan_dev.c9
-rw-r--r--net/9p/Kconfig1
-rw-r--r--net/Kconfig4
-rw-r--r--net/Makefile23
-rw-r--r--net/appletalk/ddp.c33
-rw-r--r--net/atm/pppoatm.c15
-rw-r--r--net/batman-adv/Kconfig3
-rw-r--r--net/batman-adv/Makefile2
-rw-r--r--net/batman-adv/bat_algo.c2
-rw-r--r--net/batman-adv/bat_algo.h2
-rw-r--r--net/batman-adv/bat_iv_ogm.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.h2
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/bat_v.h2
-rw-r--r--net/batman-adv/bat_v_elp.c2
-rw-r--r--net/batman-adv/bat_v_elp.h2
-rw-r--r--net/batman-adv/bat_v_ogm.c2
-rw-r--r--net/batman-adv/bat_v_ogm.h2
-rw-r--r--net/batman-adv/bitarray.c2
-rw-r--r--net/batman-adv/bitarray.h2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c2
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h2
-rw-r--r--net/batman-adv/distributed-arp-table.c6
-rw-r--r--net/batman-adv/distributed-arp-table.h2
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/fragmentation.h2
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/gateway_client.h2
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/gateway_common.h2
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/batman-adv/hard-interface.h2
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h2
-rw-r--r--net/batman-adv/log.c2
-rw-r--r--net/batman-adv/log.h2
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/batman-adv/main.h4
-rw-r--r--net/batman-adv/multicast.c4
-rw-r--r--net/batman-adv/multicast.h2
-rw-r--r--net/batman-adv/netlink.c6
-rw-r--r--net/batman-adv/netlink.h2
-rw-r--r--net/batman-adv/network-coding.c2
-rw-r--r--net/batman-adv/network-coding.h2
-rw-r--r--net/batman-adv/originator.c2
-rw-r--r--net/batman-adv/originator.h2
-rw-r--r--net/batman-adv/routing.c2
-rw-r--r--net/batman-adv/routing.h2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/send.h2
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/batman-adv/soft-interface.h2
-rw-r--r--net/batman-adv/tp_meter.c4
-rw-r--r--net/batman-adv/tp_meter.h2
-rw-r--r--net/batman-adv/trace.c2
-rw-r--r--net/batman-adv/trace.h2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/batman-adv/translation-table.h2
-rw-r--r--net/batman-adv/tvlv.c2
-rw-r--r--net/batman-adv/tvlv.h2
-rw-r--r--net/batman-adv/types.h5
-rw-r--r--net/bluetooth/Kconfig2
-rw-r--r--net/bluetooth/a2mp.c3
-rw-r--r--net/bluetooth/af_bluetooth.c22
-rw-r--r--net/bluetooth/amp.c3
-rw-r--r--net/bluetooth/hci_conn.c37
-rw-r--r--net/bluetooth/hci_core.c201
-rw-r--r--net/bluetooth/hci_debugfs.c80
-rw-r--r--net/bluetooth/hci_request.c74
-rw-r--r--net/bluetooth/l2cap_core.c119
-rw-r--r--net/bluetooth/mgmt.c399
-rw-r--r--net/bluetooth/msft.c460
-rw-r--r--net/bluetooth/msft.h30
-rw-r--r--net/bluetooth/smp.c5
-rw-r--r--net/bpf/test_run.c14
-rw-r--r--net/bpfilter/Kconfig2
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_fdb.c1
-rw-r--r--net/bridge/br_forward.c3
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_mrp.c58
-rw-r--r--net/bridge/br_mrp_switchdev.c178
-rw-r--r--net/bridge/br_multicast.c265
-rw-r--r--net/bridge/br_multicast_eht.c878
-rw-r--r--net/bridge/br_netlink.c151
-rw-r--r--net/bridge/br_private.h32
-rw-r--r--net/bridge/br_private_mcast_eht.h93
-rw-r--r--net/bridge/br_private_mrp.h70
-rw-r--r--net/bridge/br_stp.c8
-rw-r--r--net/bridge/br_switchdev.c33
-rw-r--r--net/bridge/br_sysfs_br.c170
-rw-r--r--net/bridge/br_sysfs_if.c16
-rw-r--r--net/bridge/br_vlan.c31
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c5
-rw-r--r--net/caif/chnl_net.c5
-rw-r--r--net/can/Kconfig1
-rw-r--r--net/can/af_can.c34
-rw-r--r--net/can/gw.c2
-rw-r--r--net/can/isotp.c1
-rw-r--r--net/can/j1939/main.c22
-rw-r--r--net/can/j1939/socket.c13
-rw-r--r--net/can/proc.c19
-rw-r--r--net/can/raw.c16
-rw-r--r--net/ceph/auth_x.c57
-rw-r--r--net/ceph/ceph_common.c17
-rw-r--r--net/ceph/crypto.c3
-rw-r--r--net/ceph/messenger_v1.c2
-rw-r--r--net/ceph/messenger_v2.c60
-rw-r--r--net/ceph/mon_client.c14
-rw-r--r--net/ceph/osd_client.c40
-rw-r--r--net/core/datagram.c12
-rw-r--r--net/core/dev.c643
-rw-r--r--net/core/dev_ioctl.c20
-rw-r--r--net/core/devlink.c319
-rw-r--r--net/core/filter.c230
-rw-r--r--net/core/flow_dissector.c35
-rw-r--r--net/core/gen_estimator.c11
-rw-r--r--net/core/neighbour.c14
-rw-r--r--net/core/net-sysfs.c118
-rw-r--r--net/core/net_namespace.c19
-rw-r--r--net/core/netpoll.c22
-rw-r--r--net/core/page_pool.c14
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c27
-rw-r--r--net/core/skbuff.c587
-rw-r--r--net/core/skmsg.c3
-rw-r--r--net/core/sock.c212
-rw-r--r--net/core/sock_map.c2
-rw-r--r--net/core/sock_reuseport.c2
-rw-r--r--net/core/sysctl_net_core.c2
-rw-r--r--net/core/xdp.c70
-rw-r--r--net/dcb/Makefile2
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/dccp/feat.c2
-rw-r--r--net/decnet/dn_route.c2
-rw-r--r--net/dns_resolver/Kconfig2
-rw-r--r--net/dsa/Kconfig28
-rw-r--r--net/dsa/Makefile2
-rw-r--r--net/dsa/dsa.c60
-rw-r--r--net/dsa/dsa2.c395
-rw-r--r--net/dsa/dsa_priv.h162
-rw-r--r--net/dsa/master.c49
-rw-r--r--net/dsa/port.c376
-rw-r--r--net/dsa/slave.c497
-rw-r--r--net/dsa/switch.c322
-rw-r--r--net/dsa/tag_8021q.c15
-rw-r--r--net/dsa/tag_brcm.c1
-rw-r--r--net/dsa/tag_dsa.c17
-rw-r--r--net/dsa/tag_ocelot.c252
-rw-r--r--net/dsa/tag_ocelot_8021q.c102
-rw-r--r--net/dsa/tag_rtl4_a.c43
-rw-r--r--net/dsa/tag_xrs700x.c66
-rw-r--r--net/ethtool/common.c152
-rw-r--r--net/ethtool/common.h7
-rw-r--r--net/ethtool/ioctl.c18
-rw-r--r--net/ethtool/linkmodes.c208
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/hsr/hsr_device.c53
-rw-r--r--net/hsr/hsr_device.h1
-rw-r--r--net/hsr/hsr_forward.c35
-rw-r--r--net/hsr/hsr_forward.h1
-rw-r--r--net/hsr/hsr_framereg.c11
-rw-r--r--net/hsr/hsr_framereg.h1
-rw-r--r--net/hsr/hsr_main.c11
-rw-r--r--net/hsr/hsr_main.h14
-rw-r--r--net/hsr/hsr_slave.c10
-rw-r--r--net/ife/Kconfig1
-rw-r--r--net/ipv4/af_inet.c23
-rw-r--r--net/ipv4/esp4.c7
-rw-r--r--net/ipv4/esp4_offload.c2
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_lookup.h6
-rw-r--r--net/ipv4/fib_semantics.c7
-rw-r--r--net/ipv4/fib_trie.c38
-rw-r--r--net/ipv4/gre_demux.c2
-rw-r--r--net/ipv4/gre_offload.c22
-rw-r--r--net/ipv4/icmp.c5
-rw-r--r--net/ipv4/inet_connection_sock.c1
-rw-r--r--net/ipv4/inet_hashtables.c25
-rw-r--r--net/ipv4/ip_input.c1
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ip_tunnel.c27
-rw-r--r--net/ipv4/ip_tunnel_core.c9
-rw-r--r--net/ipv4/ipconfig.c22
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ip_tables.c2
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c2
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c18
-rw-r--r--net/ipv4/nexthop.c353
-rw-r--r--net/ipv4/proc.c50
-rw-r--r--net/ipv4/route.c14
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv4/tcp.c200
-rw-r--r--net/ipv4/tcp_cubic.c11
-rw-r--r--net/ipv4/tcp_input.c47
-rw-r--r--net/ipv4/tcp_ipv4.c35
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/tcp_recovery.c5
-rw-r--r--net/ipv4/tcp_timer.c54
-rw-r--r--net/ipv4/udp.c16
-rw-r--r--net/ipv4/udp_offload.c76
-rw-r--r--net/ipv4/udp_tunnel_core.c24
-rw-r--r--net/ipv6/addrconf.c14
-rw-r--r--net/ipv6/af_inet6.c19
-rw-r--r--net/ipv6/esp6.c9
-rw-r--r--net/ipv6/icmp.c18
-rw-r--r--net/ipv6/ip6_fib.c10
-rw-r--r--net/ipv6/ip6_icmp.c12
-rw-r--r--net/ipv6/ip6_input.c3
-rw-r--r--net/ipv6/ip6_offload.c1
-rw-r--r--net/ipv6/ip6_output.c47
-rw-r--r--net/ipv6/ndisc.c12
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c18
-rw-r--r--net/ipv6/route.c72
-rw-r--r--net/ipv6/seg6_local.c67
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/sysctl_net_ipv6.c9
-rw-r--r--net/ipv6/tcp_ipv6.c6
-rw-r--r--net/ipv6/udp.c11
-rw-r--r--net/ipv6/udp_offload.c2
-rw-r--r--net/iucv/af_iucv.c122
-rw-r--r--net/kcm/kcmsock.c8
-rw-r--r--net/key/af_key.c6
-rw-r--r--net/l3mdev/Makefile2
-rw-r--r--net/lapb/lapb_iface.c71
-rw-r--r--net/lapb/lapb_out.c3
-rw-r--r--net/lapb/lapb_timer.c30
-rw-r--r--net/llc/Kconfig1
-rw-r--r--net/mac80211/Kconfig2
-rw-r--r--net/mac80211/Makefile2
-rw-r--r--net/mac80211/debugfs.c96
-rw-r--r--net/mac80211/debugfs_sta.c1
-rw-r--r--net/mac80211/driver-ops.c5
-rw-r--r--net/mac80211/driver-ops.h16
-rw-r--r--net/mac80211/he.c92
-rw-r--r--net/mac80211/ieee80211_i.h4
-rw-r--r--net/mac80211/iface.c46
-rw-r--r--net/mac80211/key.c4
-rw-r--r--net/mac80211/main.c5
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mlme.c3
-rw-r--r--net/mac80211/pm.c6
-rw-r--r--net/mac80211/rate.c3
-rw-r--r--net/mac80211/rc80211_minstrel.c574
-rw-r--r--net/mac80211/rc80211_minstrel.h184
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c172
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c1192
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h137
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c79
-rw-r--r--net/mac80211/rx.c245
-rw-r--r--net/mac80211/spectmgmt.c10
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c8
-rw-r--r--net/mac80211/tdls.c6
-rw-r--r--net/mac80211/trace.h18
-rw-r--r--net/mac80211/tx.c98
-rw-r--r--net/mac80211/util.c14
-rw-r--r--net/mac80211/vht.c9
-rw-r--r--net/mptcp/mib.c8
-rw-r--r--net/mptcp/mib.h8
-rw-r--r--net/mptcp/mptcp_diag.c8
-rw-r--r--net/mptcp/options.c94
-rw-r--r--net/mptcp/pm.c46
-rw-r--r--net/mptcp/pm_netlink.c826
-rw-r--r--net/mptcp/protocol.c424
-rw-r--r--net/mptcp/protocol.h155
-rw-r--r--net/mptcp/subflow.c268
-rw-r--r--net/ncsi/ncsi-rsp.c2
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h42
-rw-r--r--net/netfilter/ipvs/Kconfig13
-rw-r--r--net/netfilter/ipvs/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_twos.c139
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_conntrack_netlink.c7
-rw-r--r--net/netfilter/nf_conntrack_standalone.c3
-rw-r--r--net/netfilter/nf_flow_table_core.c10
-rw-r--r--net/netfilter/nf_nat_core.c1
-rw-r--r--net/netfilter/nf_tables_api.c356
-rw-r--r--net/netfilter/nfnetlink_log.c8
-rw-r--r--net/netfilter/nfnetlink_queue.c10
-rw-r--r--net/netfilter/nft_bitwise.c23
-rw-r--r--net/netfilter/nft_byteorder.c14
-rw-r--r--net/netfilter/nft_cmp.c12
-rw-r--r--net/netfilter/nft_ct.c12
-rw-r--r--net/netfilter/nft_dup_netdev.c6
-rw-r--r--net/netfilter/nft_dynset.c68
-rw-r--r--net/netfilter/nft_exthdr.c14
-rw-r--r--net/netfilter/nft_fib.c5
-rw-r--r--net/netfilter/nft_fwd_netdev.c18
-rw-r--r--net/netfilter/nft_hash.c25
-rw-r--r--net/netfilter/nft_immediate.c6
-rw-r--r--net/netfilter/nft_lookup.c14
-rw-r--r--net/netfilter/nft_masq.c18
-rw-r--r--net/netfilter/nft_meta.c8
-rw-r--r--net/netfilter/nft_nat.c35
-rw-r--r--net/netfilter/nft_numgen.c15
-rw-r--r--net/netfilter/nft_objref.c6
-rw-r--r--net/netfilter/nft_osf.c8
-rw-r--r--net/netfilter/nft_payload.c10
-rw-r--r--net/netfilter/nft_queue.c12
-rw-r--r--net/netfilter/nft_range.c6
-rw-r--r--net/netfilter/nft_redir.c18
-rw-r--r--net/netfilter/nft_rt.c7
-rw-r--r--net/netfilter/nft_socket.c7
-rw-r--r--net/netfilter/nft_tproxy.c14
-rw-r--r--net/netfilter/nft_tunnel.c8
-rw-r--r--net/netfilter/nft_xfrm.c7
-rw-r--r--net/netfilter/xt_RATEEST.c3
-rw-r--r--net/netfilter/xt_recent.c12
-rw-r--r--net/netlink/af_netlink.c8
-rw-r--r--net/netlink/genetlink.c32
-rw-r--r--net/nfc/Kconfig1
-rw-r--r--net/nfc/hci/llc_shdlc.c2
-rw-r--r--net/nfc/nci/core.c6
-rw-r--r--net/nfc/nci/uart.c3
-rw-r--r--net/nfc/netlink.c5
-rw-r--r--net/nfc/rawsock.c2
-rw-r--r--net/openvswitch/actions.c12
-rw-r--r--net/openvswitch/flow_netlink.c14
-rw-r--r--net/packet/af_packet.c8
-rw-r--r--net/packet/internal.h2
-rw-r--r--net/psample/Kconfig1
-rw-r--r--net/psample/psample.c4
-rw-r--r--net/qrtr/ns.c7
-rw-r--r--net/qrtr/qrtr.c16
-rw-r--r--net/qrtr/qrtr.h2
-rw-r--r--net/qrtr/tun.c18
-rw-r--r--net/rds/rdma.c3
-rw-r--r--net/rxrpc/Kconfig1
-rw-r--r--net/rxrpc/af_rxrpc.c6
-rw-r--r--net/rxrpc/call_accept.c1
-rw-r--r--net/rxrpc/call_object.c2
-rw-r--r--net/rxrpc/input.c2
-rw-r--r--net/rxrpc/key.c6
-rw-r--r--net/rxrpc/local_object.c74
-rw-r--r--net/sched/act_api.c106
-rw-r--r--net/sched/act_ct.c2
-rw-r--r--net/sched/cls_api.c12
-rw-r--r--net/sched/cls_flower.c86
-rw-r--r--net/sched/cls_tcindex.c8
-rw-r--r--net/sched/em_nbyte.c2
-rw-r--r--net/sched/sch_api.c10
-rw-r--r--net/sched/sch_atm.c3
-rw-r--r--net/sched/sch_cbq.c3
-rw-r--r--net/sched/sch_choke.c2
-rw-r--r--net/sched/sch_drr.c3
-rw-r--r--net/sched/sch_dsmark.c3
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_hfsc.c3
-rw-r--r--net/sched/sch_htb.c557
-rw-r--r--net/sched/sch_qfq.c3
-rw-r--r--net/sched/sch_red.c2
-rw-r--r--net/sched/sch_sfb.c3
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/sched/sch_taprio.c13
-rw-r--r--net/sctp/offload.c2
-rw-r--r--net/sctp/proc.c16
-rw-r--r--net/smc/smc_core.c20
-rw-r--r--net/smc/smc_ib.c6
-rw-r--r--net/smc/smc_ism.c3
-rw-r--r--net/socket.c9
-rw-r--r--net/sunrpc/addr.c2
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c30
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h45
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c31
-rw-r--r--net/sunrpc/rpc_pipe.c1
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svc_xprt.c4
-rw-r--r--net/sunrpc/svcsock.c94
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c12
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c67
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c198
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c91
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c6
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h15
-rw-r--r--net/sunrpc/xprtsock.c17
-rw-r--r--net/switchdev/Makefile2
-rw-r--r--net/switchdev/switchdev.c131
-rw-r--r--net/tipc/link.c11
-rw-r--r--net/tipc/monitor.c2
-rw-r--r--net/tipc/msg.c4
-rw-r--r--net/tipc/node.c2
-rw-r--r--net/tls/Kconfig1
-rw-r--r--net/tls/tls_device.c4
-rw-r--r--net/tls/tls_device_fallback.c2
-rw-r--r--net/unix/af_unix.c5
-rw-r--r--net/vmw_vsock/af_vsock.c32
-rw-r--r--net/vmw_vsock/hyperv_transport.c4
-rw-r--r--net/vmw_vsock/virtio_transport_common.c4
-rw-r--r--net/wireless/Kconfig1
-rw-r--r--net/wireless/chan.c5
-rw-r--r--net/wireless/core.c164
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/debugfs.c4
-rw-r--r--net/wireless/ibss.c3
-rw-r--r--net/wireless/mlme.c6
-rw-r--r--net/wireless/nl80211.c664
-rw-r--r--net/wireless/reg.c102
-rw-r--r--net/wireless/reg.h1
-rw-r--r--net/wireless/scan.c35
-rw-r--r--net/wireless/sme.c5
-rw-r--r--net/wireless/sysfs.c12
-rw-r--r--net/wireless/util.c39
-rw-r--r--net/wireless/wext-compat.c285
-rw-r--r--net/wireless/wext-core.c5
-rw-r--r--net/wireless/wext-sme.c4
-rw-r--r--net/xdp/xsk.c67
-rw-r--r--net/xdp/xsk_buff_pool.c15
-rw-r--r--net/xdp/xsk_queue.h5
-rw-r--r--net/xfrm/xfrm_input.c2
-rw-r--r--net/xfrm/xfrm_interface.c10
-rw-r--r--net/xfrm/xfrm_policy.c30
-rw-r--r--net/xfrm/xfrm_user.c2
422 files changed, 13684 insertions, 6594 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index f292e0267bb9..8b644113715e 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -284,8 +284,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
return 0;
out_free_newdev:
- if (new_dev->reg_state == NETREG_UNINITIALIZED)
- free_netdev(new_dev);
+ free_netdev(new_dev);
return err;
}
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ec8408d1638f..dc1a197792e6 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -510,9 +510,17 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev)
netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL);
}
+static __be16 vlan_parse_protocol(const struct sk_buff *skb)
+{
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+
+ return __vlan_get_protocol(skb, veth->h_vlan_proto, NULL);
+}
+
static const struct header_ops vlan_header_ops = {
.create = vlan_dev_hard_header,
.parse = eth_header_parse,
+ .parse_protocol = vlan_parse_protocol,
};
static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev,
@@ -532,6 +540,7 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev
static const struct header_ops vlan_passthru_header_ops = {
.create = vlan_passthru_hard_header,
.parse = eth_header_parse,
+ .parse_protocol = vlan_parse_protocol,
};
static struct device_type vlan_type = {
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 3d11fec3a8dc..64468c49791f 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -4,7 +4,6 @@
#
menuconfig NET_9P
- depends on NET
tristate "Plan 9 Resource Sharing Support (9P2000)"
help
If you say Y here, you will get experimental support for
diff --git a/net/Kconfig b/net/Kconfig
index f4c32d982af6..8cea808ad9e8 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -256,9 +256,13 @@ config RFS_ACCEL
select CPU_RMAP
default y
+config SOCK_RX_QUEUE_MAPPING
+ bool
+
config XPS
bool
depends on SMP
+ select SOCK_RX_QUEUE_MAPPING
default y
config HWBM
diff --git a/net/Makefile b/net/Makefile
index d96b0aa8f39f..9ca9572188fe 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -6,20 +6,19 @@
# Rewritten to use lists instead of if-statements.
#
-obj-$(CONFIG_NET) := devres.o socket.o core/
+obj-y := devres.o socket.o core/
-tmp-$(CONFIG_COMPAT) := compat.o
-obj-$(CONFIG_NET) += $(tmp-y)
+obj-$(CONFIG_COMPAT) += compat.o
# LLC has to be linked before the files in net/802/
obj-$(CONFIG_LLC) += llc/
-obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/
+obj-y += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX_SCM) += unix/
-obj-$(CONFIG_NET) += ipv6/
+obj-y += ipv6/
obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
@@ -56,16 +55,12 @@ obj-$(CONFIG_SMC) += smc/
obj-$(CONFIG_RFKILL) += rfkill/
obj-$(CONFIG_NET_9P) += 9p/
obj-$(CONFIG_CAIF) += caif/
-ifneq ($(CONFIG_DCB),)
-obj-y += dcb/
-endif
+obj-$(CONFIG_DCB) += dcb/
obj-$(CONFIG_6LOWPAN) += 6lowpan/
obj-$(CONFIG_IEEE802154) += ieee802154/
obj-$(CONFIG_MAC802154) += mac802154/
-ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_SYSCTL) += sysctl_net.o
-endif
obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/
obj-$(CONFIG_CEPH_LIB) += ceph/
obj-$(CONFIG_BATMAN_ADV) += batman-adv/
@@ -77,12 +72,8 @@ obj-$(CONFIG_VSOCKETS) += vmw_vsock/
obj-$(CONFIG_MPLS) += mpls/
obj-$(CONFIG_NET_NSH) += nsh/
obj-$(CONFIG_HSR) += hsr/
-ifneq ($(CONFIG_NET_SWITCHDEV),)
-obj-y += switchdev/
-endif
-ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
-obj-y += l3mdev/
-endif
+obj-$(CONFIG_NET_SWITCHDEV) += switchdev/
+obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev/
obj-$(CONFIG_QRTR) += qrtr/
obj-$(CONFIG_NET_NCSI) += ncsi/
obj-$(CONFIG_XDP_SOCKETS) += xdp/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index ca1a0d07a087..ebda397fa95a 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1577,8 +1577,8 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct sk_buff *skb;
struct net_device *dev;
struct ddpehdr *ddp;
- int size;
- struct atalk_route *rt;
+ int size, hard_header_len;
+ struct atalk_route *rt, *rt_lo = NULL;
int err;
if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
@@ -1641,7 +1641,22 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n",
sk, size, dev->name);
- size += dev->hard_header_len;
+ hard_header_len = dev->hard_header_len;
+ /* Leave room for loopback hardware header if necessary */
+ if (usat->sat_addr.s_node == ATADDR_BCAST &&
+ (dev->flags & IFF_LOOPBACK || !(rt->flags & RTF_GATEWAY))) {
+ struct atalk_addr at_lo;
+
+ at_lo.s_node = 0;
+ at_lo.s_net = 0;
+
+ rt_lo = atrtr_find(&at_lo);
+
+ if (rt_lo && rt_lo->dev->hard_header_len > hard_header_len)
+ hard_header_len = rt_lo->dev->hard_header_len;
+ }
+
+ size += hard_header_len;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err);
lock_sock(sk);
@@ -1649,7 +1664,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
skb_reserve(skb, ddp_dl->header_length);
- skb_reserve(skb, dev->hard_header_len);
+ skb_reserve(skb, hard_header_len);
skb->dev = dev;
SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
@@ -1700,18 +1715,12 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/* loop back */
skb_orphan(skb);
if (ddp->deh_dnode == ATADDR_BCAST) {
- struct atalk_addr at_lo;
-
- at_lo.s_node = 0;
- at_lo.s_net = 0;
-
- rt = atrtr_find(&at_lo);
- if (!rt) {
+ if (!rt_lo) {
kfree_skb(skb);
err = -ENETUNREACH;
goto out;
}
- dev = rt->dev;
+ dev = rt_lo->dev;
skb->dev = dev;
}
ddp_dl->request(ddp_dl, skb, dev->dev_addr);
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 579b66da1d95..3e4f17d335fe 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -101,9 +101,11 @@ static inline struct pppoatm_vcc *chan_to_pvcc(const struct ppp_channel *chan)
* doesn't want to be called in interrupt context, so we do it from
* a tasklet
*/
-static void pppoatm_wakeup_sender(unsigned long arg)
+static void pppoatm_wakeup_sender(struct tasklet_struct *t)
{
- ppp_output_wakeup((struct ppp_channel *) arg);
+ struct pppoatm_vcc *pvcc = from_tasklet(pvcc, t, wakeup_tasklet);
+
+ ppp_output_wakeup(&pvcc->chan);
}
static void pppoatm_release_cb(struct atm_vcc *atmvcc)
@@ -389,11 +391,7 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
struct atm_backend_ppp be;
struct pppoatm_vcc *pvcc;
int err;
- /*
- * Each PPPoATM instance has its own tasklet - this is just a
- * prototypical one used to initialize them
- */
- static const DECLARE_TASKLET_OLD(tasklet_proto, pppoatm_wakeup_sender);
+
if (copy_from_user(&be, arg, sizeof be))
return -EFAULT;
if (be.encaps != PPPOATM_ENCAPS_AUTODETECT &&
@@ -415,8 +413,7 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg)
pvcc->chan.ops = &pppoatm_ops;
pvcc->chan.mtu = atmvcc->qos.txtp.max_sdu - PPP_HDRLEN -
(be.encaps == e_vc ? 0 : LLC_LEN);
- pvcc->wakeup_tasklet = tasklet_proto;
- pvcc->wakeup_tasklet.data = (unsigned long) &pvcc->chan;
+ tasklet_setup(&pvcc->wakeup_tasklet, pppoatm_wakeup_sender);
err = ppp_register_channel(&pvcc->chan);
if (err != 0) {
kfree(pvcc);
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 993afd5ff7bb..860a0786bc1e 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+# Copyright (C) B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
@@ -9,7 +9,6 @@
config BATMAN_ADV
tristate "B.A.T.M.A.N. Advanced Meshing Protocol"
- depends on NET
select LIBCRC32C
help
B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 8010c34b987c..3bd0760c76a2 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+# Copyright (C) B.A.T.M.A.N. contributors:
#
# Marek Lindner, Simon Wunderlich
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index c5f404f6892f..4eee53d19eb0 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 43b045ac8ac7..2c486374af58 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*/
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 168621c9a081..a5e313cd6f44 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 0c57c1000c64..04b01bd684e8 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index e4455babe4c2..e1ca2b8c3152 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*/
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index 5e0be10bc84e..964431f4dc8d 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Linus Lüssing
*/
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 0512ea6cd818..423c2d171703 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*/
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index 4358d436be2a..9e2740195fa2 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Linus Lüssing, Marek Lindner
*/
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 798d659855d0..a0a9636d1740 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*/
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index 0ae2575f70bb..edeffedecade 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*/
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index 4bc695cda397..649c41f393e1 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*/
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 533c6d44cb58..37f7ae413bc6 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*/
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index d2de12e527ba..360bdbf44748 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*/
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 7dc6d3571925..5c22955bb9d5 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Simon Wunderlich
*/
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index fd7ba6bbdf85..8c95a11a830a 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*/
@@ -87,7 +87,7 @@ struct batadv_dhcp_packet {
__u8 sname[64];
__u8 file[128];
__be32 magic;
- __u8 options[];
+ /* __u8 options[]; */
};
#define BATADV_DHCP_YIADDR_LEN sizeof(((struct batadv_dhcp_packet *)0)->yiaddr)
@@ -1564,7 +1564,7 @@ static int batadv_dat_get_dhcp_message_type(struct sk_buff *skb)
}
/**
- * batadv_dat_get_dhcp_yiaddr() - get yiaddr from a DHCP packet
+ * batadv_dat_dhcp_get_yiaddr() - get yiaddr from a DHCP packet
* @skb: the DHCP packet to parse
* @buf: a buffer to store the yiaddr in
*
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index e980fb45693a..bed7f3d20844 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Antonio Quartulli
*/
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index e522f1fcfd9a..a5d9d800082b 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*/
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 881ef328b6cd..dbf0871f8703 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <martin@hundeboll.net>
*/
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index cffe72f4edd7..007f2827935d 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 2fbc500f0ac1..2ae5846ef958 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 16cd9450ceb1..fdde305a198e 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index c3a0c5a7f7e9..87c37f907261 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 0f186ddc15e3..4a6a25d551a8 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index f4b8e9efef19..83d11b46a9d8 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 68638e0450a6..8016e619787f 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*/
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 91ae9f32b580..46696759f194 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Simon Wunderlich, Marek Lindner
*/
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index b7e9923b11a2..f0e5d1429662 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 979864c0fa6b..6717c965f0fa 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index ed9d87ce3407..e48f7ac8a854 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 288201630ceb..8f0102b71656 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2021.0"
+#define BATADV_SOURCE_VERSION "2021.1"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 854e5ff28a3f..28166402d30c 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*/
@@ -828,7 +828,7 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_flags_logs() - output debug information about mcast flag changes
+ * batadv_mcast_flags_log() - output debug information about mcast flag changes
* @bat_priv: the bat priv with all the soft interface information
* @flags: TVLV flags indicating the new multicast state
*
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index d61593d02072..9fee5da08311 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Linus Lüssing
*/
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 97bcf149633d..f317d206b411 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*/
@@ -193,7 +193,7 @@ static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg,
}
/**
- * batadv_option_set_ap_isolation() - Set ap_isolation from genl msg
+ * batadv_netlink_set_mesh_ap_isolation() - Set ap_isolation from genl msg
* @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute
* @bat_priv: the bat priv with all the soft interface information
*
@@ -757,7 +757,7 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info)
}
/**
- * batadv_netlink_tp_meter_start() - Cancel a running tp_meter session
+ * batadv_netlink_tp_meter_cancel() - Cancel a running tp_meter session
* @skb: received netlink message
* @info: receiver information
*
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 7ee48f916997..48102cc7490c 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Matthias Schiffer
*/
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 0cec108b7a99..4bb76b434d07 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*/
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 8fb2c01e7837..368cc3130e4c 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Martin Hundebøll, Jeppe Ledet-Pedersen
*/
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 77431e59b228..da7249448474 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index e75d4c4d11f5..805be87d55b8 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 49cbca4aa428..40f5cffde6a3 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 2ed49db6eff5..5f387786e9a7 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 87017332b567..157abe92d827 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 0d36e15589f6..2b0daf8b2bc4 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 97118efbe678..6b8181bc3122 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 74716d9ca4f6..38b0ad182584 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner
*/
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index d4e10005df6c..789c851732b7 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*/
@@ -131,7 +131,7 @@ static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min)
}
/**
- * batadv_tp_updated_cwnd() - update the Congestion Windows
+ * batadv_tp_update_cwnd() - update the Congestion Windows
* @tp_vars: the private data of the current TP meter session
* @mss: maximum segment size of transmission
*
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index 140105215aa2..f0046d366eac 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Edo Monticelli, Antonio Quartulli
*/
diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c
index 3444d9e4e90d..ec8b9519076b 100644
--- a/net/batman-adv/trace.c
+++ b/net/batman-adv/trace.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Sven Eckelmann
*/
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index a87547570b4e..d673ebdd0426 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Sven Eckelmann
*/
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index cd09916f97fe..f8761281aab0 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*/
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 57192c817229..e1285904f885 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich, Antonio Quartulli
*/
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 6a23a566cde1..253f5a33a914 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index d509d00c7a23..54f2a35653d0 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 2f96e96a5ca4..7c0b475cc22a 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors:
+/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Marek Lindner, Simon Wunderlich
*/
@@ -228,7 +228,8 @@ struct batadv_hard_iface {
};
/**
- * struct batadv_orig_ifinfo - B.A.T.M.A.N. IV private orig_ifinfo members
+ * struct batadv_orig_ifinfo_bat_iv - B.A.T.M.A.N. IV private orig_ifinfo
+ * members
*/
struct batadv_orig_ifinfo_bat_iv {
/**
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 64e669acd42f..400c5130dc0a 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -5,7 +5,7 @@
menuconfig BT
tristate "Bluetooth subsystem support"
- depends on NET && !S390
+ depends on !S390
depends on RFKILL || !RFKILL
select CRC16
select CRYPTO
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index da7fd7c8c2dc..463bad58478b 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -381,9 +381,9 @@ static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb,
hdev = hci_dev_get(req->id);
if (!hdev || hdev->amp_type == AMP_TYPE_BREDR || tmp) {
struct a2mp_amp_assoc_rsp rsp;
- rsp.id = req->id;
memset(&rsp, 0, sizeof(rsp));
+ rsp.id = req->id;
if (tmp) {
rsp.status = A2MP_STATUS_COLLISION_OCCURED;
@@ -512,6 +512,7 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL);
if (!assoc) {
amp_ctrl_put(ctrl);
+ hci_dev_put(hdev);
return -ENOMEM;
}
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 4ef6a54403aa..1661979b6a6e 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -72,8 +72,8 @@ void bt_sock_reclassify_lock(struct sock *sk, int proto)
BUG_ON(!sock_allow_reclassification(sk));
sock_lock_init_class_and_name(sk,
- bt_slock_key_strings[proto], &bt_slock_key[proto],
- bt_key_strings[proto], &bt_lock_key[proto]);
+ bt_slock_key_strings[proto], &bt_slock_key[proto],
+ bt_key_strings[proto], &bt_lock_key[proto]);
}
EXPORT_SYMBOL(bt_sock_reclassify_lock);
@@ -451,7 +451,7 @@ static inline __poll_t bt_accept_poll(struct sock *parent)
}
__poll_t bt_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+ poll_table *wait)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
@@ -478,8 +478,8 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
mask |= EPOLLHUP;
if (sk->sk_state == BT_CONNECT ||
- sk->sk_state == BT_CONNECT2 ||
- sk->sk_state == BT_CONFIG)
+ sk->sk_state == BT_CONNECT2 ||
+ sk->sk_state == BT_CONFIG)
return mask;
if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk))
@@ -508,7 +508,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
if (amount < 0)
amount = 0;
- err = put_user(amount, (int __user *) arg);
+ err = put_user(amount, (int __user *)arg);
break;
case TIOCINQ:
@@ -519,7 +519,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
skb = skb_peek(&sk->sk_receive_queue);
amount = skb ? skb->len : 0;
release_sock(sk);
- err = put_user(amount, (int __user *) arg);
+ err = put_user(amount, (int __user *)arg);
break;
default:
@@ -637,7 +637,7 @@ static int bt_seq_show(struct seq_file *seq, void *v)
struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
if (v == SEQ_START_TOKEN) {
- seq_puts(seq ,"sk RefCnt Rmem Wmem User Inode Parent");
+ seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent");
if (l->custom_seq_show) {
seq_putc(seq, ' ');
@@ -657,7 +657,7 @@ static int bt_seq_show(struct seq_file *seq, void *v)
sk_wmem_alloc_get(sk),
from_kuid(seq_user_ns(seq), sock_i_uid(sk)),
sock_i_ino(sk),
- bt->parent? sock_i_ino(bt->parent): 0LU);
+ bt->parent ? sock_i_ino(bt->parent) : 0LU);
if (l->custom_seq_show) {
seq_putc(seq, ' ');
@@ -678,7 +678,7 @@ static const struct seq_operations bt_seq_ops = {
int bt_procfs_init(struct net *net, const char *name,
struct bt_sock_list *sk_list,
- int (* seq_show)(struct seq_file *, void *))
+ int (*seq_show)(struct seq_file *, void *))
{
sk_list->custom_seq_show = seq_show;
@@ -694,7 +694,7 @@ void bt_procfs_cleanup(struct net *net, const char *name)
#else
int bt_procfs_init(struct net *net, const char *name,
struct bt_sock_list *sk_list,
- int (* seq_show)(struct seq_file *, void *))
+ int (*seq_show)(struct seq_file *, void *))
{
return 0;
}
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index 9c711f0dfae3..be2d469d6369 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -297,6 +297,9 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
struct hci_request req;
int err;
+ if (!mgr)
+ return;
+
cp.phy_handle = hcon->handle;
cp.len_so_far = cpu_to_le16(0);
cp.max_len = cpu_to_le16(hdev->amp_assoc_size);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 4f1cd8063e72..6ffa89e3ba0a 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -203,6 +203,23 @@ static void hci_acl_create_connection(struct hci_conn *conn)
BT_DBG("hcon %p", conn);
+ /* Many controllers disallow HCI Create Connection while it is doing
+ * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create
+ * Connection. This may cause the MGMT discovering state to become false
+ * without user space's request but it is okay since the MGMT Discovery
+ * APIs do not promise that discovery should be done forever. Instead,
+ * the user space monitors the status of MGMT discovering and it may
+ * request for discovery again when this flag becomes false.
+ */
+ if (test_bit(HCI_INQUIRY, &hdev->flags)) {
+ /* Put this connection to "pending" state so that it will be
+ * executed after the inquiry cancel command complete event.
+ */
+ conn->state = BT_CONNECT2;
+ hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL);
+ return;
+ }
+
conn->state = BT_CONNECT;
conn->out = true;
conn->role = HCI_ROLE_MASTER;
@@ -276,6 +293,20 @@ static void hci_add_sco(struct hci_conn *conn, __u16 handle)
hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp);
}
+static bool find_next_esco_param(struct hci_conn *conn,
+ const struct sco_param *esco_param, int size)
+{
+ for (; conn->attempt <= size; conn->attempt++) {
+ if (lmp_esco_2m_capable(conn->link) ||
+ (esco_param[conn->attempt - 1].pkt_type & ESCO_2EV3))
+ break;
+ BT_DBG("hcon %p skipped attempt %d, eSCO 2M not supported",
+ conn, conn->attempt);
+ }
+
+ return conn->attempt <= size;
+}
+
bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
{
struct hci_dev *hdev = conn->hdev;
@@ -297,13 +328,15 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
switch (conn->setting & SCO_AIRMODE_MASK) {
case SCO_AIRMODE_TRANSP:
- if (conn->attempt > ARRAY_SIZE(esco_param_msbc))
+ if (!find_next_esco_param(conn, esco_param_msbc,
+ ARRAY_SIZE(esco_param_msbc)))
return false;
param = &esco_param_msbc[conn->attempt - 1];
break;
case SCO_AIRMODE_CVSD:
if (lmp_esco_capable(conn->link)) {
- if (conn->attempt > ARRAY_SIZE(esco_param_cvsd))
+ if (!find_next_esco_param(conn, esco_param_cvsd,
+ ARRAY_SIZE(esco_param_cvsd)))
return false;
param = &esco_param_cvsd[conn->attempt - 1];
} else {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 9d2c9a1c552f..b0d9c36acc03 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1362,8 +1362,10 @@ int hci_inquiry(void __user *arg)
* cleared). If it is interrupted by a signal, return -EINTR.
*/
if (wait_on_bit(&hdev->flags, HCI_INQUIRY,
- TASK_INTERRUPTIBLE))
- return -EINTR;
+ TASK_INTERRUPTIBLE)) {
+ err = -EINTR;
+ goto done;
+ }
}
/* for unlimited number of responses we will use buffer with
@@ -3051,12 +3053,15 @@ void hci_adv_monitors_clear(struct hci_dev *hdev)
int handle;
idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle)
- hci_free_adv_monitor(monitor);
+ hci_free_adv_monitor(hdev, monitor);
idr_destroy(&hdev->adv_monitors_idr);
}
-void hci_free_adv_monitor(struct adv_monitor *monitor)
+/* Frees the monitor structure and do some bookkeepings.
+ * This function requires the caller holds hdev->lock.
+ */
+void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
{
struct adv_pattern *pattern;
struct adv_pattern *tmp;
@@ -3064,68 +3069,167 @@ void hci_free_adv_monitor(struct adv_monitor *monitor)
if (!monitor)
return;
- list_for_each_entry_safe(pattern, tmp, &monitor->patterns, list)
+ list_for_each_entry_safe(pattern, tmp, &monitor->patterns, list) {
+ list_del(&pattern->list);
kfree(pattern);
+ }
+
+ if (monitor->handle)
+ idr_remove(&hdev->adv_monitors_idr, monitor->handle);
+
+ if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) {
+ hdev->adv_monitors_cnt--;
+ mgmt_adv_monitor_removed(hdev, monitor->handle);
+ }
kfree(monitor);
}
-/* This function requires the caller holds hdev->lock */
-int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
+int hci_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status)
+{
+ return mgmt_add_adv_patterns_monitor_complete(hdev, status);
+}
+
+int hci_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status)
+{
+ return mgmt_remove_adv_monitor_complete(hdev, status);
+}
+
+/* Assigns handle to a monitor, and if offloading is supported and power is on,
+ * also attempts to forward the request to the controller.
+ * Returns true if request is forwarded (result is pending), false otherwise.
+ * This function requires the caller holds hdev->lock.
+ */
+bool hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
+ int *err)
{
int min, max, handle;
- if (!monitor)
- return -EINVAL;
+ *err = 0;
+
+ if (!monitor) {
+ *err = -EINVAL;
+ return false;
+ }
min = HCI_MIN_ADV_MONITOR_HANDLE;
max = HCI_MIN_ADV_MONITOR_HANDLE + HCI_MAX_ADV_MONITOR_NUM_HANDLES;
handle = idr_alloc(&hdev->adv_monitors_idr, monitor, min, max,
GFP_KERNEL);
- if (handle < 0)
- return handle;
+ if (handle < 0) {
+ *err = handle;
+ return false;
+ }
- hdev->adv_monitors_cnt++;
monitor->handle = handle;
- hci_update_background_scan(hdev);
+ if (!hdev_is_powered(hdev))
+ return false;
- return 0;
+ switch (hci_get_adv_monitor_offload_ext(hdev)) {
+ case HCI_ADV_MONITOR_EXT_NONE:
+ hci_update_background_scan(hdev);
+ bt_dev_dbg(hdev, "%s add monitor status %d", hdev->name, *err);
+ /* Message was not forwarded to controller - not an error */
+ return false;
+ case HCI_ADV_MONITOR_EXT_MSFT:
+ *err = msft_add_monitor_pattern(hdev, monitor);
+ bt_dev_dbg(hdev, "%s add monitor msft status %d", hdev->name,
+ *err);
+ break;
+ }
+
+ return (*err == 0);
}
-static int free_adv_monitor(int id, void *ptr, void *data)
+/* Attempts to tell the controller and free the monitor. If somehow the
+ * controller doesn't have a corresponding handle, remove anyway.
+ * Returns true if request is forwarded (result is pending), false otherwise.
+ * This function requires the caller holds hdev->lock.
+ */
+static bool hci_remove_adv_monitor(struct hci_dev *hdev,
+ struct adv_monitor *monitor,
+ u16 handle, int *err)
{
- struct hci_dev *hdev = data;
- struct adv_monitor *monitor = ptr;
+ *err = 0;
- idr_remove(&hdev->adv_monitors_idr, monitor->handle);
- hci_free_adv_monitor(monitor);
- hdev->adv_monitors_cnt--;
+ switch (hci_get_adv_monitor_offload_ext(hdev)) {
+ case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
+ goto free_monitor;
+ case HCI_ADV_MONITOR_EXT_MSFT:
+ *err = msft_remove_monitor(hdev, monitor, handle);
+ break;
+ }
- return 0;
+ /* In case no matching handle registered, just free the monitor */
+ if (*err == -ENOENT)
+ goto free_monitor;
+
+ return (*err == 0);
+
+free_monitor:
+ if (*err == -ENOENT)
+ bt_dev_warn(hdev, "Removing monitor with no matching handle %d",
+ monitor->handle);
+ hci_free_adv_monitor(hdev, monitor);
+
+ *err = 0;
+ return false;
}
-/* This function requires the caller holds hdev->lock */
-int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle)
+/* Returns true if request is forwarded (result is pending), false otherwise.
+ * This function requires the caller holds hdev->lock.
+ */
+bool hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle, int *err)
+{
+ struct adv_monitor *monitor = idr_find(&hdev->adv_monitors_idr, handle);
+ bool pending;
+
+ if (!monitor) {
+ *err = -EINVAL;
+ return false;
+ }
+
+ pending = hci_remove_adv_monitor(hdev, monitor, handle, err);
+ if (!*err && !pending)
+ hci_update_background_scan(hdev);
+
+ bt_dev_dbg(hdev, "%s remove monitor handle %d, status %d, %spending",
+ hdev->name, handle, *err, pending ? "" : "not ");
+
+ return pending;
+}
+
+/* Returns true if request is forwarded (result is pending), false otherwise.
+ * This function requires the caller holds hdev->lock.
+ */
+bool hci_remove_all_adv_monitor(struct hci_dev *hdev, int *err)
{
struct adv_monitor *monitor;
+ int idr_next_id = 0;
+ bool pending = false;
+ bool update = false;
- if (handle) {
- monitor = idr_find(&hdev->adv_monitors_idr, handle);
+ *err = 0;
+
+ while (!*err && !pending) {
+ monitor = idr_get_next(&hdev->adv_monitors_idr, &idr_next_id);
if (!monitor)
- return -ENOENT;
+ break;
- idr_remove(&hdev->adv_monitors_idr, monitor->handle);
- hci_free_adv_monitor(monitor);
- hdev->adv_monitors_cnt--;
- } else {
- /* Remove all monitors if handle is 0. */
- idr_for_each(&hdev->adv_monitors_idr, &free_adv_monitor, hdev);
+ pending = hci_remove_adv_monitor(hdev, monitor, 0, err);
+
+ if (!*err && !pending)
+ update = true;
}
- hci_update_background_scan(hdev);
+ if (update)
+ hci_update_background_scan(hdev);
- return 0;
+ bt_dev_dbg(hdev, "%s remove all monitors status %d, %spending",
+ hdev->name, *err, pending ? "" : "not ");
+
+ return pending;
}
/* This function requires the caller holds hdev->lock */
@@ -3134,6 +3238,14 @@ bool hci_is_adv_monitoring(struct hci_dev *hdev)
return !idr_is_empty(&hdev->adv_monitors_idr);
}
+int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev)
+{
+ if (msft_monitor_supported(hdev))
+ return HCI_ADV_MONITOR_EXT_MSFT;
+
+ return HCI_ADV_MONITOR_EXT_NONE;
+}
+
struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list,
bdaddr_t *bdaddr, u8 type)
{
@@ -3566,7 +3678,8 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
}
/* Suspend notifier should only act on events when powered. */
- if (!hdev_is_powered(hdev))
+ if (!hdev_is_powered(hdev) ||
+ hci_dev_test_flag(hdev, HCI_UNREGISTER))
goto done;
if (action == PM_SUSPEND_PREPARE) {
@@ -3827,10 +3940,12 @@ int hci_register_dev(struct hci_dev *hdev)
hci_sock_dev_event(hdev, HCI_DEV_REG);
hci_dev_hold(hdev);
- hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
- error = register_pm_notifier(&hdev->suspend_notifier);
- if (error)
- goto err_wqueue;
+ if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
+ hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
+ error = register_pm_notifier(&hdev->suspend_notifier);
+ if (error)
+ goto err_wqueue;
+ }
queue_work(hdev->req_workqueue, &hdev->power_on);
@@ -3865,9 +3980,11 @@ void hci_unregister_dev(struct hci_dev *hdev)
cancel_work_sync(&hdev->power_on);
- hci_suspend_clear_tasks(hdev);
- unregister_pm_notifier(&hdev->suspend_notifier);
- cancel_work_sync(&hdev->suspend_prepare);
+ if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
+ hci_suspend_clear_tasks(hdev);
+ unregister_pm_notifier(&hdev->suspend_notifier);
+ cancel_work_sync(&hdev->suspend_prepare);
+ }
hci_dev_do_close(hdev);
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 4626e0289a97..1a0ab58bfad0 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -237,8 +237,8 @@ static int conn_info_min_age_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get,
- conn_info_min_age_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get,
+ conn_info_min_age_set, "%llu\n");
static int conn_info_max_age_set(void *data, u64 val)
{
@@ -265,8 +265,8 @@ static int conn_info_max_age_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get,
- conn_info_max_age_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get,
+ conn_info_max_age_set, "%llu\n");
static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
@@ -419,8 +419,8 @@ static int voice_setting_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get,
- NULL, "0x%4.4llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(voice_setting_fops, voice_setting_get,
+ NULL, "0x%4.4llx\n");
static ssize_t ssp_debug_mode_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
@@ -476,9 +476,9 @@ static int min_encrypt_key_size_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(min_encrypt_key_size_fops,
- min_encrypt_key_size_get,
- min_encrypt_key_size_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(min_encrypt_key_size_fops,
+ min_encrypt_key_size_get,
+ min_encrypt_key_size_set, "%llu\n");
static int auto_accept_delay_get(void *data, u64 *val)
{
@@ -491,8 +491,8 @@ static int auto_accept_delay_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get,
- auto_accept_delay_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get,
+ auto_accept_delay_set, "%llu\n");
static ssize_t force_bredr_smp_read(struct file *file,
char __user *user_buf,
@@ -558,8 +558,8 @@ static int idle_timeout_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get,
- idle_timeout_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(idle_timeout_fops, idle_timeout_get,
+ idle_timeout_set, "%llu\n");
static int sniff_min_interval_set(void *data, u64 val)
{
@@ -586,8 +586,8 @@ static int sniff_min_interval_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get,
- sniff_min_interval_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get,
+ sniff_min_interval_set, "%llu\n");
static int sniff_max_interval_set(void *data, u64 val)
{
@@ -614,8 +614,8 @@ static int sniff_max_interval_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get,
- sniff_max_interval_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get,
+ sniff_max_interval_set, "%llu\n");
void hci_debugfs_create_bredr(struct hci_dev *hdev)
{
@@ -706,8 +706,8 @@ static int rpa_timeout_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get,
- rpa_timeout_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get,
+ rpa_timeout_set, "%llu\n");
static int random_address_show(struct seq_file *f, void *p)
{
@@ -869,8 +869,8 @@ static int conn_min_interval_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get,
- conn_min_interval_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get,
+ conn_min_interval_set, "%llu\n");
static int conn_max_interval_set(void *data, u64 val)
{
@@ -897,8 +897,8 @@ static int conn_max_interval_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get,
- conn_max_interval_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get,
+ conn_max_interval_set, "%llu\n");
static int conn_latency_set(void *data, u64 val)
{
@@ -925,8 +925,8 @@ static int conn_latency_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get,
- conn_latency_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(conn_latency_fops, conn_latency_get,
+ conn_latency_set, "%llu\n");
static int supervision_timeout_set(void *data, u64 val)
{
@@ -953,8 +953,8 @@ static int supervision_timeout_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get,
- supervision_timeout_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get,
+ supervision_timeout_set, "%llu\n");
static int adv_channel_map_set(void *data, u64 val)
{
@@ -981,8 +981,8 @@ static int adv_channel_map_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get,
- adv_channel_map_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get,
+ adv_channel_map_set, "%llu\n");
static int adv_min_interval_set(void *data, u64 val)
{
@@ -1009,8 +1009,8 @@ static int adv_min_interval_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get,
- adv_min_interval_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get,
+ adv_min_interval_set, "%llu\n");
static int adv_max_interval_set(void *data, u64 val)
{
@@ -1037,8 +1037,8 @@ static int adv_max_interval_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get,
- adv_max_interval_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get,
+ adv_max_interval_set, "%llu\n");
static int min_key_size_set(void *data, u64 val)
{
@@ -1065,8 +1065,8 @@ static int min_key_size_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(min_key_size_fops, min_key_size_get,
- min_key_size_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(min_key_size_fops, min_key_size_get,
+ min_key_size_set, "%llu\n");
static int max_key_size_set(void *data, u64 val)
{
@@ -1093,8 +1093,8 @@ static int max_key_size_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(max_key_size_fops, max_key_size_get,
- max_key_size_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(max_key_size_fops, max_key_size_get,
+ max_key_size_set, "%llu\n");
static int auth_payload_timeout_set(void *data, u64 val)
{
@@ -1121,9 +1121,9 @@ static int auth_payload_timeout_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(auth_payload_timeout_fops,
- auth_payload_timeout_get,
- auth_payload_timeout_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(auth_payload_timeout_fops,
+ auth_payload_timeout_get,
+ auth_payload_timeout_set, "%llu\n");
static ssize_t force_no_mitm_read(struct file *file,
char __user *user_buf,
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 71bffd745472..e55976db4403 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -29,6 +29,7 @@
#include "smp.h"
#include "hci_request.h"
+#include "msft.h"
#define HCI_REQ_DONE 0
#define HCI_REQ_PEND 1
@@ -404,13 +405,18 @@ static void cancel_interleave_scan(struct hci_dev *hdev)
*/
static bool __hci_update_interleaved_scan(struct hci_dev *hdev)
{
- /* If there is at least one ADV monitors and one pending LE connection
- * or one device to be scanned for, we should alternate between
- * allowlist scan and one without any filters to save power.
+ /* Do interleaved scan only if all of the following are true:
+ * - There is at least one ADV monitor
+ * - At least one pending LE connection or one device to be scanned for
+ * - Monitor offloading is not supported
+ * If so, we should alternate between allowlist scan and one without
+ * any filters to save power.
*/
bool use_interleaving = hci_is_adv_monitoring(hdev) &&
!(list_empty(&hdev->pend_le_conns) &&
- list_empty(&hdev->pend_le_reports));
+ list_empty(&hdev->pend_le_reports)) &&
+ hci_get_adv_monitor_offload_ext(hdev) ==
+ HCI_ADV_MONITOR_EXT_NONE;
bool is_interleaving = is_interleave_scanning(hdev);
if (use_interleaving && !is_interleaving) {
@@ -899,14 +905,11 @@ static u8 update_white_list(struct hci_request *req)
/* Use the allowlist unless the following conditions are all true:
* - We are not currently suspending
- * - There are 1 or more ADV monitors registered
+ * - There are 1 or more ADV monitors registered and it's not offloaded
* - Interleaved scanning is not currently using the allowlist
- *
- * Once the controller offloading of advertisement monitor is in place,
- * the above condition should include the support of MSFT extension
- * support.
*/
if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended &&
+ hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE &&
hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST)
return 0x00;
@@ -1087,6 +1090,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
if (hdev->suspended) {
window = hdev->le_scan_window_suspend;
interval = hdev->le_scan_int_suspend;
+
+ set_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks);
} else if (hci_is_le_conn_scanning(hdev)) {
window = hdev->le_scan_window_connect;
interval = hdev->le_scan_int_connect;
@@ -1170,19 +1175,6 @@ static void hci_req_set_event_filter(struct hci_request *req)
hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
-static void hci_req_config_le_suspend_scan(struct hci_request *req)
-{
- /* Before changing params disable scan if enabled */
- if (hci_dev_test_flag(req->hdev, HCI_LE_SCAN))
- hci_req_add_le_scan_disable(req, false);
-
- /* Configure params and enable scanning */
- hci_req_add_le_passive_scan(req);
-
- /* Block suspend notifier on response */
- set_bit(SUSPEND_SCAN_ENABLE, req->hdev->suspend_tasks);
-}
-
static void cancel_adv_timeout(struct hci_dev *hdev)
{
if (hdev->adv_instance_timeout) {
@@ -1245,12 +1237,37 @@ static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode)
{
bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode,
status);
- if (test_and_clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) ||
- test_and_clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) {
+ if (test_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) ||
+ test_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) {
+ clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks);
+ clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks);
+ wake_up(&hdev->suspend_wait_q);
+ }
+
+ if (test_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks)) {
+ clear_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks);
wake_up(&hdev->suspend_wait_q);
}
}
+static void hci_req_add_set_adv_filter_enable(struct hci_request *req,
+ bool enable)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ switch (hci_get_adv_monitor_offload_ext(hdev)) {
+ case HCI_ADV_MONITOR_EXT_MSFT:
+ msft_req_add_set_filter_enable(req, enable);
+ break;
+ default:
+ return;
+ }
+
+ /* No need to block when enabling since it's on resume path */
+ if (hdev->suspended && !enable)
+ set_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks);
+}
+
/* Call with hci_dev_lock */
void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
{
@@ -1308,6 +1325,9 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
hci_req_add_le_scan_disable(&req, false);
}
+ /* Disable advertisement filters */
+ hci_req_add_set_adv_filter_enable(&req, false);
+
/* Mark task needing completion */
set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks);
@@ -1336,7 +1356,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
/* Enable event filter for paired devices */
hci_req_set_event_filter(&req);
/* Enable passive scan at lower duty cycle */
- hci_req_config_le_suspend_scan(&req);
+ __hci_update_background_scan(&req);
/* Pause scan changes again. */
hdev->scanning_paused = true;
hci_req_run(&req, suspend_req_complete);
@@ -1346,7 +1366,9 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
hci_req_clear_event_filter(&req);
/* Reset passive/background scanning to normal */
- hci_req_config_le_suspend_scan(&req);
+ __hci_update_background_scan(&req);
+ /* Enable all of the advertisement filters */
+ hci_req_add_set_adv_filter_enable(&req, true);
/* Unpause directed advertising */
hdev->advertising_paused = false;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 17b87b57a175..72c2f5226d67 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4519,6 +4519,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
}
goto done;
+ case L2CAP_CONF_UNKNOWN:
case L2CAP_CONF_UNACCEPT:
if (chan->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) {
char req[64];
@@ -8276,10 +8277,73 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
mutex_unlock(&conn->chan_lock);
}
+/* Append fragment into frame respecting the maximum len of rx_skb */
+static int l2cap_recv_frag(struct l2cap_conn *conn, struct sk_buff *skb,
+ u16 len)
+{
+ if (!conn->rx_skb) {
+ /* Allocate skb for the complete frame (with header) */
+ conn->rx_skb = bt_skb_alloc(len, GFP_KERNEL);
+ if (!conn->rx_skb)
+ return -ENOMEM;
+ /* Init rx_len */
+ conn->rx_len = len;
+ }
+
+ /* Copy as much as the rx_skb can hold */
+ len = min_t(u16, len, skb->len);
+ skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, len), len);
+ skb_pull(skb, len);
+ conn->rx_len -= len;
+
+ return len;
+}
+
+static int l2cap_recv_len(struct l2cap_conn *conn, struct sk_buff *skb)
+{
+ struct sk_buff *rx_skb;
+ int len;
+
+ /* Append just enough to complete the header */
+ len = l2cap_recv_frag(conn, skb, L2CAP_LEN_SIZE - conn->rx_skb->len);
+
+ /* If header could not be read just continue */
+ if (len < 0 || conn->rx_skb->len < L2CAP_LEN_SIZE)
+ return len;
+
+ rx_skb = conn->rx_skb;
+ len = get_unaligned_le16(rx_skb->data);
+
+ /* Check if rx_skb has enough space to receive all fragments */
+ if (len + (L2CAP_HDR_SIZE - L2CAP_LEN_SIZE) <= skb_tailroom(rx_skb)) {
+ /* Update expected len */
+ conn->rx_len = len + (L2CAP_HDR_SIZE - L2CAP_LEN_SIZE);
+ return L2CAP_LEN_SIZE;
+ }
+
+ /* Reset conn->rx_skb since it will need to be reallocated in order to
+ * fit all fragments.
+ */
+ conn->rx_skb = NULL;
+
+ /* Reallocate rx_skb using the exact expected length */
+ len = l2cap_recv_frag(conn, rx_skb,
+ len + (L2CAP_HDR_SIZE - L2CAP_LEN_SIZE));
+ kfree_skb(rx_skb);
+
+ return len;
+}
+
+static void l2cap_recv_reset(struct l2cap_conn *conn)
+{
+ kfree_skb(conn->rx_skb);
+ conn->rx_skb = NULL;
+ conn->rx_len = 0;
+}
+
void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
{
struct l2cap_conn *conn = hcon->l2cap_data;
- struct l2cap_hdr *hdr;
int len;
/* For AMP controller do not create l2cap conn */
@@ -8298,23 +8362,23 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
case ACL_START:
case ACL_START_NO_FLUSH:
case ACL_COMPLETE:
- if (conn->rx_len) {
+ if (conn->rx_skb) {
BT_ERR("Unexpected start frame (len %d)", skb->len);
- kfree_skb(conn->rx_skb);
- conn->rx_skb = NULL;
- conn->rx_len = 0;
+ l2cap_recv_reset(conn);
l2cap_conn_unreliable(conn, ECOMM);
}
- /* Start fragment always begin with Basic L2CAP header */
- if (skb->len < L2CAP_HDR_SIZE) {
- BT_ERR("Frame is too short (len %d)", skb->len);
- l2cap_conn_unreliable(conn, ECOMM);
- goto drop;
+ /* Start fragment may not contain the L2CAP length so just
+ * copy the initial byte when that happens and use conn->mtu as
+ * expected length.
+ */
+ if (skb->len < L2CAP_LEN_SIZE) {
+ if (l2cap_recv_frag(conn, skb, conn->mtu) < 0)
+ goto drop;
+ return;
}
- hdr = (struct l2cap_hdr *) skb->data;
- len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE;
+ len = get_unaligned_le16(skb->data) + L2CAP_HDR_SIZE;
if (len == skb->len) {
/* Complete frame received */
@@ -8331,38 +8395,43 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
goto drop;
}
- /* Allocate skb for the complete frame (with header) */
- conn->rx_skb = bt_skb_alloc(len, GFP_KERNEL);
- if (!conn->rx_skb)
+ /* Append fragment into frame (with header) */
+ if (l2cap_recv_frag(conn, skb, len) < 0)
goto drop;
- skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
- skb->len);
- conn->rx_len = len - skb->len;
break;
case ACL_CONT:
BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
- if (!conn->rx_len) {
+ if (!conn->rx_skb) {
BT_ERR("Unexpected continuation frame (len %d)", skb->len);
l2cap_conn_unreliable(conn, ECOMM);
goto drop;
}
+ /* Complete the L2CAP length if it has not been read */
+ if (conn->rx_skb->len < L2CAP_LEN_SIZE) {
+ if (l2cap_recv_len(conn, skb) < 0) {
+ l2cap_conn_unreliable(conn, ECOMM);
+ goto drop;
+ }
+
+ /* If the header still could not be read, just continue */
+ if (conn->rx_skb->len < L2CAP_LEN_SIZE)
+ return;
+ }
+
if (skb->len > conn->rx_len) {
BT_ERR("Fragment is too long (len %d, expected %d)",
skb->len, conn->rx_len);
- kfree_skb(conn->rx_skb);
- conn->rx_skb = NULL;
- conn->rx_len = 0;
+ l2cap_recv_reset(conn);
l2cap_conn_unreliable(conn, ECOMM);
goto drop;
}
- skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
- skb->len);
- conn->rx_len -= skb->len;
+ /* Append fragment into frame (with header) */
+ l2cap_recv_frag(conn, skb, skb->len);
if (!conn->rx_len) {
/* Complete frame received. l2cap_recv_frame
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index fa0f7a4a1d2f..74971b4bd457 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -124,6 +124,7 @@ static const u16 mgmt_commands[] = {
MGMT_OP_REMOVE_ADV_MONITOR,
MGMT_OP_ADD_EXT_ADV_PARAMS,
MGMT_OP_ADD_EXT_ADV_DATA,
+ MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI,
};
static const u16 mgmt_events[] = {
@@ -4166,14 +4167,24 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk);
}
-static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev,
- u16 handle)
+void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle)
{
- struct mgmt_ev_adv_monitor_added ev;
+ struct mgmt_ev_adv_monitor_removed ev;
+ struct mgmt_pending_cmd *cmd;
+ struct sock *sk_skip = NULL;
+ struct mgmt_cp_remove_adv_monitor *cp;
+
+ cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev);
+ if (cmd) {
+ cp = cmd->param;
+
+ if (cp->monitor_handle)
+ sk_skip = cmd->sk;
+ }
ev.monitor_handle = cpu_to_le16(handle);
- mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk);
+ mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip);
}
static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
@@ -4184,6 +4195,7 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
int handle, err;
size_t rp_size = 0;
__u32 supported = 0;
+ __u32 enabled = 0;
__u16 num_handles = 0;
__u16 handles[HCI_MAX_ADV_MONITOR_NUM_HANDLES];
@@ -4191,12 +4203,11 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
- if (msft_get_features(hdev) & MSFT_FEATURE_MASK_LE_ADV_MONITOR)
+ if (msft_monitor_supported(hdev))
supported |= MGMT_ADV_MONITOR_FEATURE_MASK_OR_PATTERNS;
- idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle) {
+ idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle)
handles[num_handles++] = monitor->handle;
- }
hci_dev_unlock(hdev);
@@ -4205,11 +4216,11 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
if (!rp)
return -ENOMEM;
- /* Once controller-based monitoring is in place, the enabled_features
- * should reflect the use.
- */
+ /* All supported features are currently enabled */
+ enabled = supported;
+
rp->supported_features = cpu_to_le32(supported);
- rp->enabled_features = 0;
+ rp->enabled_features = cpu_to_le32(enabled);
rp->max_num_handles = cpu_to_le16(HCI_MAX_ADV_MONITOR_NUM_HANDLES);
rp->max_num_patterns = HCI_MAX_ADV_MONITOR_NUM_PATTERNS;
rp->num_handles = cpu_to_le16(num_handles);
@@ -4225,105 +4236,267 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
return err;
}
+int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status)
+{
+ struct mgmt_rp_add_adv_patterns_monitor rp;
+ struct mgmt_pending_cmd *cmd;
+ struct adv_monitor *monitor;
+ int err = 0;
+
+ hci_dev_lock(hdev);
+
+ cmd = pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev);
+ if (!cmd) {
+ cmd = pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev);
+ if (!cmd)
+ goto done;
+ }
+
+ monitor = cmd->user_data;
+ rp.monitor_handle = cpu_to_le16(monitor->handle);
+
+ if (!status) {
+ mgmt_adv_monitor_added(cmd->sk, hdev, monitor->handle);
+ hdev->adv_monitors_cnt++;
+ if (monitor->state == ADV_MONITOR_STATE_NOT_REGISTERED)
+ monitor->state = ADV_MONITOR_STATE_REGISTERED;
+ hci_update_background_scan(hdev);
+ }
+
+ err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_status(status), &rp, sizeof(rp));
+ mgmt_pending_remove(cmd);
+
+done:
+ hci_dev_unlock(hdev);
+ bt_dev_dbg(hdev, "add monitor %d complete, status %d",
+ rp.monitor_handle, status);
+
+ return err;
+}
+
+static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
+ struct adv_monitor *m, u8 status,
+ void *data, u16 len, u16 op)
+{
+ struct mgmt_rp_add_adv_patterns_monitor rp;
+ struct mgmt_pending_cmd *cmd;
+ int err;
+ bool pending;
+
+ hci_dev_lock(hdev);
+
+ if (status)
+ goto unlock;
+
+ if (pending_find(MGMT_OP_SET_LE, hdev) ||
+ pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
+ pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) ||
+ pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) {
+ status = MGMT_STATUS_BUSY;
+ goto unlock;
+ }
+
+ cmd = mgmt_pending_add(sk, op, hdev, data, len);
+ if (!cmd) {
+ status = MGMT_STATUS_NO_RESOURCES;
+ goto unlock;
+ }
+
+ cmd->user_data = m;
+ pending = hci_add_adv_monitor(hdev, m, &err);
+ if (err) {
+ if (err == -ENOSPC || err == -ENOMEM)
+ status = MGMT_STATUS_NO_RESOURCES;
+ else if (err == -EINVAL)
+ status = MGMT_STATUS_INVALID_PARAMS;
+ else
+ status = MGMT_STATUS_FAILED;
+
+ mgmt_pending_remove(cmd);
+ goto unlock;
+ }
+
+ if (!pending) {
+ mgmt_pending_remove(cmd);
+ rp.monitor_handle = cpu_to_le16(m->handle);
+ mgmt_adv_monitor_added(sk, hdev, m->handle);
+ m->state = ADV_MONITOR_STATE_REGISTERED;
+ hdev->adv_monitors_cnt++;
+
+ hci_dev_unlock(hdev);
+ return mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_SUCCESS,
+ &rp, sizeof(rp));
+ }
+
+ hci_dev_unlock(hdev);
+
+ return 0;
+
+unlock:
+ hci_free_adv_monitor(hdev, m);
+ hci_dev_unlock(hdev);
+ return mgmt_cmd_status(sk, hdev->id, op, status);
+}
+
+static void parse_adv_monitor_rssi(struct adv_monitor *m,
+ struct mgmt_adv_rssi_thresholds *rssi)
+{
+ if (rssi) {
+ m->rssi.low_threshold = rssi->low_threshold;
+ m->rssi.low_threshold_timeout =
+ __le16_to_cpu(rssi->low_threshold_timeout);
+ m->rssi.high_threshold = rssi->high_threshold;
+ m->rssi.high_threshold_timeout =
+ __le16_to_cpu(rssi->high_threshold_timeout);
+ m->rssi.sampling_period = rssi->sampling_period;
+ } else {
+ /* Default values. These numbers are the least constricting
+ * parameters for MSFT API to work, so it behaves as if there
+ * are no RSSI parameters to consider. May need to be changed
+ * if other APIs are to be supported.
+ */
+ m->rssi.low_threshold = -127;
+ m->rssi.low_threshold_timeout = 60;
+ m->rssi.high_threshold = -127;
+ m->rssi.high_threshold_timeout = 0;
+ m->rssi.sampling_period = 0;
+ }
+}
+
+static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count,
+ struct mgmt_adv_pattern *patterns)
+{
+ u8 offset = 0, length = 0;
+ struct adv_pattern *p = NULL;
+ int i;
+
+ for (i = 0; i < pattern_count; i++) {
+ offset = patterns[i].offset;
+ length = patterns[i].length;
+ if (offset >= HCI_MAX_AD_LENGTH ||
+ length > HCI_MAX_AD_LENGTH ||
+ (offset + length) > HCI_MAX_AD_LENGTH)
+ return MGMT_STATUS_INVALID_PARAMS;
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return MGMT_STATUS_NO_RESOURCES;
+
+ p->ad_type = patterns[i].ad_type;
+ p->offset = patterns[i].offset;
+ p->length = patterns[i].length;
+ memcpy(p->value, patterns[i].value, p->length);
+
+ INIT_LIST_HEAD(&p->list);
+ list_add(&p->list, &m->patterns);
+ }
+
+ return MGMT_STATUS_SUCCESS;
+}
+
static int add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
void *data, u16 len)
{
struct mgmt_cp_add_adv_patterns_monitor *cp = data;
- struct mgmt_rp_add_adv_patterns_monitor rp;
struct adv_monitor *m = NULL;
- struct adv_pattern *p = NULL;
- unsigned int mp_cnt = 0, prev_adv_monitors_cnt;
- __u8 cp_ofst = 0, cp_len = 0;
- int err, i;
+ u8 status = MGMT_STATUS_SUCCESS;
+ size_t expected_size = sizeof(*cp);
BT_DBG("request for %s", hdev->name);
- if (len <= sizeof(*cp) || cp->pattern_count == 0) {
- err = mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
- MGMT_STATUS_INVALID_PARAMS);
- goto failed;
+ if (len <= sizeof(*cp)) {
+ status = MGMT_STATUS_INVALID_PARAMS;
+ goto done;
}
- m = kmalloc(sizeof(*m), GFP_KERNEL);
+ expected_size += cp->pattern_count * sizeof(struct mgmt_adv_pattern);
+ if (len != expected_size) {
+ status = MGMT_STATUS_INVALID_PARAMS;
+ goto done;
+ }
+
+ m = kzalloc(sizeof(*m), GFP_KERNEL);
if (!m) {
- err = -ENOMEM;
- goto failed;
+ status = MGMT_STATUS_NO_RESOURCES;
+ goto done;
}
INIT_LIST_HEAD(&m->patterns);
- m->active = false;
- for (i = 0; i < cp->pattern_count; i++) {
- if (++mp_cnt > HCI_MAX_ADV_MONITOR_NUM_PATTERNS) {
- err = mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
- MGMT_STATUS_INVALID_PARAMS);
- goto failed;
- }
+ parse_adv_monitor_rssi(m, NULL);
+ status = parse_adv_monitor_pattern(m, cp->pattern_count, cp->patterns);
- cp_ofst = cp->patterns[i].offset;
- cp_len = cp->patterns[i].length;
- if (cp_ofst >= HCI_MAX_AD_LENGTH ||
- cp_len > HCI_MAX_AD_LENGTH ||
- (cp_ofst + cp_len) > HCI_MAX_AD_LENGTH) {
- err = mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
- MGMT_STATUS_INVALID_PARAMS);
- goto failed;
- }
+done:
+ return __add_adv_patterns_monitor(sk, hdev, m, status, data, len,
+ MGMT_OP_ADD_ADV_PATTERNS_MONITOR);
+}
- p = kmalloc(sizeof(*p), GFP_KERNEL);
- if (!p) {
- err = -ENOMEM;
- goto failed;
- }
+static int add_adv_patterns_monitor_rssi(struct sock *sk, struct hci_dev *hdev,
+ void *data, u16 len)
+{
+ struct mgmt_cp_add_adv_patterns_monitor_rssi *cp = data;
+ struct adv_monitor *m = NULL;
+ u8 status = MGMT_STATUS_SUCCESS;
+ size_t expected_size = sizeof(*cp);
- p->ad_type = cp->patterns[i].ad_type;
- p->offset = cp->patterns[i].offset;
- p->length = cp->patterns[i].length;
- memcpy(p->value, cp->patterns[i].value, p->length);
+ BT_DBG("request for %s", hdev->name);
- INIT_LIST_HEAD(&p->list);
- list_add(&p->list, &m->patterns);
+ if (len <= sizeof(*cp)) {
+ status = MGMT_STATUS_INVALID_PARAMS;
+ goto done;
}
- if (mp_cnt != cp->pattern_count) {
- err = mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
- MGMT_STATUS_INVALID_PARAMS);
- goto failed;
+ expected_size += cp->pattern_count * sizeof(struct mgmt_adv_pattern);
+ if (len != expected_size) {
+ status = MGMT_STATUS_INVALID_PARAMS;
+ goto done;
}
- hci_dev_lock(hdev);
+ m = kzalloc(sizeof(*m), GFP_KERNEL);
+ if (!m) {
+ status = MGMT_STATUS_NO_RESOURCES;
+ goto done;
+ }
- prev_adv_monitors_cnt = hdev->adv_monitors_cnt;
+ INIT_LIST_HEAD(&m->patterns);
- err = hci_add_adv_monitor(hdev, m);
- if (err) {
- if (err == -ENOSPC) {
- mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
- MGMT_STATUS_NO_RESOURCES);
- }
- goto unlock;
- }
+ parse_adv_monitor_rssi(m, &cp->rssi);
+ status = parse_adv_monitor_pattern(m, cp->pattern_count, cp->patterns);
- if (hdev->adv_monitors_cnt > prev_adv_monitors_cnt)
- mgmt_adv_monitor_added(sk, hdev, m->handle);
+done:
+ return __add_adv_patterns_monitor(sk, hdev, m, status, data, len,
+ MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI);
+}
- hci_dev_unlock(hdev);
+int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status)
+{
+ struct mgmt_rp_remove_adv_monitor rp;
+ struct mgmt_cp_remove_adv_monitor *cp;
+ struct mgmt_pending_cmd *cmd;
+ int err = 0;
- rp.monitor_handle = cpu_to_le16(m->handle);
+ hci_dev_lock(hdev);
- return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADV_PATTERNS_MONITOR,
- MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
+ cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev);
+ if (!cmd)
+ goto done;
-unlock:
+ cp = cmd->param;
+ rp.monitor_handle = cp->monitor_handle;
+
+ if (!status)
+ hci_update_background_scan(hdev);
+
+ err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_status(status), &rp, sizeof(rp));
+ mgmt_pending_remove(cmd);
+
+done:
hci_dev_unlock(hdev);
+ bt_dev_dbg(hdev, "remove monitor %d complete, status %d",
+ rp.monitor_handle, status);
-failed:
- hci_free_adv_monitor(m);
return err;
}
@@ -4332,37 +4505,64 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
{
struct mgmt_cp_remove_adv_monitor *cp = data;
struct mgmt_rp_remove_adv_monitor rp;
- unsigned int prev_adv_monitors_cnt;
- u16 handle;
- int err;
+ struct mgmt_pending_cmd *cmd;
+ u16 handle = __le16_to_cpu(cp->monitor_handle);
+ int err, status;
+ bool pending;
BT_DBG("request for %s", hdev->name);
+ rp.monitor_handle = cp->monitor_handle;
hci_dev_lock(hdev);
- handle = __le16_to_cpu(cp->monitor_handle);
- prev_adv_monitors_cnt = hdev->adv_monitors_cnt;
+ if (pending_find(MGMT_OP_SET_LE, hdev) ||
+ pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) ||
+ pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
+ pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) {
+ status = MGMT_STATUS_BUSY;
+ goto unlock;
+ }
- err = hci_remove_adv_monitor(hdev, handle);
- if (err == -ENOENT) {
- err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR,
- MGMT_STATUS_INVALID_INDEX);
+ cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len);
+ if (!cmd) {
+ status = MGMT_STATUS_NO_RESOURCES;
goto unlock;
}
- if (hdev->adv_monitors_cnt < prev_adv_monitors_cnt)
- mgmt_adv_monitor_removed(sk, hdev, handle);
+ if (handle)
+ pending = hci_remove_single_adv_monitor(hdev, handle, &err);
+ else
+ pending = hci_remove_all_adv_monitor(hdev, &err);
- hci_dev_unlock(hdev);
+ if (err) {
+ mgmt_pending_remove(cmd);
- rp.monitor_handle = cp->monitor_handle;
+ if (err == -ENOENT)
+ status = MGMT_STATUS_INVALID_INDEX;
+ else
+ status = MGMT_STATUS_FAILED;
- return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR,
- MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
+ goto unlock;
+ }
+
+ /* monitor can be removed without forwarding request to controller */
+ if (!pending) {
+ mgmt_pending_remove(cmd);
+ hci_dev_unlock(hdev);
+
+ return mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_REMOVE_ADV_MONITOR,
+ MGMT_STATUS_SUCCESS,
+ &rp, sizeof(rp));
+ }
+
+ hci_dev_unlock(hdev);
+ return 0;
unlock:
hci_dev_unlock(hdev);
- return err;
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR,
+ status);
}
static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
@@ -4798,6 +4998,14 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
goto failed;
}
+ if (hdev->discovery_paused) {
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_START_SERVICE_DISCOVERY,
+ MGMT_STATUS_BUSY, &cp->type,
+ sizeof(cp->type));
+ goto failed;
+ }
+
uuid_count = __le16_to_cpu(cp->uuid_count);
if (uuid_count > max_uuid_count) {
bt_dev_err(hdev, "service_discovery: too big uuid_count value %u",
@@ -8234,6 +8442,9 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
HCI_MGMT_VAR_LEN },
{ add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE,
HCI_MGMT_VAR_LEN },
+ { add_adv_patterns_monitor_rssi,
+ MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE,
+ HCI_MGMT_VAR_LEN },
};
void mgmt_index_added(struct hci_dev *hdev)
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 4b39534a14a1..47b104f318e9 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -5,9 +5,16 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/mgmt.h>
+#include "hci_request.h"
+#include "mgmt_util.h"
#include "msft.h"
+#define MSFT_RSSI_THRESHOLD_VALUE_MIN -127
+#define MSFT_RSSI_THRESHOLD_VALUE_MAX 20
+#define MSFT_RSSI_LOW_TIMEOUT_MAX 0x3C
+
#define MSFT_OP_READ_SUPPORTED_FEATURES 0x00
struct msft_cp_read_supported_features {
__u8 sub_opcode;
@@ -21,12 +28,83 @@ struct msft_rp_read_supported_features {
__u8 evt_prefix[];
} __packed;
+#define MSFT_OP_LE_MONITOR_ADVERTISEMENT 0x03
+#define MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN 0x01
+struct msft_le_monitor_advertisement_pattern {
+ __u8 length;
+ __u8 data_type;
+ __u8 start_byte;
+ __u8 pattern[0];
+};
+
+struct msft_le_monitor_advertisement_pattern_data {
+ __u8 count;
+ __u8 data[0];
+};
+
+struct msft_cp_le_monitor_advertisement {
+ __u8 sub_opcode;
+ __s8 rssi_high;
+ __s8 rssi_low;
+ __u8 rssi_low_interval;
+ __u8 rssi_sampling_period;
+ __u8 cond_type;
+ __u8 data[0];
+} __packed;
+
+struct msft_rp_le_monitor_advertisement {
+ __u8 status;
+ __u8 sub_opcode;
+ __u8 handle;
+} __packed;
+
+#define MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT 0x04
+struct msft_cp_le_cancel_monitor_advertisement {
+ __u8 sub_opcode;
+ __u8 handle;
+} __packed;
+
+struct msft_rp_le_cancel_monitor_advertisement {
+ __u8 status;
+ __u8 sub_opcode;
+} __packed;
+
+#define MSFT_OP_LE_SET_ADVERTISEMENT_FILTER_ENABLE 0x05
+struct msft_cp_le_set_advertisement_filter_enable {
+ __u8 sub_opcode;
+ __u8 enable;
+} __packed;
+
+struct msft_rp_le_set_advertisement_filter_enable {
+ __u8 status;
+ __u8 sub_opcode;
+} __packed;
+
+struct msft_monitor_advertisement_handle_data {
+ __u8 msft_handle;
+ __u16 mgmt_handle;
+ struct list_head list;
+};
+
struct msft_data {
__u64 features;
__u8 evt_prefix_len;
__u8 *evt_prefix;
+ struct list_head handle_map;
+ __u16 pending_add_handle;
+ __u16 pending_remove_handle;
+ __u8 reregistering;
+ __u8 filter_enabled;
};
+static int __msft_add_monitor_pattern(struct hci_dev *hdev,
+ struct adv_monitor *monitor);
+
+bool msft_monitor_supported(struct hci_dev *hdev)
+{
+ return !!(msft_get_features(hdev) & MSFT_FEATURE_MASK_LE_ADV_MONITOR);
+}
+
static bool read_supported_features(struct hci_dev *hdev,
struct msft_data *msft)
{
@@ -72,6 +150,35 @@ failed:
return false;
}
+/* This function requires the caller holds hdev->lock */
+static void reregister_monitor_on_restart(struct hci_dev *hdev, int handle)
+{
+ struct adv_monitor *monitor;
+ struct msft_data *msft = hdev->msft_data;
+ int err;
+
+ while (1) {
+ monitor = idr_get_next(&hdev->adv_monitors_idr, &handle);
+ if (!monitor) {
+ /* All monitors have been reregistered */
+ msft->reregistering = false;
+ hci_update_background_scan(hdev);
+ return;
+ }
+
+ msft->pending_add_handle = (u16)handle;
+ err = __msft_add_monitor_pattern(hdev, monitor);
+
+ /* If success, we return and wait for monitor added callback */
+ if (!err)
+ return;
+
+ /* Otherwise remove the monitor and keep registering */
+ hci_free_adv_monitor(hdev, monitor);
+ handle++;
+ }
+}
+
void msft_do_open(struct hci_dev *hdev)
{
struct msft_data *msft;
@@ -90,12 +197,21 @@ void msft_do_open(struct hci_dev *hdev)
return;
}
+ INIT_LIST_HEAD(&msft->handle_map);
hdev->msft_data = msft;
+
+ if (msft_monitor_supported(hdev)) {
+ msft->reregistering = true;
+ msft_set_filter_enable(hdev, true);
+ reregister_monitor_on_restart(hdev, 0);
+ }
}
void msft_do_close(struct hci_dev *hdev)
{
struct msft_data *msft = hdev->msft_data;
+ struct msft_monitor_advertisement_handle_data *handle_data, *tmp;
+ struct adv_monitor *monitor;
if (!msft)
return;
@@ -104,6 +220,17 @@ void msft_do_close(struct hci_dev *hdev)
hdev->msft_data = NULL;
+ list_for_each_entry_safe(handle_data, tmp, &msft->handle_map, list) {
+ monitor = idr_find(&hdev->adv_monitors_idr,
+ handle_data->mgmt_handle);
+
+ if (monitor && monitor->state == ADV_MONITOR_STATE_OFFLOADED)
+ monitor->state = ADV_MONITOR_STATE_REGISTERED;
+
+ list_del(&handle_data->list);
+ kfree(handle_data);
+ }
+
kfree(msft->evt_prefix);
kfree(msft);
}
@@ -145,5 +272,336 @@ __u64 msft_get_features(struct hci_dev *hdev)
{
struct msft_data *msft = hdev->msft_data;
- return msft ? msft->features : 0;
+ return msft ? msft->features : 0;
+}
+
+/* is_mgmt = true matches the handle exposed to userspace via mgmt.
+ * is_mgmt = false matches the handle used by the msft controller.
+ * This function requires the caller holds hdev->lock
+ */
+static struct msft_monitor_advertisement_handle_data *msft_find_handle_data
+ (struct hci_dev *hdev, u16 handle, bool is_mgmt)
+{
+ struct msft_monitor_advertisement_handle_data *entry;
+ struct msft_data *msft = hdev->msft_data;
+
+ list_for_each_entry(entry, &msft->handle_map, list) {
+ if (is_mgmt && entry->mgmt_handle == handle)
+ return entry;
+ if (!is_mgmt && entry->msft_handle == handle)
+ return entry;
+ }
+
+ return NULL;
+}
+
+static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev,
+ u8 status, u16 opcode,
+ struct sk_buff *skb)
+{
+ struct msft_rp_le_monitor_advertisement *rp;
+ struct adv_monitor *monitor;
+ struct msft_monitor_advertisement_handle_data *handle_data;
+ struct msft_data *msft = hdev->msft_data;
+
+ hci_dev_lock(hdev);
+
+ monitor = idr_find(&hdev->adv_monitors_idr, msft->pending_add_handle);
+ if (!monitor) {
+ bt_dev_err(hdev, "msft add advmon: monitor %d is not found!",
+ msft->pending_add_handle);
+ status = HCI_ERROR_UNSPECIFIED;
+ goto unlock;
+ }
+
+ if (status)
+ goto unlock;
+
+ rp = (struct msft_rp_le_monitor_advertisement *)skb->data;
+ if (skb->len < sizeof(*rp)) {
+ status = HCI_ERROR_UNSPECIFIED;
+ goto unlock;
+ }
+
+ handle_data = kmalloc(sizeof(*handle_data), GFP_KERNEL);
+ if (!handle_data) {
+ status = HCI_ERROR_UNSPECIFIED;
+ goto unlock;
+ }
+
+ handle_data->mgmt_handle = monitor->handle;
+ handle_data->msft_handle = rp->handle;
+ INIT_LIST_HEAD(&handle_data->list);
+ list_add(&handle_data->list, &msft->handle_map);
+
+ monitor->state = ADV_MONITOR_STATE_OFFLOADED;
+
+unlock:
+ if (status && monitor)
+ hci_free_adv_monitor(hdev, monitor);
+
+ /* If in restart/reregister sequence, keep registering. */
+ if (msft->reregistering)
+ reregister_monitor_on_restart(hdev,
+ msft->pending_add_handle + 1);
+
+ hci_dev_unlock(hdev);
+
+ if (!msft->reregistering)
+ hci_add_adv_patterns_monitor_complete(hdev, status);
+}
+
+static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev,
+ u8 status, u16 opcode,
+ struct sk_buff *skb)
+{
+ struct msft_cp_le_cancel_monitor_advertisement *cp;
+ struct msft_rp_le_cancel_monitor_advertisement *rp;
+ struct adv_monitor *monitor;
+ struct msft_monitor_advertisement_handle_data *handle_data;
+ struct msft_data *msft = hdev->msft_data;
+ int err;
+ bool pending;
+
+ if (status)
+ goto done;
+
+ rp = (struct msft_rp_le_cancel_monitor_advertisement *)skb->data;
+ if (skb->len < sizeof(*rp)) {
+ status = HCI_ERROR_UNSPECIFIED;
+ goto done;
+ }
+
+ hci_dev_lock(hdev);
+
+ cp = hci_sent_cmd_data(hdev, hdev->msft_opcode);
+ handle_data = msft_find_handle_data(hdev, cp->handle, false);
+
+ if (handle_data) {
+ monitor = idr_find(&hdev->adv_monitors_idr,
+ handle_data->mgmt_handle);
+ if (monitor)
+ hci_free_adv_monitor(hdev, monitor);
+
+ list_del(&handle_data->list);
+ kfree(handle_data);
+ }
+
+ /* A pending_remove_handle of 0 means "remove all monitors": resume
+ * that sequence here, since it was paused while waiting for the
+ * controller's response to this cancel command.
+ */
+ if (msft->pending_remove_handle == 0) {
+ pending = hci_remove_all_adv_monitor(hdev, &err);
+ if (pending) {
+ hci_dev_unlock(hdev);
+ return;
+ }
+
+ if (err)
+ status = HCI_ERROR_UNSPECIFIED;
+ }
+
+ hci_dev_unlock(hdev);
+
+done:
+ hci_remove_adv_monitor_complete(hdev, status);
+}
+
+static void msft_le_set_advertisement_filter_enable_cb(struct hci_dev *hdev,
+ u8 status, u16 opcode,
+ struct sk_buff *skb)
+{
+ struct msft_cp_le_set_advertisement_filter_enable *cp;
+ struct msft_rp_le_set_advertisement_filter_enable *rp;
+ struct msft_data *msft = hdev->msft_data;
+
+ rp = (struct msft_rp_le_set_advertisement_filter_enable *)skb->data;
+ if (skb->len < sizeof(*rp))
+ return;
+
+ /* Error 0x0C is returned if the filter enable state is already set
+ * to the value we were trying to set.
+ * Although the default state should be disabled, some controllers set
+ * the initial value to enabled. Because there is no way to know the
+ * actual initial value before sending this command, error 0x0C is
+ * also treated as success here.
+ */
+ if (status != 0x00 && status != 0x0C)
+ return;
+
+ hci_dev_lock(hdev);
+
+ cp = hci_sent_cmd_data(hdev, hdev->msft_opcode);
+ msft->filter_enabled = cp->enable;
+
+ if (status == 0x0C)
+ bt_dev_warn(hdev, "MSFT filter_enable is already %s",
+ cp->enable ? "on" : "off");
+
+ hci_dev_unlock(hdev);
+}
+
+static bool msft_monitor_rssi_valid(struct adv_monitor *monitor)
+{
+ struct adv_rssi_thresholds *r = &monitor->rssi;
+
+ if (r->high_threshold < MSFT_RSSI_THRESHOLD_VALUE_MIN ||
+ r->high_threshold > MSFT_RSSI_THRESHOLD_VALUE_MAX ||
+ r->low_threshold < MSFT_RSSI_THRESHOLD_VALUE_MIN ||
+ r->low_threshold > MSFT_RSSI_THRESHOLD_VALUE_MAX)
+ return false;
+
+ /* High_threshold_timeout is not supported,
+ * once high_threshold is reached, events are immediately reported.
+ */
+ if (r->high_threshold_timeout != 0)
+ return false;
+
+ if (r->low_threshold_timeout > MSFT_RSSI_LOW_TIMEOUT_MAX)
+ return false;
+
+ /* Sampling period from 0x00 to 0xFF are all allowed */
+ return true;
+}
+
+static bool msft_monitor_pattern_valid(struct adv_monitor *monitor)
+{
+ /* Pattern-based monitors need no checks beyond the RSSI parameters */
+ return msft_monitor_rssi_valid(monitor);
+}
+
+/* This function requires the caller holds hdev->lock */
+static int __msft_add_monitor_pattern(struct hci_dev *hdev,
+ struct adv_monitor *monitor)
+{
+ struct msft_cp_le_monitor_advertisement *cp;
+ struct msft_le_monitor_advertisement_pattern_data *pattern_data;
+ struct msft_le_monitor_advertisement_pattern *pattern;
+ struct adv_pattern *entry;
+ struct hci_request req;
+ struct msft_data *msft = hdev->msft_data;
+ size_t total_size = sizeof(*cp) + sizeof(*pattern_data);
+ ptrdiff_t offset = 0;
+ u8 pattern_count = 0;
+ int err = 0;
+
+ if (!msft_monitor_pattern_valid(monitor))
+ return -EINVAL;
+
+ list_for_each_entry(entry, &monitor->patterns, list) {
+ pattern_count++;
+ total_size += sizeof(*pattern) + entry->length;
+ }
+
+ cp = kmalloc(total_size, GFP_KERNEL);
+ if (!cp)
+ return -ENOMEM;
+
+ cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT;
+ cp->rssi_high = monitor->rssi.high_threshold;
+ cp->rssi_low = monitor->rssi.low_threshold;
+ cp->rssi_low_interval = (u8)monitor->rssi.low_threshold_timeout;
+ cp->rssi_sampling_period = monitor->rssi.sampling_period;
+
+ cp->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN;
+
+ pattern_data = (void *)cp->data;
+ pattern_data->count = pattern_count;
+
+ list_for_each_entry(entry, &monitor->patterns, list) {
+ pattern = (void *)(pattern_data->data + offset);
+ /* the length also includes data_type and offset */
+ pattern->length = entry->length + 2;
+ pattern->data_type = entry->ad_type;
+ pattern->start_byte = entry->offset;
+ memcpy(pattern->pattern, entry->value, entry->length);
+ offset += sizeof(*pattern) + entry->length;
+ }
+
+ hci_req_init(&req, hdev);
+ hci_req_add(&req, hdev->msft_opcode, total_size, cp);
+ err = hci_req_run_skb(&req, msft_le_monitor_advertisement_cb);
+ kfree(cp);
+
+ if (!err)
+ msft->pending_add_handle = monitor->handle;
+
+ return err;
+}
+
+/* This function requires the caller holds hdev->lock */
+int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor)
+{
+ struct msft_data *msft = hdev->msft_data;
+
+ if (!msft)
+ return -EOPNOTSUPP;
+
+ if (msft->reregistering)
+ return -EBUSY;
+
+ return __msft_add_monitor_pattern(hdev, monitor);
+}
+
+/* Ask the controller to cancel @monitor; the caller must hold hdev->lock */
+int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
+ u16 handle)
+{
+ struct msft_cp_le_cancel_monitor_advertisement cp;
+ struct msft_monitor_advertisement_handle_data *handle_data;
+ struct hci_request req;
+ struct msft_data *msft = hdev->msft_data;
+ int err = 0;
+
+ if (!msft)
+ return -EOPNOTSUPP;
+
+ if (msft->reregistering)
+ return -EBUSY;
+
+ handle_data = msft_find_handle_data(hdev, monitor->handle, true);
+
+ /* No controller handle is mapped: send nothing and report -ENOENT */
+ if (!handle_data)
+ return -ENOENT;
+
+ cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT;
+ cp.handle = handle_data->msft_handle;
+
+ hci_req_init(&req, hdev);
+ hci_req_add(&req, hdev->msft_opcode, sizeof(cp), &cp);
+ err = hci_req_run_skb(&req, msft_le_cancel_monitor_advertisement_cb);
+
+ if (!err)
+ msft->pending_remove_handle = handle;
+
+ return err;
+}
+
+void msft_req_add_set_filter_enable(struct hci_request *req, bool enable)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct msft_cp_le_set_advertisement_filter_enable cp;
+
+ cp.sub_opcode = MSFT_OP_LE_SET_ADVERTISEMENT_FILTER_ENABLE;
+ cp.enable = enable;
+
+ hci_req_add(req, hdev->msft_opcode, sizeof(cp), &cp);
+}
+
+int msft_set_filter_enable(struct hci_dev *hdev, bool enable)
+{
+ struct hci_request req;
+ struct msft_data *msft = hdev->msft_data;
+ int err;
+
+ if (!msft)
+ return -EOPNOTSUPP;
+
+ hci_req_init(&req, hdev);
+ msft_req_add_set_filter_enable(&req, enable);
+ err = hci_req_run_skb(&req, msft_le_set_advertisement_filter_enable_cb);
+
+ return err;
}
diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h
index e9c478e890b8..88ed613dfa08 100644
--- a/net/bluetooth/msft.h
+++ b/net/bluetooth/msft.h
@@ -12,16 +12,46 @@
#if IS_ENABLED(CONFIG_BT_MSFTEXT)
+bool msft_monitor_supported(struct hci_dev *hdev);
void msft_do_open(struct hci_dev *hdev);
void msft_do_close(struct hci_dev *hdev);
void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb);
__u64 msft_get_features(struct hci_dev *hdev);
+int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor);
+int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
+ u16 handle);
+void msft_req_add_set_filter_enable(struct hci_request *req, bool enable);
+int msft_set_filter_enable(struct hci_dev *hdev, bool enable);
#else
+static inline bool msft_monitor_supported(struct hci_dev *hdev)
+{
+ return false;
+}
+
static inline void msft_do_open(struct hci_dev *hdev) {}
static inline void msft_do_close(struct hci_dev *hdev) {}
static inline void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) {}
static inline __u64 msft_get_features(struct hci_dev *hdev) { return 0; }
+static inline int msft_add_monitor_pattern(struct hci_dev *hdev,
+ struct adv_monitor *monitor)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int msft_remove_monitor(struct hci_dev *hdev,
+ struct adv_monitor *monitor,
+ u16 handle)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void msft_req_add_set_filter_enable(struct hci_request *req,
+ bool enable) {}
+static inline int msft_set_filter_enable(struct hci_dev *hdev, bool enable)
+{
+ return -EOPNOTSUPP;
+}
#endif
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index c659c464f7ca..b0c1ee110eff 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -25,7 +25,6 @@
#include <linux/crypto.h>
#include <crypto/aes.h>
#include <crypto/algapi.h>
-#include <crypto/b128ops.h>
#include <crypto/hash.h>
#include <crypto/kpp.h>
@@ -425,7 +424,7 @@ static int smp_c1(const u8 k[16],
SMP_DBG("p1 %16phN", p1);
/* res = r XOR p1 */
- u128_xor((u128 *) res, (u128 *) r, (u128 *) p1);
+ crypto_xor_cpy(res, r, p1, sizeof(p1));
/* res = e(k, res) */
err = smp_e(k, res);
@@ -442,7 +441,7 @@ static int smp_c1(const u8 k[16],
SMP_DBG("p2 %16phN", p2);
/* res = res XOR p2 */
- u128_xor((u128 *) res, (u128 *) res, (u128 *) p2);
+ crypto_xor(res, p2, sizeof(p2));
/* res = e(k, res) */
err = smp_e(k, res);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c1c30a9f76f3..58bcb8c849d5 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -272,7 +272,8 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
kattr->test.repeat)
return -EINVAL;
- if (ctx_size_in < prog->aux->max_ctx_offset)
+ if (ctx_size_in < prog->aux->max_ctx_offset ||
+ ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64))
return -EINVAL;
if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
@@ -636,14 +637,11 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (IS_ERR(data))
return PTR_ERR(data);
- xdp.data_hard_start = data;
- xdp.data = data + headroom;
- xdp.data_meta = xdp.data;
- xdp.data_end = xdp.data + size;
- xdp.frame_sz = headroom + max_data_sz + tailroom;
-
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
- xdp.rxq = &rxqueue->xdp_rxq;
+ xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
+ &rxqueue->xdp_rxq);
+ xdp_prepare_buff(&xdp, data, headroom, size, true);
+
bpf_prog_change_xdp(NULL, prog);
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
if (ret)
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index 8ad0233ce497..3d4a21462458 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
menuconfig BPFILTER
bool "BPF based packet filtering framework (BPFILTER)"
- depends on NET && BPF && INET
+ depends on BPF && INET
select USERMODE_DRIVER
help
This builds experimental bpfilter framework that is aiming to
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 4702702a74d3..7fb9a021873b 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -18,7 +18,7 @@ br_netfilter-y := br_netfilter_hooks.o
br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o
obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o
-bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
+bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o br_multicast_eht.o
bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1b169f8e7491..ef743f94254d 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -122,7 +122,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
break;
case NETDEV_PRE_TYPE_CHANGE:
- /* Forbid underlaying device to change its type. */
+ /* Forbid underlying device to change its type. */
return NOTIFY_BAD;
case NETDEV_RESEND_IGMP:
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 32ac8343b0ba..b7490237f3fc 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -602,6 +602,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
/* fastpath: update of existing entry */
if (unlikely(source != fdb->dst &&
!test_bit(BR_FDB_STICKY, &fdb->flags))) {
+ br_switchdev_fdb_notify(fdb, RTM_DELNEIGH);
fdb->dst = source;
fdb_modified = true;
/* Take over HW learned entry */
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index e28ffadd1371..6e9b049ae521 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -39,8 +39,7 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb
br_drop_fake_rtable(skb);
if (skb->ip_summed == CHECKSUM_PARTIAL &&
- (skb->protocol == htons(ETH_P_8021Q) ||
- skb->protocol == htons(ETH_P_8021AD))) {
+ eth_type_vlan(skb->protocol)) {
int depth;
if (!__vlan_get_protocol(skb, skb->protocol, &depth))
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8ca1f1bc6d12..222285d9dae2 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -40,7 +40,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
vg = br_vlan_group_rcu(br);
/* Bridge is just like any other port. Make sure the
- * packet is allowed except in promisc modue when someone
+ * packet is allowed except in promisc mode when someone
* may be running packet capture.
*/
if (!(brdev->flags & IFF_PROMISC) &&
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index cec2c4e4561d..12487f6fe9b4 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -557,19 +557,22 @@ int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance)
int br_mrp_set_port_state(struct net_bridge_port *p,
enum br_mrp_port_state_type state)
{
+ u32 port_state;
+
if (!p || !(p->flags & BR_MRP_AWARE))
return -EINVAL;
spin_lock_bh(&p->br->lock);
if (state == BR_MRP_PORT_STATE_FORWARDING)
- p->state = BR_STATE_FORWARDING;
+ port_state = BR_STATE_FORWARDING;
else
- p->state = BR_STATE_BLOCKING;
+ port_state = BR_STATE_BLOCKING;
+ p->state = port_state;
spin_unlock_bh(&p->br->lock);
- br_mrp_port_switchdev_set_state(p, state);
+ br_mrp_port_switchdev_set_state(p, port_state);
return 0;
}
@@ -636,7 +639,7 @@ int br_mrp_set_ring_role(struct net_bridge *br,
struct br_mrp_ring_role *role)
{
struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id);
- int err;
+ enum br_mrp_hw_support support;
if (!mrp)
return -EINVAL;
@@ -644,9 +647,9 @@ int br_mrp_set_ring_role(struct net_bridge *br,
mrp->ring_role = role->ring_role;
/* If there is an error just bailed out */
- err = br_mrp_switchdev_set_ring_role(br, mrp, role->ring_role);
- if (err && err != -EOPNOTSUPP)
- return err;
+ support = br_mrp_switchdev_set_ring_role(br, mrp, role->ring_role);
+ if (support == BR_MRP_NONE)
+ return -EOPNOTSUPP;
/* Now detect if the HW actually applied the role or not. If the HW
* applied the role it means that the SW will not to do those operations
@@ -654,7 +657,7 @@ int br_mrp_set_ring_role(struct net_bridge *br,
* SW when ring is open, but if the is not pushed to the HW the SW will
* need to detect when the ring is open
*/
- mrp->ring_role_offloaded = err == -EOPNOTSUPP ? 0 : 1;
+ mrp->ring_role_offloaded = support == BR_MRP_SW ? 0 : 1;
return 0;
}
@@ -667,6 +670,7 @@ int br_mrp_start_test(struct net_bridge *br,
struct br_mrp_start_test *test)
{
struct br_mrp *mrp = br_mrp_find_id(br, test->ring_id);
+ enum br_mrp_hw_support support;
if (!mrp)
return -EINVAL;
@@ -674,9 +678,13 @@ int br_mrp_start_test(struct net_bridge *br,
/* Try to push it to the HW and if it fails then continue with SW
* implementation and if that also fails then return error.
*/
- if (!br_mrp_switchdev_send_ring_test(br, mrp, test->interval,
- test->max_miss, test->period,
- test->monitor))
+ support = br_mrp_switchdev_send_ring_test(br, mrp, test->interval,
+ test->max_miss, test->period,
+ test->monitor);
+ if (support == BR_MRP_NONE)
+ return -EOPNOTSUPP;
+
+ if (support == BR_MRP_HW)
return 0;
mrp->test_interval = test->interval;
@@ -718,8 +726,8 @@ int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state)
int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role)
{
struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id);
+ enum br_mrp_hw_support support;
struct net_bridge_port *p;
- int err;
if (!mrp)
return -EINVAL;
@@ -777,10 +785,10 @@ int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role)
mrp->in_id = role->in_id;
/* If there is an error just bailed out */
- err = br_mrp_switchdev_set_in_role(br, mrp, role->in_id,
- role->ring_id, role->in_role);
- if (err && err != -EOPNOTSUPP)
- return err;
+ support = br_mrp_switchdev_set_in_role(br, mrp, role->in_id,
+ role->ring_id, role->in_role);
+ if (support == BR_MRP_NONE)
+ return -EOPNOTSUPP;
/* Now detect if the HW actually applied the role or not. If the HW
* applied the role it means that the SW will not to do those operations
@@ -788,7 +796,7 @@ int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role)
* SW when interconnect ring is open, but if the is not pushed to the HW
* the SW will need to detect when the interconnect ring is open.
*/
- mrp->in_role_offloaded = err == -EOPNOTSUPP ? 0 : 1;
+ mrp->in_role_offloaded = support == BR_MRP_SW ? 0 : 1;
return 0;
}
@@ -801,6 +809,7 @@ int br_mrp_start_in_test(struct net_bridge *br,
struct br_mrp_start_in_test *in_test)
{
struct br_mrp *mrp = br_mrp_find_in_id(br, in_test->in_id);
+ enum br_mrp_hw_support support;
if (!mrp)
return -EINVAL;
@@ -811,8 +820,13 @@ int br_mrp_start_in_test(struct net_bridge *br,
/* Try to push it to the HW and if it fails then continue with SW
* implementation and if that also fails then return error.
*/
- if (!br_mrp_switchdev_send_in_test(br, mrp, in_test->interval,
- in_test->max_miss, in_test->period))
+ support = br_mrp_switchdev_send_in_test(br, mrp, in_test->interval,
+ in_test->max_miss,
+ in_test->period);
+ if (support == BR_MRP_NONE)
+ return -EOPNOTSUPP;
+
+ if (support == BR_MRP_HW)
return 0;
mrp->in_test_interval = in_test->interval;
@@ -825,7 +839,7 @@ int br_mrp_start_in_test(struct net_bridge *br,
return 0;
}
-/* Determin if the frame type is a ring frame */
+/* Determine if the frame type is a ring frame */
static bool br_mrp_ring_frame(struct sk_buff *skb)
{
const struct br_mrp_tlv_hdr *hdr;
@@ -845,7 +859,7 @@ static bool br_mrp_ring_frame(struct sk_buff *skb)
return false;
}
-/* Determin if the frame type is an interconnect frame */
+/* Determine if the frame type is an interconnect frame */
static bool br_mrp_in_frame(struct sk_buff *skb)
{
const struct br_mrp_tlv_hdr *hdr;
@@ -894,7 +908,7 @@ static void br_mrp_mrm_process(struct br_mrp *mrp, struct net_bridge_port *port,
br_mrp_ring_port_open(port->dev, false);
}
-/* Determin if the test hdr has a better priority than the node */
+/* Determine if the test hdr has a better priority than the node */
static bool br_mrp_test_better_than_own(struct br_mrp *mrp,
struct net_bridge *br,
const struct br_mrp_ring_test_hdr *hdr)
diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c
index ed547e03ace1..cb54b324fa8c 100644
--- a/net/bridge/br_mrp_switchdev.c
+++ b/net/bridge/br_mrp_switchdev.c
@@ -4,6 +4,30 @@
#include "br_private_mrp.h"
+static enum br_mrp_hw_support
+br_mrp_switchdev_port_obj(struct net_bridge *br,
+ const struct switchdev_obj *obj, bool add)
+{
+ int err;
+
+ if (add)
+ err = switchdev_port_obj_add(br->dev, obj, NULL);
+ else
+ err = switchdev_port_obj_del(br->dev, obj);
+
+ /* Success: report BR_MRP_HW so the SW knows that it doesn't need
+ * to do anything
+ */
+ if (!err)
+ return BR_MRP_HW;
+
+ if (err != -EOPNOTSUPP)
+ return BR_MRP_NONE;
+
+ /* -EOPNOTSUPP: continue with SW backup */
+ return BR_MRP_SW;
+}
+
int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp)
{
struct switchdev_obj_mrp mrp_obj = {
@@ -14,14 +38,11 @@ int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp)
.ring_id = mrp->ring_id,
.prio = mrp->prio,
};
- int err;
- err = switchdev_port_obj_add(br->dev, &mrp_obj.obj, NULL);
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return 0;
- if (err && err != -EOPNOTSUPP)
- return err;
-
- return 0;
+ return switchdev_port_obj_add(br->dev, &mrp_obj.obj, NULL);
}
int br_mrp_switchdev_del(struct net_bridge *br, struct br_mrp *mrp)
@@ -33,40 +54,54 @@ int br_mrp_switchdev_del(struct net_bridge *br, struct br_mrp *mrp)
.s_port = NULL,
.ring_id = mrp->ring_id,
};
- int err;
-
- err = switchdev_port_obj_del(br->dev, &mrp_obj.obj);
- if (err && err != -EOPNOTSUPP)
- return err;
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return 0;
- return 0;
+ return switchdev_port_obj_del(br->dev, &mrp_obj.obj);
}
-int br_mrp_switchdev_set_ring_role(struct net_bridge *br,
- struct br_mrp *mrp,
- enum br_mrp_ring_role_type role)
+enum br_mrp_hw_support
+br_mrp_switchdev_set_ring_role(struct net_bridge *br, struct br_mrp *mrp,
+ enum br_mrp_ring_role_type role)
{
struct switchdev_obj_ring_role_mrp mrp_role = {
.obj.orig_dev = br->dev,
.obj.id = SWITCHDEV_OBJ_ID_RING_ROLE_MRP,
.ring_role = role,
.ring_id = mrp->ring_id,
+ .sw_backup = false,
};
+ enum br_mrp_hw_support support;
int err;
- if (role == BR_MRP_RING_ROLE_DISABLED)
- err = switchdev_port_obj_del(br->dev, &mrp_role.obj);
- else
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return BR_MRP_SW;
+
+ support = br_mrp_switchdev_port_obj(br, &mrp_role.obj,
+ role != BR_MRP_RING_ROLE_DISABLED);
+ if (support != BR_MRP_SW)
+ return support;
+
+ /* BR_MRP_SW means the driver can't run the protocol completely in HW:
+ * try again to configure the HW so that the SW can run the protocol.
+ */
+ mrp_role.sw_backup = true;
+ if (role != BR_MRP_RING_ROLE_DISABLED)
err = switchdev_port_obj_add(br->dev, &mrp_role.obj, NULL);
+ else
+ err = switchdev_port_obj_del(br->dev, &mrp_role.obj);
- return err;
+ if (!err)
+ return BR_MRP_SW;
+
+ return BR_MRP_NONE;
}
-int br_mrp_switchdev_send_ring_test(struct net_bridge *br,
- struct br_mrp *mrp, u32 interval,
- u8 max_miss, u32 period,
- bool monitor)
+enum br_mrp_hw_support
+br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp,
+ u32 interval, u8 max_miss, u32 period,
+ bool monitor)
{
struct switchdev_obj_ring_test_mrp test = {
.obj.orig_dev = br->dev,
@@ -77,14 +112,11 @@ int br_mrp_switchdev_send_ring_test(struct net_bridge *br,
.period = period,
.monitor = monitor,
};
- int err;
- if (interval == 0)
- err = switchdev_port_obj_del(br->dev, &test.obj);
- else
- err = switchdev_port_obj_add(br->dev, &test.obj, NULL);
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return BR_MRP_SW;
- return err;
+ return br_mrp_switchdev_port_obj(br, &test.obj, interval != 0);
}
int br_mrp_switchdev_set_ring_state(struct net_bridge *br,
@@ -97,19 +129,17 @@ int br_mrp_switchdev_set_ring_state(struct net_bridge *br,
.ring_state = state,
.ring_id = mrp->ring_id,
};
- int err;
-
- err = switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL);
- if (err && err != -EOPNOTSUPP)
- return err;
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return 0;
- return 0;
+ return switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL);
}
-int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp,
- u16 in_id, u32 ring_id,
- enum br_mrp_in_role_type role)
+enum br_mrp_hw_support
+br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp,
+ u16 in_id, u32 ring_id,
+ enum br_mrp_in_role_type role)
{
struct switchdev_obj_in_role_mrp mrp_role = {
.obj.orig_dev = br->dev,
@@ -118,15 +148,32 @@ int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp,
.in_id = mrp->in_id,
.ring_id = mrp->ring_id,
.i_port = rtnl_dereference(mrp->i_port)->dev,
+ .sw_backup = false,
};
+ enum br_mrp_hw_support support;
int err;
- if (role == BR_MRP_IN_ROLE_DISABLED)
- err = switchdev_port_obj_del(br->dev, &mrp_role.obj);
- else
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return BR_MRP_SW;
+
+ support = br_mrp_switchdev_port_obj(br, &mrp_role.obj,
+ role != BR_MRP_IN_ROLE_DISABLED);
+ if (support != BR_MRP_SW)
+ return support;
+
+ /* BR_MRP_SW means the driver can't run the protocol completely in HW:
+ * try again to configure the HW so that the SW can run the protocol.
+ */
+ mrp_role.sw_backup = true;
+ if (role != BR_MRP_IN_ROLE_DISABLED)
err = switchdev_port_obj_add(br->dev, &mrp_role.obj, NULL);
+ else
+ err = switchdev_port_obj_del(br->dev, &mrp_role.obj);
+
+ if (!err)
+ return BR_MRP_SW;
- return err;
+ return BR_MRP_NONE;
}
int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp,
@@ -138,18 +185,16 @@ int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp,
.in_state = state,
.in_id = mrp->in_id,
};
- int err;
-
- err = switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL);
- if (err && err != -EOPNOTSUPP)
- return err;
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return 0;
- return 0;
+ return switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL);
}
-int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
- u32 interval, u8 max_miss, u32 period)
+enum br_mrp_hw_support
+br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
+ u32 interval, u8 max_miss, u32 period)
{
struct switchdev_obj_in_test_mrp test = {
.obj.orig_dev = br->dev,
@@ -159,32 +204,25 @@ int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
.in_id = mrp->in_id,
.period = period,
};
- int err;
- if (interval == 0)
- err = switchdev_port_obj_del(br->dev, &test.obj);
- else
- err = switchdev_port_obj_add(br->dev, &test.obj, NULL);
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return BR_MRP_SW;
- return err;
+ return br_mrp_switchdev_port_obj(br, &test.obj, interval != 0);
}
-int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
- enum br_mrp_port_state_type state)
+int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state)
{
struct switchdev_attr attr = {
.orig_dev = p->dev,
- .id = SWITCHDEV_ATTR_ID_MRP_PORT_STATE,
- .u.mrp_port_state = state,
+ .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE,
+ .u.stp_state = state,
};
- int err;
- err = switchdev_port_attr_set(p->dev, &attr);
- if (err && err != -EOPNOTSUPP)
- br_warn(p->br, "error setting offload MRP state on port %u(%s)\n",
- (unsigned int)p->port_no, p->dev->name);
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return 0;
- return err;
+ return switchdev_port_attr_set(p->dev, &attr, NULL);
}
int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
@@ -195,11 +233,9 @@ int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
.id = SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
.u.mrp_port_role = role,
};
- int err;
- err = switchdev_port_attr_set(p->dev, &attr);
- if (err && err != -EOPNOTSUPP)
- return err;
+ if (!IS_ENABLED(CONFIG_NET_SWITCHDEV))
+ return 0;
- return 0;
+ return switchdev_port_attr_set(p->dev, &attr, NULL);
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 257ac4e25f6d..9d265447d654 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -33,6 +33,7 @@
#endif
#include "br_private.h"
+#include "br_private_mcast_eht.h"
static const struct rhashtable_params br_mdb_rht_params = {
.head_offset = offsetof(struct net_bridge_mdb_entry, rhnode),
@@ -441,7 +442,8 @@ static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
br_multicast_sg_add_exclude_ports(star_mp, sg);
}
-static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src)
+static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src,
+ bool fastleave)
{
struct net_bridge_port_group *p, *pg = src->pg;
struct net_bridge_port_group __rcu **pp;
@@ -466,6 +468,8 @@ static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src)
(p->flags & MDB_PG_FLAGS_PERMANENT))
break;
+ if (fastleave)
+ p->flags |= MDB_PG_FLAGS_FAST_LEAVE;
br_multicast_del_pg(mp, p, pp);
break;
}
@@ -559,11 +563,12 @@ static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc)
kfree_rcu(src, rcu);
}
-static void br_multicast_del_group_src(struct net_bridge_group_src *src)
+void br_multicast_del_group_src(struct net_bridge_group_src *src,
+ bool fastleave)
{
struct net_bridge *br = src->pg->key.port->br;
- br_multicast_fwd_src_remove(src);
+ br_multicast_fwd_src_remove(src, fastleave);
hlist_del_init_rcu(&src->node);
src->pg->src_ents--;
hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list);
@@ -593,8 +598,9 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
rcu_assign_pointer(*pp, pg->next);
hlist_del_init(&pg->mglist);
+ br_multicast_eht_clean_sets(pg);
hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
- br_multicast_del_group_src(ent);
+ br_multicast_del_group_src(ent, false);
br_mdb_notify(br->dev, mp, pg, RTM_DELMDB);
if (!br_multicast_is_star_g(&mp->addr)) {
rhashtable_remove_fast(&br->sg_port_tbl, &pg->rhnode,
@@ -651,7 +657,7 @@ static void br_multicast_port_group_expired(struct timer_list *t)
pg->filter_mode = MCAST_INCLUDE;
hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) {
if (!timer_pending(&src_ent->timer)) {
- br_multicast_del_group_src(src_ent);
+ br_multicast_del_group_src(src_ent, false);
changed = true;
}
}
@@ -1078,7 +1084,7 @@ static void br_multicast_group_src_expired(struct timer_list *t)
pg = src->pg;
if (pg->filter_mode == MCAST_INCLUDE) {
- br_multicast_del_group_src(src);
+ br_multicast_del_group_src(src, false);
if (!hlist_empty(&pg->src_list))
goto out;
br_multicast_find_del_pg(br, pg);
@@ -1090,7 +1096,7 @@ out:
spin_unlock(&br->multicast_lock);
}
-static struct net_bridge_group_src *
+struct net_bridge_group_src *
br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip)
{
struct net_bridge_group_src *ent;
@@ -1172,6 +1178,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
p->flags = flags;
p->filter_mode = filter_mode;
p->rt_protocol = rt_protocol;
+ p->eht_host_tree = RB_ROOT;
+ p->eht_set_tree = RB_ROOT;
p->mcast_gc.destroy = br_multicast_destroy_port_group;
INIT_HLIST_HEAD(&p->src_list);
@@ -1243,7 +1251,7 @@ __br_multicast_add_group(struct net_bridge *br,
mp = br_multicast_new_group(br, group);
if (IS_ERR(mp))
- return ERR_PTR(PTR_ERR(mp));
+ return ERR_CAST(mp);
if (!port) {
br_multicast_host_join(mp, true);
@@ -1292,7 +1300,7 @@ static int br_multicast_add_group(struct net_bridge *br,
pg = __br_multicast_add_group(br, port, group, src, filter_mode,
igmpv2_mldv1, false);
/* NULL is considered valid for host joined groups */
- err = IS_ERR(pg) ? PTR_ERR(pg) : 0;
+ err = PTR_ERR_OR_ZERO(pg);
spin_unlock(&br->multicast_lock);
return err;
@@ -1373,7 +1381,7 @@ static void br_mc_router_state_change(struct net_bridge *p,
.u.mrouter = is_mc_router,
};
- switchdev_port_attr_set(p->dev, &attr);
+ switchdev_port_attr_set(p->dev, &attr, NULL);
}
static void br_multicast_local_router_expired(struct timer_list *t)
@@ -1594,12 +1602,13 @@ static void br_mc_disabled_update(struct net_device *dev, bool value)
.u.mc_disabled = !value,
};
- switchdev_port_attr_set(dev, &attr);
+ switchdev_port_attr_set(dev, &attr, NULL);
}
int br_multicast_add_port(struct net_bridge_port *port)
{
port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
timer_setup(&port->multicast_router_timer,
br_multicast_router_expired, 0);
@@ -1700,7 +1709,7 @@ static int __grp_src_delete_marked(struct net_bridge_port_group *pg)
hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
if (ent->flags & BR_SGRP_F_DELETE) {
- br_multicast_del_group_src(ent);
+ br_multicast_del_group_src(ent, false);
deleted++;
}
@@ -1799,8 +1808,9 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
* INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI
* EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI
*/
-static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
@@ -1812,7 +1822,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
memset(&src_ip, 0, sizeof(src_ip));
src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.src, srcs, src_size);
+ memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (!ent) {
ent = br_multicast_new_group_src(pg, &src_ip);
@@ -1822,9 +1832,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
if (ent)
__grp_src_mod_timer(ent, now + br_multicast_gmi(br));
- srcs += src_size;
}
+ if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ changed = true;
+
return changed;
}
@@ -1833,8 +1845,9 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
* Delete (A-B)
* Group Timer=GMI
*/
-static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
struct net_bridge_group_src *ent;
struct br_ip src_ip;
@@ -1846,7 +1859,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
memset(&src_ip, 0, sizeof(src_ip));
src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.src, srcs, src_size);
+ memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent)
ent->flags &= ~BR_SGRP_F_DELETE;
@@ -1854,9 +1867,10 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
ent = br_multicast_new_group_src(pg, &src_ip);
if (ent)
br_multicast_fwd_src_handle(ent);
- srcs += src_size;
}
+ br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+
__grp_src_delete_marked(pg);
}
@@ -1866,8 +1880,9 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
* Delete (Y-A)
* Group Timer=GMI
*/
-static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
@@ -1882,7 +1897,7 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
memset(&src_ip, 0, sizeof(src_ip));
src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.src, srcs, src_size);
+ memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags &= ~BR_SGRP_F_DELETE;
@@ -1894,29 +1909,34 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
changed = true;
}
}
- srcs += src_size;
}
+ if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ changed = true;
+
if (__grp_src_delete_marked(pg))
changed = true;
return changed;
}
-static bool br_multicast_isexc(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- __grp_src_isexc_incl(pg, srcs, nsrcs, src_size);
+ __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
- changed = __grp_src_isexc_excl(pg, srcs, nsrcs, src_size);
+ changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
break;
}
@@ -1930,8 +1950,9 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg,
* INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI
* Send Q(G,A-B)
*/
-static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
@@ -1946,7 +1967,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
memset(&src_ip, 0, sizeof(src_ip));
src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.src, srcs, src_size);
+ memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags &= ~BR_SGRP_F_SEND;
@@ -1958,9 +1979,11 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
}
if (ent)
__grp_src_mod_timer(ent, now + br_multicast_gmi(br));
- srcs += src_size;
}
+ if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ changed = true;
+
if (to_send)
__grp_src_query_marked_and_rexmit(pg);
@@ -1972,8 +1995,9 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
* Send Q(G,X-A)
* Send Q(G)
*/
-static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
@@ -1989,7 +2013,7 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
memset(&src_ip, 0, sizeof(src_ip));
src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.src, srcs, src_size);
+ memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
if (timer_pending(&ent->timer)) {
@@ -2003,9 +2027,11 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
}
if (ent)
__grp_src_mod_timer(ent, now + br_multicast_gmi(br));
- srcs += src_size;
}
+ if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ changed = true;
+
if (to_send)
__grp_src_query_marked_and_rexmit(pg);
@@ -2014,20 +2040,32 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
return changed;
}
-static bool br_multicast_toin(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- changed = __grp_src_toin_incl(pg, srcs, nsrcs, src_size);
+ changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
break;
case MCAST_EXCLUDE:
- changed = __grp_src_toin_excl(pg, srcs, nsrcs, src_size);
+ changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
break;
}
+ if (br_multicast_eht_should_del_pg(pg)) {
+ pg->flags |= MDB_PG_FLAGS_FAST_LEAVE;
+ br_multicast_find_del_pg(pg->key.port->br, pg);
+ /* a notification has already been sent and we shouldn't
+ * access pg after the delete so we have to return false
+ */
+ changed = false;
+ }
+
return changed;
}
@@ -2037,8 +2075,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg,
* Send Q(G,A*B)
* Group Timer=GMI
*/
-static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
struct net_bridge_group_src *ent;
u32 src_idx, to_send = 0;
@@ -2050,7 +2089,7 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
memset(&src_ip, 0, sizeof(src_ip));
src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.src, srcs, src_size);
+ memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) |
@@ -2061,9 +2100,10 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
}
if (ent)
br_multicast_fwd_src_handle(ent);
- srcs += src_size;
}
+ br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+
__grp_src_delete_marked(pg);
if (to_send)
__grp_src_query_marked_and_rexmit(pg);
@@ -2076,8 +2116,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
* Send Q(G,A-Y)
* Group Timer=GMI
*/
-static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
struct net_bridge_group_src *ent;
u32 src_idx, to_send = 0;
@@ -2090,7 +2131,7 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
memset(&src_ip, 0, sizeof(src_ip));
src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.src, srcs, src_size);
+ memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags &= ~BR_SGRP_F_DELETE;
@@ -2105,9 +2146,11 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
ent->flags |= BR_SGRP_F_SEND;
to_send++;
}
- srcs += src_size;
}
+ if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ changed = true;
+
if (__grp_src_delete_marked(pg))
changed = true;
if (to_send)
@@ -2116,20 +2159,23 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
return changed;
}
-static bool br_multicast_toex(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size,
+ int grec_type)
{
struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- __grp_src_toex_incl(pg, srcs, nsrcs, src_size);
+ __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
- changed = __grp_src_toex_excl(pg, srcs, nsrcs, src_size);
+ changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
break;
}
@@ -2142,11 +2188,12 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg,
/* State Msg type New state Actions
* INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B)
*/
-static void __grp_src_block_incl(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
struct net_bridge_group_src *ent;
u32 src_idx, to_send = 0;
+ bool changed = false;
struct br_ip src_ip;
hlist_for_each_entry(ent, &pg->src_list, node)
@@ -2155,28 +2202,29 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg,
memset(&src_ip, 0, sizeof(src_ip));
src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.src, srcs, src_size);
+ memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (ent) {
ent->flags |= BR_SGRP_F_SEND;
to_send++;
}
- srcs += src_size;
}
+ if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ changed = true;
+
if (to_send)
__grp_src_query_marked_and_rexmit(pg);
- if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list))
- br_multicast_find_del_pg(pg->key.port->br, pg);
+ return changed;
}
/* State Msg type New state Actions
* EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer
* Send Q(G,A-Y)
*/
-static bool __grp_src_block_excl(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
struct net_bridge_group_src *ent;
u32 src_idx, to_send = 0;
@@ -2189,7 +2237,7 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg,
memset(&src_ip, 0, sizeof(src_ip));
src_ip.proto = pg->key.addr.proto;
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
- memcpy(&src_ip.src, srcs, src_size);
+ memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size);
ent = br_multicast_find_group_src(pg, &src_ip);
if (!ent) {
ent = br_multicast_new_group_src(pg, &src_ip);
@@ -2202,29 +2250,44 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg,
ent->flags |= BR_SGRP_F_SEND;
to_send++;
}
- srcs += src_size;
}
+ if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ changed = true;
+
if (to_send)
__grp_src_query_marked_and_rexmit(pg);
return changed;
}
-static bool br_multicast_block(struct net_bridge_port_group *pg,
- void *srcs, u32 nsrcs, size_t src_size)
+static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr,
+ void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- __grp_src_block_incl(pg, srcs, nsrcs, src_size);
+ changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
break;
case MCAST_EXCLUDE:
- changed = __grp_src_block_excl(pg, srcs, nsrcs, src_size);
+ changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
break;
}
+ if ((pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) ||
+ br_multicast_eht_should_del_pg(pg)) {
+ if (br_multicast_eht_should_del_pg(pg))
+ pg->flags |= MDB_PG_FLAGS_FAST_LEAVE;
+ br_multicast_find_del_pg(pg->key.port->br, pg);
+ /* a notification has already been sent and we shouldn't
+ * access pg after the delete so we have to return false
+ */
+ changed = false;
+ }
+
return changed;
}
@@ -2257,8 +2320,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
struct igmpv3_report *ih;
struct igmpv3_grec *grec;
int i, len, num, type;
+ __be32 group, *h_addr;
bool changed = false;
- __be32 group;
int err = 0;
u16 nsrcs;
@@ -2318,32 +2381,33 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
pg = br_multicast_find_port(mdst, port, src);
if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
goto unlock_continue;
- /* reload grec */
+ /* reload grec and host addr */
grec = (void *)(skb->data + len - sizeof(*grec) - (nsrcs * 4));
+ h_addr = &ip_hdr(skb)->saddr;
switch (type) {
case IGMPV3_ALLOW_NEW_SOURCES:
- changed = br_multicast_isinc_allow(pg, grec->grec_src,
- nsrcs, sizeof(__be32));
+ changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+ nsrcs, sizeof(__be32), type);
break;
case IGMPV3_MODE_IS_INCLUDE:
- changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs,
- sizeof(__be32));
+ changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+ nsrcs, sizeof(__be32), type);
break;
case IGMPV3_MODE_IS_EXCLUDE:
- changed = br_multicast_isexc(pg, grec->grec_src, nsrcs,
- sizeof(__be32));
+ changed = br_multicast_isexc(pg, h_addr, grec->grec_src,
+ nsrcs, sizeof(__be32), type);
break;
case IGMPV3_CHANGE_TO_INCLUDE:
- changed = br_multicast_toin(pg, grec->grec_src, nsrcs,
- sizeof(__be32));
+ changed = br_multicast_toin(pg, h_addr, grec->grec_src,
+ nsrcs, sizeof(__be32), type);
break;
case IGMPV3_CHANGE_TO_EXCLUDE:
- changed = br_multicast_toex(pg, grec->grec_src, nsrcs,
- sizeof(__be32));
+ changed = br_multicast_toex(pg, h_addr, grec->grec_src,
+ nsrcs, sizeof(__be32), type);
break;
case IGMPV3_BLOCK_OLD_SOURCES:
- changed = br_multicast_block(pg, grec->grec_src, nsrcs,
- sizeof(__be32));
+ changed = br_multicast_block(pg, h_addr, grec->grec_src,
+ nsrcs, sizeof(__be32), type);
break;
}
if (changed)
@@ -2367,6 +2431,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
unsigned int nsrcs_offset;
const unsigned char *src;
struct icmp6hdr *icmp6h;
+ struct in6_addr *h_addr;
struct mld2_grec *grec;
unsigned int grec_len;
bool changed = false;
@@ -2445,31 +2510,43 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
pg = br_multicast_find_port(mdst, port, src);
if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
goto unlock_continue;
+ h_addr = &ipv6_hdr(skb)->saddr;
switch (grec->grec_type) {
case MLD2_ALLOW_NEW_SOURCES:
- changed = br_multicast_isinc_allow(pg, grec->grec_src,
- nsrcs,
- sizeof(struct in6_addr));
+ changed = br_multicast_isinc_allow(pg, h_addr,
+ grec->grec_src, nsrcs,
+ sizeof(struct in6_addr),
+ grec->grec_type);
break;
case MLD2_MODE_IS_INCLUDE:
- changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs,
- sizeof(struct in6_addr));
+ changed = br_multicast_isinc_allow(pg, h_addr,
+ grec->grec_src, nsrcs,
+ sizeof(struct in6_addr),
+ grec->grec_type);
break;
case MLD2_MODE_IS_EXCLUDE:
- changed = br_multicast_isexc(pg, grec->grec_src, nsrcs,
- sizeof(struct in6_addr));
+ changed = br_multicast_isexc(pg, h_addr,
+ grec->grec_src, nsrcs,
+ sizeof(struct in6_addr),
+ grec->grec_type);
break;
case MLD2_CHANGE_TO_INCLUDE:
- changed = br_multicast_toin(pg, grec->grec_src, nsrcs,
- sizeof(struct in6_addr));
+ changed = br_multicast_toin(pg, h_addr,
+ grec->grec_src, nsrcs,
+ sizeof(struct in6_addr),
+ grec->grec_type);
break;
case MLD2_CHANGE_TO_EXCLUDE:
- changed = br_multicast_toex(pg, grec->grec_src, nsrcs,
- sizeof(struct in6_addr));
+ changed = br_multicast_toex(pg, h_addr,
+ grec->grec_src, nsrcs,
+ sizeof(struct in6_addr),
+ grec->grec_type);
break;
case MLD2_BLOCK_OLD_SOURCES:
- changed = br_multicast_block(pg, grec->grec_src, nsrcs,
- sizeof(struct in6_addr));
+ changed = br_multicast_block(pg, h_addr,
+ grec->grec_src, nsrcs,
+ sizeof(struct in6_addr),
+ grec->grec_type);
break;
}
if (changed)
@@ -2568,7 +2645,7 @@ static void br_port_mc_router_state_change(struct net_bridge_port *p,
.u.mrouter = is_mc_router,
};
- switchdev_port_attr_set(p->dev, &attr);
+ switchdev_port_attr_set(p->dev, &attr, NULL);
}
/*
diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c
new file mode 100644
index 000000000000..fea38b9a7268
--- /dev/null
+++ b/net/bridge/br_multicast_eht.c
@@ -0,0 +1,878 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2020, Nikolay Aleksandrov <nikolay@nvidia.com>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/if_ether.h>
+#include <linux/igmp.h>
+#include <linux/in.h>
+#include <linux/jhash.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/random.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/inetdevice.h>
+#include <linux/mroute.h>
+#include <net/ip.h>
+#include <net/switchdev.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <linux/icmpv6.h>
+#include <net/ipv6.h>
+#include <net/mld.h>
+#include <net/ip6_checksum.h>
+#include <net/addrconf.h>
+#endif
+
+#include "br_private.h"
+#include "br_private_mcast_eht.h"
+
+static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *src_addr,
+ union net_bridge_eht_addr *h_addr);
+static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *src_addr,
+ union net_bridge_eht_addr *h_addr,
+ int filter_mode,
+ bool allow_zero_src);
+
+static struct net_bridge_group_eht_host *
+br_multicast_eht_host_lookup(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr)
+{ /* Search pg->eht_host_tree for the tracked host whose h_addr equals
+ * @h_addr. Returns the matching host, or NULL when the tree holds no
+ * such host. The tree itself is not modified.
+ */
+ struct rb_node *node = pg->eht_host_tree.rb_node;
+
+ while (node) {
+ struct net_bridge_group_eht_host *this;
+ int result;
+
+ this = rb_entry(node, struct net_bridge_group_eht_host,
+ rb_node); /* key order is a raw memcmp() over the whole
+ * address union, matching the insert path
+ * in __eht_lookup_create_host()
+ */
+ result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr));
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return this;
+ }
+
+ return NULL;
+}
+
+static int br_multicast_eht_host_filter_mode(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr)
+{ /* Return the filter_mode recorded for host @h_addr on @pg. A host
+ * that is not present in pg->eht_host_tree is reported as
+ * MCAST_INCLUDE.
+ */
+ struct net_bridge_group_eht_host *eht_host;
+
+ eht_host = br_multicast_eht_host_lookup(pg, h_addr);
+ if (!eht_host)
+ return MCAST_INCLUDE;
+
+ return eht_host->filter_mode;
+}
+
+static struct net_bridge_group_eht_set_entry *
+br_multicast_eht_set_entry_lookup(struct net_bridge_group_eht_set *eht_set,
+ union net_bridge_eht_addr *h_addr)
+{ /* Search eht_set->entry_tree for the set entry that belongs to host
+ * @h_addr. Returns the entry, or NULL when this host has no entry in
+ * the given source set. The tree itself is not modified.
+ */
+ struct rb_node *node = eht_set->entry_tree.rb_node;
+
+ while (node) {
+ struct net_bridge_group_eht_set_entry *this;
+ int result;
+
+ this = rb_entry(node, struct net_bridge_group_eht_set_entry,
+ rb_node); /* entries are keyed by host address and
+ * compared with memcmp() over the whole union
+ */
+ result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr));
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return this;
+ }
+
+ return NULL;
+}
+
+static struct net_bridge_group_eht_set *
+br_multicast_eht_set_lookup(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *src_addr)
+{ /* Search pg->eht_set_tree for the source set keyed by @src_addr.
+ * Returns the set, or NULL when the port group has no set for that
+ * source address. The tree itself is not modified.
+ */
+ struct rb_node *node = pg->eht_set_tree.rb_node;
+
+ while (node) {
+ struct net_bridge_group_eht_set *this;
+ int result;
+
+ this = rb_entry(node, struct net_bridge_group_eht_set,
+ rb_node); /* sets are keyed by source address and
+ * compared with memcmp() over the whole union
+ */
+ result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr));
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return this;
+ }
+
+ return NULL;
+}
+
+static void __eht_destroy_host(struct net_bridge_group_eht_host *eht_host)
+{ /* Unlink @eht_host from its port group's eht_host_tree and free it.
+ * The host is expected to own no set entries any more; a non-empty
+ * set_entries list only triggers a WARN_ON, the host is freed anyway.
+ * The memory is released right here with kfree(), it is not deferred
+ * to the mcast_gc list, so @eht_host must not be used after the call.
+ */
+ WARN_ON(!hlist_empty(&eht_host->set_entries));
+
+ br_multicast_eht_hosts_dec(eht_host->pg); /* undo the hosts_inc done
+ * when the host was created
+ */
+
+ rb_erase(&eht_host->rb_node, &eht_host->pg->eht_host_tree);
+ RB_CLEAR_NODE(&eht_host->rb_node);
+ kfree(eht_host);
+}
+
+static void br_multicast_destroy_eht_set_entry(struct net_bridge_mcast_gc *gc)
+{ /* mcast_gc.destroy callback installed for set entries by
+ * __eht_lookup_create_set_entry(). @gc is the mcast_gc member embedded
+ * in the entry. The entry must already be unlinked from its set's
+ * rb-tree (WARN_ON otherwise); its timer is stopped synchronously
+ * before the memory is freed.
+ */
+ struct net_bridge_group_eht_set_entry *set_h;
+
+ set_h = container_of(gc, struct net_bridge_group_eht_set_entry, mcast_gc);
+ WARN_ON(!RB_EMPTY_NODE(&set_h->rb_node));
+
+ del_timer_sync(&set_h->timer);
+ kfree(set_h);
+}
+
+static void br_multicast_destroy_eht_set(struct net_bridge_mcast_gc *gc)
+{ /* mcast_gc.destroy callback installed for source sets by
+ * __eht_lookup_create_set(). @gc is the mcast_gc member embedded in
+ * the set. The set must already be unlinked from the port group's
+ * tree and must have an empty entry_tree (WARN_ON otherwise); its
+ * timer is stopped synchronously before the memory is freed.
+ */
+ struct net_bridge_group_eht_set *eht_set;
+
+ eht_set = container_of(gc, struct net_bridge_group_eht_set, mcast_gc);
+ WARN_ON(!RB_EMPTY_NODE(&eht_set->rb_node));
+ WARN_ON(!RB_EMPTY_ROOT(&eht_set->entry_tree));
+
+ del_timer_sync(&eht_set->timer);
+ kfree(eht_set);
+}
+
+static void __eht_del_set_entry(struct net_bridge_group_eht_set_entry *set_h)
+{ /* Detach @set_h from both structures that reference it (the owning
+ * set's entry_tree and the owning host's set_entries list), hand it to
+ * the bridge's mcast_gc list and kick mcast_gc_work. If the owning
+ * host is left without any entries it is destroyed as well.
+ * The owning set is NOT removed here even if it becomes empty; callers
+ * in this file check RB_EMPTY_ROOT(&eht_set->entry_tree) themselves.
+ *
+ * NOTE(review): num_entries is incremented at creation time only when
+ * !allow_zero_src (see __eht_lookup_create_set_entry()), while the
+ * decrement below is conditional on the entry's *host* address being
+ * non-zero. Confirm the two conditions are meant to pair up for the
+ * auto-created zero-source entries.
+ */
+ struct net_bridge_group_eht_host *eht_host = set_h->h_parent;
+ union net_bridge_eht_addr zero_addr;
+
+ rb_erase(&set_h->rb_node, &set_h->eht_set->entry_tree);
+ RB_CLEAR_NODE(&set_h->rb_node);
+ hlist_del_init(&set_h->host_list);
+ memset(&zero_addr, 0, sizeof(zero_addr));
+ if (memcmp(&set_h->h_addr, &zero_addr, sizeof(zero_addr)))
+ eht_host->num_entries--;
+ hlist_add_head(&set_h->mcast_gc.gc_node, &set_h->br->mcast_gc_list);
+ queue_work(system_long_wq, &set_h->br->mcast_gc_work);
+
+ if (hlist_empty(&eht_host->set_entries)) /* last entry of this host */
+ __eht_destroy_host(eht_host);
+}
+
+static void br_multicast_del_eht_set(struct net_bridge_group_eht_set *eht_set)
+{ /* Remove a whole source set: delete every host entry it contains,
+ * unlink the set from its port group's eht_set_tree, then queue the
+ * set on the bridge's mcast_gc list and kick mcast_gc_work.
+ */
+ struct net_bridge_group_eht_set_entry *set_h;
+ struct rb_node *node;
+
+ while ((node = rb_first(&eht_set->entry_tree))) { /* each pass erases
+ * the first node, so
+ * the tree drains
+ */
+ set_h = rb_entry(node, struct net_bridge_group_eht_set_entry,
+ rb_node);
+ __eht_del_set_entry(set_h);
+ }
+
+ rb_erase(&eht_set->rb_node, &eht_set->pg->eht_set_tree);
+ RB_CLEAR_NODE(&eht_set->rb_node);
+ hlist_add_head(&eht_set->mcast_gc.gc_node, &eht_set->br->mcast_gc_list);
+ queue_work(system_long_wq, &eht_set->br->mcast_gc_work);
+}
+
+void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg)
+{ /* Delete every source set tracked for @pg, and with them all of
+ * their host entries, leaving pg->eht_set_tree empty. Non-static:
+ * this is an entry point for code outside this file.
+ */
+ struct net_bridge_group_eht_set *eht_set;
+ struct rb_node *node;
+
+ while ((node = rb_first(&pg->eht_set_tree))) { /* del_eht_set() erases
+ * the node, so the
+ * tree drains
+ */
+ eht_set = rb_entry(node, struct net_bridge_group_eht_set,
+ rb_node);
+ br_multicast_del_eht_set(eht_set);
+ }
+}
+
+static void br_multicast_eht_set_entry_expired(struct timer_list *t)
+{ /* Timer callback for a set entry (registered with timer_setup() in
+ * __eht_lookup_create_set_entry()). Under br->multicast_lock it
+ * deletes the expired entry via br_multicast_del_eht_set_entry(),
+ * which also deletes the owning set if that set becomes empty.
+ */
+ struct net_bridge_group_eht_set_entry *set_h = from_timer(set_h, t, timer);
+ struct net_bridge *br = set_h->br;
+
+ spin_lock(&br->multicast_lock);
+ if (RB_EMPTY_NODE(&set_h->rb_node) || timer_pending(&set_h->timer))
+ goto out; /* already unlinked, or the timer was re-armed meanwhile */
+
+ br_multicast_del_eht_set_entry(set_h->eht_set->pg,
+ &set_h->eht_set->src_addr,
+ &set_h->h_addr);
+out:
+ spin_unlock(&br->multicast_lock);
+}
+
+static void br_multicast_eht_set_expired(struct timer_list *t)
+{ /* Timer callback for a source set (registered with timer_setup() in
+ * __eht_lookup_create_set()). Under br->multicast_lock it deletes the
+ * whole set together with all of its host entries.
+ */
+ struct net_bridge_group_eht_set *eht_set = from_timer(eht_set, t,
+ timer);
+ struct net_bridge *br = eht_set->br;
+
+ spin_lock(&br->multicast_lock);
+ if (RB_EMPTY_NODE(&eht_set->rb_node) || timer_pending(&eht_set->timer))
+ goto out; /* already unlinked, or the timer was re-armed meanwhile */
+
+ br_multicast_del_eht_set(eht_set);
+out:
+ spin_unlock(&br->multicast_lock);
+}
+
+static struct net_bridge_group_eht_host *
+__eht_lookup_create_host(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ unsigned char filter_mode)
+{ /* Find the host @h_addr in pg->eht_host_tree, or create and insert
+ * it when missing.
+ * An existing host is returned unchanged; @filter_mode is only used
+ * to initialise a newly created host.
+ * Returns NULL when a new host would be needed but
+ * br_multicast_eht_hosts_over_limit(pg) is true, or when the
+ * GFP_ATOMIC allocation fails.
+ */
+ struct rb_node **link = &pg->eht_host_tree.rb_node, *parent = NULL;
+ struct net_bridge_group_eht_host *eht_host;
+
+ while (*link) { /* descend, remembering the insertion point */
+ struct net_bridge_group_eht_host *this;
+ int result;
+
+ this = rb_entry(*link, struct net_bridge_group_eht_host,
+ rb_node);
+ result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr));
+ parent = *link;
+ if (result < 0)
+ link = &((*link)->rb_left);
+ else if (result > 0)
+ link = &((*link)->rb_right);
+ else
+ return this;
+ }
+
+ if (br_multicast_eht_hosts_over_limit(pg))
+ return NULL;
+
+ eht_host = kzalloc(sizeof(*eht_host), GFP_ATOMIC);
+ if (!eht_host)
+ return NULL;
+
+ memcpy(&eht_host->h_addr, h_addr, sizeof(*h_addr));
+ INIT_HLIST_HEAD(&eht_host->set_entries);
+ eht_host->pg = pg;
+ eht_host->filter_mode = filter_mode;
+
+ rb_link_node(&eht_host->rb_node, parent, link);
+ rb_insert_color(&eht_host->rb_node, &pg->eht_host_tree);
+
+ br_multicast_eht_hosts_inc(pg); /* paired with hosts_dec in
+ * __eht_destroy_host()
+ */
+
+ return eht_host;
+}
+
+static struct net_bridge_group_eht_set_entry *
+__eht_lookup_create_set_entry(struct net_bridge *br,
+ struct net_bridge_group_eht_set *eht_set,
+ struct net_bridge_group_eht_host *eht_host,
+ bool allow_zero_src)
+{ /* Find the entry of host @eht_host inside source set @eht_set, or
+ * create and insert it when missing. A new entry is linked into both
+ * eht_set->entry_tree (keyed by the host address) and the host's
+ * set_entries list. Its timer is only set up here, not armed.
+ * Returns NULL when the host already has PG_SRC_ENT_LIMIT counted
+ * entries (not enforced when @allow_zero_src is true), or when the
+ * GFP_ATOMIC allocation fails.
+ */
+ struct rb_node **link = &eht_set->entry_tree.rb_node, *parent = NULL;
+ struct net_bridge_group_eht_set_entry *set_h;
+
+ while (*link) { /* descend, remembering the insertion point */
+ struct net_bridge_group_eht_set_entry *this;
+ int result;
+
+ this = rb_entry(*link, struct net_bridge_group_eht_set_entry,
+ rb_node);
+ result = memcmp(&eht_host->h_addr, &this->h_addr,
+ sizeof(union net_bridge_eht_addr));
+ parent = *link;
+ if (result < 0)
+ link = &((*link)->rb_left);
+ else if (result > 0)
+ link = &((*link)->rb_right);
+ else
+ return this;
+ }
+
+ /* always allow auto-created zero entry */
+ if (!allow_zero_src && eht_host->num_entries >= PG_SRC_ENT_LIMIT)
+ return NULL;
+
+ set_h = kzalloc(sizeof(*set_h), GFP_ATOMIC);
+ if (!set_h)
+ return NULL;
+
+ memcpy(&set_h->h_addr, &eht_host->h_addr,
+ sizeof(union net_bridge_eht_addr));
+ set_h->mcast_gc.destroy = br_multicast_destroy_eht_set_entry;
+ set_h->eht_set = eht_set;
+ set_h->h_parent = eht_host;
+ set_h->br = br;
+ timer_setup(&set_h->timer, br_multicast_eht_set_entry_expired, 0);
+
+ hlist_add_head(&set_h->host_list, &eht_host->set_entries);
+ rb_link_node(&set_h->rb_node, parent, link);
+ rb_insert_color(&set_h->rb_node, &eht_set->entry_tree);
+ /* we must not count the auto-created zero entry otherwise we won't be
+ * able to track the full list of PG_SRC_ENT_LIMIT entries
+ */
+ if (!allow_zero_src)
+ eht_host->num_entries++;
+
+ return set_h;
+}
+
+static struct net_bridge_group_eht_set *
+__eht_lookup_create_set(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *src_addr)
+{ /* Find the source set keyed by @src_addr in pg->eht_set_tree, or
+ * create and insert an empty one when missing. The new set's timer is
+ * only set up here, not armed.
+ * Returns NULL only when the GFP_ATOMIC allocation fails.
+ */
+ struct rb_node **link = &pg->eht_set_tree.rb_node, *parent = NULL;
+ struct net_bridge_group_eht_set *eht_set;
+
+ while (*link) { /* descend, remembering the insertion point */
+ struct net_bridge_group_eht_set *this;
+ int result;
+
+ this = rb_entry(*link, struct net_bridge_group_eht_set,
+ rb_node);
+ result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr));
+ parent = *link;
+ if (result < 0)
+ link = &((*link)->rb_left);
+ else if (result > 0)
+ link = &((*link)->rb_right);
+ else
+ return this;
+ }
+
+ eht_set = kzalloc(sizeof(*eht_set), GFP_ATOMIC);
+ if (!eht_set)
+ return NULL;
+
+ memcpy(&eht_set->src_addr, src_addr, sizeof(*src_addr));
+ eht_set->mcast_gc.destroy = br_multicast_destroy_eht_set;
+ eht_set->pg = pg;
+ eht_set->br = pg->key.port->br;
+ eht_set->entry_tree = RB_ROOT;
+ timer_setup(&eht_set->timer, br_multicast_eht_set_expired, 0);
+
+ rb_link_node(&eht_set->rb_node, parent, link);
+ rb_insert_color(&eht_set->rb_node, &pg->eht_set_tree);
+
+ return eht_set;
+}
+
+static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src,
+ union net_bridge_eht_addr *dest)
+{ /* Copy the source address stored in @src into @dest, picking the
+ * union member from src->proto. Only the selected member is written;
+ * for any other protocol value @dest is left untouched, so callers
+ * that need a fully defined key should clear @dest beforehand.
+ */
+ switch (src->proto) {
+ case htons(ETH_P_IP):
+ dest->ip4 = src->src.ip4;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ memcpy(&dest->ip6, &src->src.ip6, sizeof(struct in6_addr));
+ break;
+#endif
+ }
+}
+
+static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ int filter_mode)
+{ /* Switch host @h_addr to @filter_mode and keep its all-zero source
+ * set entry in step with the mode: the entry is deleted for
+ * MCAST_INCLUDE and created (or refreshed) for MCAST_EXCLUDE.
+ * If the host is not tracked yet, only the EXCLUDE path can bring it
+ * into existence, through br_multicast_create_eht_set_entry().
+ * Any other @filter_mode value only updates an existing host.
+ */
+ struct net_bridge_group_eht_host *eht_host;
+ union net_bridge_eht_addr zero_addr;
+
+ eht_host = br_multicast_eht_host_lookup(pg, h_addr);
+ if (eht_host)
+ eht_host->filter_mode = filter_mode;
+
+ memset(&zero_addr, 0, sizeof(zero_addr));
+ switch (filter_mode) {
+ case MCAST_INCLUDE:
+ br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr);
+ break;
+ case MCAST_EXCLUDE:
+ br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr,
+ MCAST_EXCLUDE,
+ true);
+ break;
+ }
+}
+
+static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *src_addr,
+ union net_bridge_eht_addr *h_addr,
+ int filter_mode,
+ bool allow_zero_src)
+{ /* Record that host @h_addr is a member of source set @src_addr on
+ * @pg, creating the set, the host and the set entry as needed, and
+ * (re)arm both the entry timer and the set timer to expire after
+ * br_multicast_gmi(br).
+ * An all-zero @src_addr is silently ignored unless @allow_zero_src is
+ * true. @filter_mode is only applied when the host has to be created.
+ * Failures (limits reached, allocation failure) are not reported to
+ * the caller; objects left empty by a failed attempt are torn down
+ * again on the way out.
+ */
+ struct net_bridge_group_eht_set_entry *set_h;
+ struct net_bridge_group_eht_host *eht_host;
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_group_eht_set *eht_set;
+ union net_bridge_eht_addr zero_addr;
+
+ memset(&zero_addr, 0, sizeof(zero_addr));
+ if (!allow_zero_src && !memcmp(src_addr, &zero_addr, sizeof(zero_addr)))
+ return;
+
+ eht_set = __eht_lookup_create_set(pg, src_addr);
+ if (!eht_set)
+ return;
+
+ eht_host = __eht_lookup_create_host(pg, h_addr, filter_mode);
+ if (!eht_host)
+ goto fail_host;
+
+ set_h = __eht_lookup_create_set_entry(br, eht_set, eht_host,
+ allow_zero_src);
+ if (!set_h)
+ goto fail_set_entry;
+
+ mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br));
+
+ return;
+
+fail_set_entry:
+ if (hlist_empty(&eht_host->set_entries)) /* host has no entries at all */
+ __eht_destroy_host(eht_host);
+fail_host:
+ if (RB_EMPTY_ROOT(&eht_set->entry_tree)) /* set has no entries at all */
+ br_multicast_del_eht_set(eht_set);
+}
+
+/* Remove host @h_addr's entry from the set of source @src_addr in @pg.
+ * The set itself is deleted once its entry tree becomes empty.
+ *
+ * Returns true only when the whole set was deleted; false otherwise,
+ * including when the set or the entry did not exist.
+ */
+static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *src_addr,
+ union net_bridge_eht_addr *h_addr)
+{
+ struct net_bridge_group_eht_set_entry *entry;
+ struct net_bridge_group_eht_set *set;
+
+ set = br_multicast_eht_set_lookup(pg, src_addr);
+ if (!set)
+ return false;
+
+ entry = br_multicast_eht_set_entry_lookup(set, h_addr);
+ if (!entry)
+ return false;
+
+ __eht_del_set_entry(entry);
+
+ /* other hosts still reference this source, keep the set */
+ if (!RB_EMPTY_ROOT(&set->entry_tree))
+ return false;
+
+ br_multicast_del_eht_set(set);
+
+ return true;
+}
+/* Delete every set entry that belongs to host @h_addr of @pg. Does nothing
+ * when the host is not tracked.
+ * NOTE(review): the host object itself is not freed here; presumably
+ * __eht_del_set_entry() releases it with its last entry - confirm.
+ */
+static void br_multicast_del_eht_host(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr)
+{
+ struct net_bridge_group_eht_set_entry *set_h;
+ struct net_bridge_group_eht_host *eht_host;
+ struct hlist_node *tmp;
+
+ eht_host = br_multicast_eht_host_lookup(pg, h_addr);
+ if (!eht_host)
+ return;
+
+ /* _safe variant: the current entry is deleted inside the loop body */
+ hlist_for_each_entry_safe(set_h, tmp, &eht_host->set_entries, host_list)
+ br_multicast_del_eht_set_entry(set_h->eht_set->pg,
+ &set_h->eht_set->src_addr,
+ &set_h->h_addr);
+}
+/* ALLOW handling for a host in INCLUDE mode: create an INCLUDE set entry for
+ * @h_addr under each of the @nsrcs source addresses packed back to back in
+ * @srcs (@addr_size bytes each). All-zero sources are skipped because
+ * allow_zero_src is passed as false.
+ */
+static void __eht_allow_incl(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size)
+{
+ union net_bridge_eht_addr eht_src_addr;
+ u32 src_idx;
+
+ /* zero once: only addr_size bytes are overwritten per source below */
+ memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+ br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
+ MCAST_INCLUDE,
+ false);
+ }
+}
+
+/* ALLOW handling used when the host was found in EXCLUDE mode by the caller.
+ * The host mode is re-read here: if the host is not in EXCLUDE mode an
+ * INCLUDE set entry is created per source; otherwise the host's entry is
+ * removed from each source's set and, when that removal deleted the whole
+ * set, the matching group source (S,G) is deleted as well.
+ *
+ * @srcs holds @nsrcs addresses of @addr_size bytes each.
+ * Returns true if at least one group source was deleted.
+ */
+static bool __eht_allow_excl(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size)
+{
+ bool changed = false, host_excl = false;
+ union net_bridge_eht_addr eht_src_addr;
+ struct net_bridge_group_src *src_ent;
+ struct br_ip src_ip;
+ u32 src_idx;
+
+ host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE);
+ memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+ /* build a fully initialised lookup key, like the __eht_block_*()
+ * helpers do: only the address bytes are copied per source below, so
+ * proto (and everything else) must not be left as stack garbage
+ */
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+ if (!host_excl) {
+ br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
+ MCAST_INCLUDE,
+ false);
+ } else {
+ /* only continue when the whole set went away */
+ if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
+ h_addr))
+ continue;
+ memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
+ src_ent = br_multicast_find_group_src(pg, &src_ip);
+ if (!src_ent)
+ continue;
+ br_multicast_del_group_src(src_ent, true);
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+/* Dispatch an ALLOW record from host @h_addr according to the host's current
+ * filter mode. Only the EXCLUDE path can report a change; an INCLUDE host or
+ * any other mode value yields false.
+ */
+static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size)
+{
+ int host_mode = br_multicast_eht_host_filter_mode(pg, h_addr);
+
+ if (host_mode == MCAST_EXCLUDE)
+ return __eht_allow_excl(pg, h_addr, srcs, nsrcs, addr_size);
+
+ if (host_mode == MCAST_INCLUDE)
+ __eht_allow_incl(pg, h_addr, srcs, nsrcs, addr_size);
+
+ return false;
+}
+/* BLOCK handling for a host in INCLUDE mode: drop @h_addr's entry from the
+ * set of every listed source and, when that removal deleted the whole set,
+ * delete the matching group source (S,G) too.
+ *
+ * @srcs holds @nsrcs addresses of @addr_size bytes each.
+ * Returns true if at least one group source was deleted.
+ */
+static bool __eht_block_incl(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size)
+{
+ union net_bridge_eht_addr eht_src_addr;
+ struct net_bridge_group_src *src_ent;
+ bool changed = false;
+ struct br_ip src_ip;
+ u32 src_idx;
+
+ memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto; /* lookup key uses the group's protocol */
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+ if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, h_addr))
+ continue; /* set still in use (or nothing was removed) */
+ memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
+ src_ent = br_multicast_find_group_src(pg, &src_ip);
+ if (!src_ent)
+ continue;
+ br_multicast_del_group_src(src_ent, true);
+ changed = true;
+ }
+
+ return changed;
+}
+/* BLOCK handling used when the host was found in EXCLUDE mode by the caller.
+ * The host mode is re-read here: for an EXCLUDE host an EXCLUDE set entry is
+ * created per listed source; otherwise the host's entry is removed from each
+ * source's set and, when that removal deleted the whole set, the matching
+ * group source (S,G) is deleted as well.
+ *
+ * @srcs holds @nsrcs addresses of @addr_size bytes each.
+ * Returns true if at least one group source was deleted.
+ */
+static bool __eht_block_excl(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size)
+{
+ bool changed = false, host_excl = false;
+ union net_bridge_eht_addr eht_src_addr;
+ struct net_bridge_group_src *src_ent;
+ struct br_ip src_ip;
+ u32 src_idx;
+
+ host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE);
+ memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto; /* lookup key uses the group's protocol */
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+ if (host_excl) {
+ br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
+ MCAST_EXCLUDE,
+ false);
+ } else {
+ if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
+ h_addr))
+ continue; /* set still in use (or nothing was removed) */
+ memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
+ src_ent = br_multicast_find_group_src(pg, &src_ip);
+ if (!src_ent)
+ continue;
+ br_multicast_del_group_src(src_ent, true);
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+/* Dispatch a BLOCK record from host @h_addr according to the host's current
+ * filter mode. Returns what the mode-specific helper reports, or false when
+ * the mode is neither MCAST_INCLUDE nor MCAST_EXCLUDE.
+ */
+static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size)
+{
+ int host_mode = br_multicast_eht_host_filter_mode(pg, h_addr);
+
+ if (host_mode == MCAST_INCLUDE)
+ return __eht_block_incl(pg, h_addr, srcs, nsrcs, addr_size);
+
+ if (host_mode == MCAST_EXCLUDE)
+ return __eht_block_excl(pg, h_addr, srcs, nsrcs, addr_size);
+
+ return false;
+}
+
+/* Common worker for the INCLUDE/EXCLUDE (MODE_IS_* and CHANGE_TO_*) records.
+ * flush_entries is true when changing mode: either @to_report is set or the
+ * host's current filter mode differs from @filter_mode. In that case all of
+ * the host's existing set entries are dropped first. A set entry is then
+ * created for every listed source (all-zero sources are skipped), and after
+ * a flush the group's source list is walked to delete sources that no longer
+ * have an EHT set.
+ *
+ * @srcs holds @nsrcs addresses of @addr_size bytes each.
+ * Returns true if at least one group source was deleted.
+ */
+static bool __eht_inc_exc(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size,
+ unsigned char filter_mode,
+ bool to_report)
+{
+ bool changed = false, flush_entries = to_report;
+ union net_bridge_eht_addr eht_src_addr;
+ u32 src_idx;
+
+ if (br_multicast_eht_host_filter_mode(pg, h_addr) != filter_mode)
+ flush_entries = true;
+
+ memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+ /* if we're changing mode del host and its entries */
+ if (flush_entries)
+ br_multicast_del_eht_host(pg, h_addr);
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
+ br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
+ filter_mode, false);
+ }
+ /* we can be missing sets only if we've deleted some entries */
+ if (flush_entries) {
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_group_eht_set *eht_set;
+ struct net_bridge_group_src *src_ent;
+ struct hlist_node *tmp;
+
+ /* _safe variant: src_ent may be deleted inside the loop body */
+ hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) {
+ br_multicast_ip_src_to_eht_addr(&src_ent->addr,
+ &eht_src_addr);
+ if (!br_multicast_eht_set_lookup(pg, &eht_src_addr)) {
+ br_multicast_del_group_src(src_ent, true);
+ changed = true;
+ continue;
+ }
+ /* this is an optimization for TO_INCLUDE where we lower
+ * the set's timeout to LMQT to catch timeout hosts:
+ * - host A (timing out): set entries X, Y
+ * - host B: set entry Z (new from current TO_INCLUDE)
+ * sends BLOCK Z after LMQT but host A's EHT
+ * entries still exist (unless lowered to LMQT
+ * so they can timeout with the S,Gs)
+ * => we wait another LMQT, when we can just delete the
+ * group immediately
+ */
+ if (!(src_ent->flags & BR_SGRP_F_SEND) ||
+ filter_mode != MCAST_INCLUDE ||
+ !to_report)
+ continue;
+ eht_set = br_multicast_eht_set_lookup(pg, /* same key as the lookup above */
+ &eht_src_addr);
+ if (!eht_set)
+ continue;
+ mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br));
+ }
+ }
+
+ return changed;
+}
+
+/* Apply an INCLUDE-type record from host @h_addr: rebuild its set entries
+ * through __eht_inc_exc() and then switch the host to MCAST_INCLUDE.
+ * Returns the value reported by __eht_inc_exc().
+ */
+static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size,
+ bool to_report)
+{
+ bool src_deleted = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+ MCAST_INCLUDE, to_report);
+
+ /* the mode is converted only after the entries have been processed */
+ br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE);
+
+ return src_deleted;
+}
+
+/* Apply an EXCLUDE-type record from host @h_addr: rebuild its set entries
+ * through __eht_inc_exc() and then switch the host to MCAST_EXCLUDE.
+ * Returns the value reported by __eht_inc_exc().
+ */
+static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size,
+ bool to_report)
+{
+ bool src_deleted = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+ MCAST_EXCLUDE, to_report);
+
+ /* the mode is converted only after the entries have been processed */
+ br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE);
+
+ return src_deleted;
+}
+/* Dispatch one IGMPv3 group record of type @grec_type from host @h_addr to
+ * the matching EHT handler, using sizeof(__be32) as the per-source address
+ * size. The CHANGE_TO_* types fall through to their MODE_IS_* counterparts
+ * with to_report set. Unknown record types are ignored.
+ * Returns the handler's result, false if none ran.
+ * NOTE(review): the return value of br_multicast_eht_allow() is dropped for
+ * ALLOW records, so false is returned there - confirm this is intended.
+ */
+static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ int grec_type)
+{
+ bool changed = false, to_report = false;
+
+ switch (grec_type) {
+ case IGMPV3_ALLOW_NEW_SOURCES:
+ br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32));
+ break;
+ case IGMPV3_BLOCK_OLD_SOURCES:
+ changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+ sizeof(__be32));
+ break;
+ case IGMPV3_CHANGE_TO_INCLUDE:
+ to_report = true;
+ fallthrough;
+ case IGMPV3_MODE_IS_INCLUDE:
+ changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+ sizeof(__be32), to_report);
+ break;
+ case IGMPV3_CHANGE_TO_EXCLUDE:
+ to_report = true;
+ fallthrough;
+ case IGMPV3_MODE_IS_EXCLUDE:
+ changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+ sizeof(__be32), to_report);
+ break;
+ }
+
+ return changed;
+}
+/* Dispatch one MLDv2 group record of type @grec_type from host @h_addr to
+ * the matching EHT handler, using sizeof(struct in6_addr) as the per-source
+ * address size. The CHANGE_TO_* types fall through to their MODE_IS_*
+ * counterparts with to_report set. Unknown record types are ignored.
+ * Returns the handler's result, false if none ran.
+ * NOTE(review): the return value of br_multicast_eht_allow() is dropped for
+ * ALLOW records, so false is returned there - confirm this is intended.
+ * Only built when IPv6 is enabled.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
+ union net_bridge_eht_addr *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ int grec_type)
+{
+ bool changed = false, to_report = false;
+
+ switch (grec_type) {
+ case MLD2_ALLOW_NEW_SOURCES:
+ br_multicast_eht_allow(pg, h_addr, srcs, nsrcs,
+ sizeof(struct in6_addr));
+ break;
+ case MLD2_BLOCK_OLD_SOURCES:
+ changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+ sizeof(struct in6_addr));
+ break;
+ case MLD2_CHANGE_TO_INCLUDE:
+ to_report = true;
+ fallthrough;
+ case MLD2_MODE_IS_INCLUDE:
+ changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+ sizeof(struct in6_addr),
+ to_report);
+ break;
+ case MLD2_CHANGE_TO_EXCLUDE:
+ to_report = true;
+ fallthrough;
+ case MLD2_MODE_IS_EXCLUDE:
+ changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+ sizeof(struct in6_addr),
+ to_report);
+ break;
+ }
+
+ return changed;
+}
+#endif
+
+/* Entry point of explicit host tracking for one group record.
+ * Nothing is done unless BR_MULTICAST_FAST_LEAVE is set on the group's port.
+ * @h_addr is the reporting host's address (@addr_size bytes), @srcs holds
+ * @nsrcs source addresses of the same size and @grec_type is the record
+ * type. An @addr_size of sizeof(__be32) selects the IPv4 handler; any other
+ * size goes to the IPv6 handler when IPv6 is enabled and is ignored
+ * otherwise.
+ * true means an entry was deleted
+ */
+bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+ void *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size,
+ int grec_type)
+{
+ bool eht_enabled = !!(pg->key.port->flags & BR_MULTICAST_FAST_LEAVE);
+ union net_bridge_eht_addr eht_host_addr;
+ bool changed = false;
+
+ if (!eht_enabled)
+ goto out;
+
+ /* zero first so the bytes past addr_size are well defined */
+ memset(&eht_host_addr, 0, sizeof(eht_host_addr));
+ memcpy(&eht_host_addr, h_addr, addr_size);
+ if (addr_size == sizeof(__be32))
+ changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs,
+ grec_type);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs,
+ grec_type);
+#endif
+
+out:
+ return changed;
+}
+
+/* Set the maximum number of EHT hosts tracked on port @p.
+ * The new value is stored under the bridge's multicast_lock.
+ *
+ * Returns 0 on success or -EINVAL when @eht_hosts_limit is zero.
+ */
+int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p,
+ u32 eht_hosts_limit)
+{
+ struct net_bridge *bridge = p->br;
+
+ if (eht_hosts_limit == 0)
+ return -EINVAL;
+
+ spin_lock_bh(&bridge->multicast_lock);
+ p->multicast_eht_hosts_limit = eht_hosts_limit;
+ spin_unlock_bh(&bridge->multicast_lock);
+
+ return 0;
+}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 49700ce0e919..f2b1343f8332 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -18,6 +18,7 @@
#include "br_private_stp.h"
#include "br_private_cfm.h"
#include "br_private_tunnel.h"
+#include "br_private_mcast_eht.h"
static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg,
u32 filter_mask)
@@ -199,6 +200,8 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */
+ 0;
}
@@ -283,7 +286,11 @@ static int br_port_fill_attrs(struct sk_buff *skb,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER,
- p->multicast_router))
+ p->multicast_router) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
+ p->multicast_eht_hosts_limit) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
+ p->multicast_eht_hosts_cnt))
return -EMSGSIZE;
#endif
@@ -820,6 +827,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
[IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 },
[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
+ [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
};
/* Change the state of the port and notify spanning tree */
@@ -845,87 +853,59 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state)
}
/* Set/clear or port flags based on attribute */
-static int br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[],
- int attrtype, unsigned long mask)
+static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[],
+ int attrtype, unsigned long mask)
{
- unsigned long flags;
- int err;
-
if (!tb[attrtype])
- return 0;
+ return;
if (nla_get_u8(tb[attrtype]))
- flags = p->flags | mask;
+ p->flags |= mask;
else
- flags = p->flags & ~mask;
-
- err = br_switchdev_set_port_flag(p, flags, mask);
- if (err)
- return err;
-
- p->flags = flags;
- return 0;
+ p->flags &= ~mask;
}
/* Process bridge protocol info on port */
-static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
+static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
{
- unsigned long old_flags = p->flags;
- bool br_vlan_tunnel_old = false;
+ unsigned long old_flags, changed_mask;
+ bool br_vlan_tunnel_old;
int err;
- err = br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST, BR_MULTICAST_TO_UNICAST);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
- if (err)
- return err;
-
- br_vlan_tunnel_old = (p->flags & BR_VLAN_TUNNEL) ? true : false;
- err = br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL);
- if (err)
+ old_flags = p->flags;
+ br_vlan_tunnel_old = (old_flags & BR_VLAN_TUNNEL) ? true : false;
+
+ br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
+ br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD);
+ br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE,
+ BR_MULTICAST_FAST_LEAVE);
+ br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
+ br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
+ br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
+ br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD);
+ br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_TO_UCAST,
+ BR_MULTICAST_TO_UNICAST);
+ br_set_port_flag(p, tb, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD);
+ br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
+ br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
+ br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL);
+ br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, BR_NEIGH_SUPPRESS);
+ br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
+
+ changed_mask = old_flags ^ p->flags;
+
+ err = br_switchdev_set_port_flag(p, p->flags, changed_mask, extack);
+ if (err) {
+ p->flags = old_flags;
return err;
+ }
if (br_vlan_tunnel_old && !(p->flags & BR_VLAN_TUNNEL))
nbp_vlan_tunnel_info_flush(p);
+ br_port_flags_change(p, changed_mask);
+
if (tb[IFLA_BRPORT_COST]) {
err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
if (err)
@@ -955,6 +935,15 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
if (err)
return err;
}
+
+ if (tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]) {
+ u32 hlimit;
+
+ hlimit = nla_get_u32(tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]);
+ err = br_multicast_eht_set_hosts_limit(p, hlimit);
+ if (err)
+ return err;
+ }
#endif
if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
@@ -965,15 +954,6 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
p->group_fwd_mask = fwd_mask;
}
- err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS,
- BR_NEIGH_SUPPRESS);
- if (err)
- return err;
-
- err = br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
- if (err)
- return err;
-
if (tb[IFLA_BRPORT_BACKUP_PORT]) {
struct net_device *backup_dev = NULL;
u32 backup_ifindex;
@@ -991,7 +971,6 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
return err;
}
- br_port_flags_change(p, old_flags ^ p->flags);
return 0;
}
@@ -1029,7 +1008,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags,
return err;
spin_lock_bh(&p->br->lock);
- err = br_setport(p, tb);
+ err = br_setport(p, tb, extack);
spin_unlock_bh(&p->br->lock);
} else {
/* Binary compatibility with old RSTP */
@@ -1096,15 +1075,9 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
- if (data[IFLA_BR_VLAN_PROTOCOL]) {
- switch (nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL])) {
- case htons(ETH_P_8021Q):
- case htons(ETH_P_8021AD):
- break;
- default:
- return -EPROTONOSUPPORT;
- }
- }
+ if (data[IFLA_BR_VLAN_PROTOCOL] &&
+ !eth_type_vlan(nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL])))
+ return -EPROTONOSUPPORT;
if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
__u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
@@ -1130,7 +1103,7 @@ static int br_port_slave_changelink(struct net_device *brdev,
return 0;
spin_lock_bh(&br->lock);
- ret = br_setport(br_port_get_rtnl(dev), data);
+ ret = br_setport(br_port_get_rtnl(dev), data, extack);
spin_unlock_bh(&br->lock);
return ret;
@@ -1239,7 +1212,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_VLAN_FILTERING]) {
u8 vlan_filter = nla_get_u8(data[IFLA_BR_VLAN_FILTERING]);
- err = __br_vlan_filter_toggle(br, vlan_filter);
+ err = br_vlan_filter_toggle(br, vlan_filter, extack);
if (err)
return err;
}
@@ -1248,7 +1221,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_VLAN_PROTOCOL]) {
__be16 vlan_proto = nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL]);
- err = __br_vlan_set_proto(br, vlan_proto);
+ err = __br_vlan_set_proto(br, vlan_proto, extack);
if (err)
return err;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d62c6e1af64a..d7d167e10b70 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -252,6 +252,8 @@ struct net_bridge_port_group {
struct timer_list timer;
struct timer_list rexmit_timer;
struct hlist_node mglist;
+ struct rb_root eht_set_tree;
+ struct rb_root eht_host_tree;
struct rhash_head rhnode;
struct net_bridge_mcast_gc mcast_gc;
@@ -308,6 +310,8 @@ struct net_bridge_port {
#if IS_ENABLED(CONFIG_IPV6)
struct bridge_mcast_own_query ip6_own_query;
#endif /* IS_ENABLED(CONFIG_IPV6) */
+ u32 multicast_eht_hosts_limit;
+ u32 multicast_eht_hosts_cnt;
unsigned char multicast_router;
struct bridge_mcast_stats __percpu *mcast_stats;
struct timer_list multicast_router_timer;
@@ -846,6 +850,10 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
u8 filter_mode);
void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
struct net_bridge_port_group *sg);
+struct net_bridge_group_src *
+br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip);
+void br_multicast_del_group_src(struct net_bridge_group_src *src,
+ bool fastleave);
static inline bool br_group_is_l2(const struct br_ip *group)
{
@@ -1077,14 +1085,17 @@ int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
void br_recalculate_fwd_mask(struct net_bridge *br);
-int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
-int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
-int __br_vlan_set_proto(struct net_bridge *br, __be16 proto);
-int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
+int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack);
+int __br_vlan_set_proto(struct net_bridge *br, __be16 proto,
+ struct netlink_ext_ack *extack);
+int br_vlan_set_proto(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack);
int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
-int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
+int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack);
int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
struct netlink_ext_ack *extack);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
@@ -1253,8 +1264,9 @@ static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
return 0;
}
-static inline int __br_vlan_filter_toggle(struct net_bridge *br,
- unsigned long val)
+static inline int br_vlan_filter_toggle(struct net_bridge *br,
+ unsigned long val,
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
@@ -1567,7 +1579,8 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
const struct sk_buff *skb);
int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long flags,
- unsigned long mask);
+ unsigned long mask,
+ struct netlink_ext_ack *extack);
void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
int type);
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
@@ -1597,7 +1610,8 @@ static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
static inline int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long flags,
- unsigned long mask)
+ unsigned long mask,
+ struct netlink_ext_ack *extack)
{
return 0;
}
diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h
new file mode 100644
index 000000000000..f89049f4892c
--- /dev/null
+++ b/net/bridge/br_private_mcast_eht.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright (c) 2020, Nikolay Aleksandrov <nikolay@nvidia.com>
+ */
+#ifndef _BR_PRIVATE_MCAST_EHT_H_
+#define _BR_PRIVATE_MCAST_EHT_H_
+
+#define BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT 512
+/* address of an EHT host or source: IPv4, or IPv6 when it is enabled */
+union net_bridge_eht_addr {
+ __be32 ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr ip6;
+#endif
+};
+
+/* single host's list of set entries and filter_mode */
+struct net_bridge_group_eht_host {
+ struct rb_node rb_node; /* presumably the node in pg->eht_host_tree - confirm */
+
+ union net_bridge_eht_addr h_addr; /* the host's address */
+ struct hlist_head set_entries; /* set entries, linked via their host_list */
+ unsigned int num_entries; /* presumably the length of set_entries - confirm */
+ unsigned char filter_mode; /* MCAST_INCLUDE or MCAST_EXCLUDE */
+ struct net_bridge_port_group *pg;
+};
+
+/* (host, src entry) added to a per-src set and host's list */
+struct net_bridge_group_eht_set_entry {
+ struct rb_node rb_node; /* presumably the node in eht_set->entry_tree - confirm */
+ struct hlist_node host_list; /* link in the host's set_entries list */
+
+ union net_bridge_eht_addr h_addr; /* address of the host this entry is for */
+ struct timer_list timer; /* re-armed whenever the entry is (re)created */
+ struct net_bridge *br;
+ struct net_bridge_group_eht_set *eht_set; /* the per-src set this entry is in */
+ struct net_bridge_group_eht_host *h_parent; /* presumably the owning host - confirm */
+ struct net_bridge_mcast_gc mcast_gc;
+};
+
+/* per-src set */
+struct net_bridge_group_eht_set {
+ struct rb_node rb_node; /* node in pg->eht_set_tree */
+
+ union net_bridge_eht_addr src_addr; /* tree key, compared with memcmp() */
+ struct rb_root entry_tree; /* set entries; the set is deleted once this is empty */
+ struct timer_list timer; /* expiry timer of the set */
+ struct net_bridge_port_group *pg; /* port group the set belongs to */
+ struct net_bridge *br; /* bridge of the port group's port */
+ struct net_bridge_mcast_gc mcast_gc; /* carries the set's destroy callback */
+};
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg);
+bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+ void *h_addr,
+ void *srcs,
+ u32 nsrcs,
+ size_t addr_size,
+ int grec_type);
+int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p,
+ u32 eht_hosts_limit);
+
+/* Tell whether @pg qualifies for deletion from the EHT point of view:
+ * fast leave must be enabled on its port and its host tree must be empty.
+ */
+static inline bool
+br_multicast_eht_should_del_pg(const struct net_bridge_port_group *pg)
+{
+ if (!(pg->key.port->flags & BR_MULTICAST_FAST_LEAVE))
+ return false;
+
+ return RB_EMPTY_ROOT(&pg->eht_host_tree);
+}
+
+/* Return true when the port of @pg already tracks at least as many EHT hosts
+ * as its configured limit allows.
+ */
+static inline bool
+br_multicast_eht_hosts_over_limit(const struct net_bridge_port_group *pg)
+{
+ const struct net_bridge_port *port = pg->key.port;
+
+ if (port->multicast_eht_hosts_cnt >= port->multicast_eht_hosts_limit)
+ return true;
+
+ return false;
+}
+
+/* Account for one more tracked EHT host on the port of @pg. */
+static inline void br_multicast_eht_hosts_inc(struct net_bridge_port_group *pg)
+{
+ pg->key.port->multicast_eht_hosts_cnt++;
+}
+
+/* Account for one less tracked EHT host on the port of @pg. */
+static inline void br_multicast_eht_hosts_dec(struct net_bridge_port_group *pg)
+{
+ pg->key.port->multicast_eht_hosts_cnt--;
+}
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+
+#endif /* _BR_PRIVATE_MCAST_EHT_H_ */
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index 1883118aae55..9559aa2750fb 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -46,6 +46,20 @@ struct br_mrp {
struct rcu_head rcu;
};
+/* This type is returned by the br_mrp_switchdev functions that allow a SW
+ * fallback in case the HW can't completely implement the protocol.
+ * BR_MRP_NONE - means the HW can't run the protocol at all, so the SW stops
+ * configuring the node.
+ * BR_MRP_SW - the HW can help the SW to run the protocol, by redirecting MRP
+ * frames to the CPU.
+ * BR_MRP_HW - the HW can completely implement the protocol.
+ */
+enum br_mrp_hw_support {
+ BR_MRP_NONE,
+ BR_MRP_SW,
+ BR_MRP_HW,
+};
+
/* br_mrp.c */
int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance);
int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance);
@@ -65,27 +79,59 @@ int br_mrp_start_in_test(struct net_bridge *br,
/* br_mrp_switchdev.c */
int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp);
int br_mrp_switchdev_del(struct net_bridge *br, struct br_mrp *mrp);
-int br_mrp_switchdev_set_ring_role(struct net_bridge *br, struct br_mrp *mrp,
- enum br_mrp_ring_role_type role);
+enum br_mrp_hw_support
+br_mrp_switchdev_set_ring_role(struct net_bridge *br, struct br_mrp *mrp,
+ enum br_mrp_ring_role_type role);
int br_mrp_switchdev_set_ring_state(struct net_bridge *br, struct br_mrp *mrp,
enum br_mrp_ring_state_type state);
-int br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp,
- u32 interval, u8 max_miss, u32 period,
- bool monitor);
-int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
- enum br_mrp_port_state_type state);
+enum br_mrp_hw_support
+br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp,
+ u32 interval, u8 max_miss, u32 period,
+ bool monitor);
+int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state);
int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
enum br_mrp_port_role_type role);
-int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp,
- u16 in_id, u32 ring_id,
- enum br_mrp_in_role_type role);
+enum br_mrp_hw_support
+br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp,
+ u16 in_id, u32 ring_id,
+ enum br_mrp_in_role_type role);
int br_mrp_switchdev_set_in_state(struct net_bridge *br, struct br_mrp *mrp,
enum br_mrp_in_state_type state);
-int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
- u32 interval, u8 max_miss, u32 period);
+enum br_mrp_hw_support
+br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp,
+ u32 interval, u8 max_miss, u32 period);
/* br_mrp_netlink.c */
int br_mrp_ring_port_open(struct net_device *dev, u8 loc);
int br_mrp_in_port_open(struct net_device *dev, u8 loc);
+/* MRP protocol data units */
+struct br_mrp_tlv_hdr {
+ __u8 type;
+ __u8 length;
+};
+
+struct br_mrp_common_hdr {
+ __be16 seq_id;
+ __u8 domain[MRP_DOMAIN_UUID_LENGTH];
+};
+
+struct br_mrp_ring_test_hdr {
+ __be16 prio;
+ __u8 sa[ETH_ALEN];
+ __be16 port_role;
+ __be16 state;
+ __be16 transitions;
+ __be32 timestamp;
+} __attribute__((__packed__));
+
+struct br_mrp_in_test_hdr {
+ __be16 id;
+ __u8 sa[ETH_ALEN];
+ __be16 port_role;
+ __be16 state;
+ __be16 transitions;
+ __be32 timestamp;
+} __attribute__((__packed__));
+
#endif /* _BR_PRIVATE_MRP_H */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 3e88be7aa269..21c6781906aa 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -43,7 +43,7 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
return;
p->state = state;
- err = switchdev_port_attr_set(p->dev, &attr);
+ err = switchdev_port_attr_set(p->dev, &attr, NULL);
if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
(unsigned int) p->port_no, p->dev->name);
@@ -591,7 +591,7 @@ int __set_ageing_time(struct net_device *dev, unsigned long t)
};
int err;
- err = switchdev_port_attr_set(dev, &attr);
+ err = switchdev_port_attr_set(dev, &attr, NULL);
if (err && err != -EOPNOTSUPP)
return err;
@@ -601,8 +601,8 @@ int __set_ageing_time(struct net_device *dev, unsigned long t)
/* Set time interval that dynamic forwarding entries live
* For pure software bridge, allow values outside the 802.1
* standard specification for special cases:
- * 0 - entry never ages (all permanant)
- * 1 - entry disappears (no persistance)
+ * 0 - entry never ages (all permanent)
+ * 1 - entry disappears (no persistence)
*
* Offloaded switch entries maybe more restrictive
*/
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 015209bf44aa..b89503832fcc 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -60,42 +60,47 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long flags,
- unsigned long mask)
+ unsigned long mask,
+ struct netlink_ext_ack *extack)
{
struct switchdev_attr attr = {
.orig_dev = p->dev,
- .id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
- .u.brport_flags = mask,
};
struct switchdev_notifier_port_attr_info info = {
.attr = &attr,
};
int err;
- if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD)
+ mask &= BR_PORT_FLAGS_HW_OFFLOAD;
+ if (!mask)
return 0;
+ attr.id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS;
+ attr.u.brport_flags.val = flags;
+ attr.u.brport_flags.mask = mask;
+
/* We run from atomic context here */
err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev,
- &info.info, NULL);
+ &info.info, extack);
err = notifier_to_errno(err);
if (err == -EOPNOTSUPP)
return 0;
if (err) {
- br_warn(p->br, "bridge flag offload is not supported %u(%s)\n",
- (unsigned int)p->port_no, p->dev->name);
+ if (extack && !extack->_msg)
+ NL_SET_ERR_MSG_MOD(extack,
+ "bridge flag offload is not supported");
return -EOPNOTSUPP;
}
attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
attr.flags = SWITCHDEV_F_DEFER;
- attr.u.brport_flags = flags;
- err = switchdev_port_attr_set(p->dev, &attr);
+ err = switchdev_port_attr_set(p->dev, &attr, extack);
if (err) {
- br_warn(p->br, "error setting offload flag on port %u(%s)\n",
- (unsigned int)p->port_no, p->dev->name);
+ if (extack && !extack->_msg)
+ NL_SET_ERR_MSG_MOD(extack,
+ "error setting offload flag on port");
return err;
}
@@ -153,8 +158,7 @@ int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
.obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.flags = flags,
- .vid_begin = vid,
- .vid_end = vid,
+ .vid = vid,
};
return switchdev_port_obj_add(dev, &v.obj, extack);
@@ -165,8 +169,7 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
struct switchdev_obj_port_vlan v = {
.obj.orig_dev = dev,
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .vid_begin = vid,
- .vid_end = vid,
+ .vid = vid,
};
return switchdev_port_obj_del(dev, &v.obj);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 7db06e3f642a..072e29840082 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -19,6 +19,10 @@
#include "br_private.h"
+/* IMPORTANT: new bridge options must be added with netlink support only
+ * please do not add new sysfs entries
+ */
+
#define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd)))
/*
@@ -26,11 +30,13 @@
*/
static ssize_t store_bridge_parm(struct device *d,
const char *buf, size_t len,
- int (*set)(struct net_bridge *, unsigned long))
+ int (*set)(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack))
{
struct net_bridge *br = to_bridge(d);
- char *endp;
+ struct netlink_ext_ack extack = {0};
unsigned long val;
+ char *endp;
int err;
if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN))
@@ -43,9 +49,15 @@ static ssize_t store_bridge_parm(struct device *d,
if (!rtnl_trylock())
return restart_syscall();
- err = (*set)(br, val);
+ err = (*set)(br, val, &extack);
if (!err)
netdev_state_change(br->dev);
+ if (extack._msg) {
+ if (err)
+ br_err(br, "%s\n", extack._msg);
+ else
+ br_warn(br, "%s\n", extack._msg);
+ }
rtnl_unlock();
return err ? err : len;
@@ -59,11 +71,17 @@ static ssize_t forward_delay_show(struct device *d,
return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay));
}
+static int set_forward_delay(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_set_forward_delay(br, val);
+}
+
static ssize_t forward_delay_store(struct device *d,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_set_forward_delay);
+ return store_bridge_parm(d, buf, len, set_forward_delay);
}
static DEVICE_ATTR_RW(forward_delay);
@@ -74,11 +92,17 @@ static ssize_t hello_time_show(struct device *d, struct device_attribute *attr,
jiffies_to_clock_t(to_bridge(d)->hello_time));
}
+static int set_hello_time(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_set_hello_time(br, val);
+}
+
static ssize_t hello_time_store(struct device *d,
struct device_attribute *attr, const char *buf,
size_t len)
{
- return store_bridge_parm(d, buf, len, br_set_hello_time);
+ return store_bridge_parm(d, buf, len, set_hello_time);
}
static DEVICE_ATTR_RW(hello_time);
@@ -89,10 +113,16 @@ static ssize_t max_age_show(struct device *d, struct device_attribute *attr,
jiffies_to_clock_t(to_bridge(d)->max_age));
}
+static int set_max_age(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_set_max_age(br, val);
+}
+
static ssize_t max_age_store(struct device *d, struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_set_max_age);
+ return store_bridge_parm(d, buf, len, set_max_age);
}
static DEVICE_ATTR_RW(max_age);
@@ -103,7 +133,8 @@ static ssize_t ageing_time_show(struct device *d,
return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time));
}
-static int set_ageing_time(struct net_bridge *br, unsigned long val)
+static int set_ageing_time(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
return br_set_ageing_time(br, val);
}
@@ -124,9 +155,10 @@ static ssize_t stp_state_show(struct device *d,
}
-static int set_stp_state(struct net_bridge *br, unsigned long val)
+static int set_stp_state(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
- return br_stp_set_enabled(br, val, NULL);
+ return br_stp_set_enabled(br, val, extack);
}
static ssize_t stp_state_store(struct device *d,
@@ -145,7 +177,8 @@ static ssize_t group_fwd_mask_show(struct device *d,
return sprintf(buf, "%#x\n", br->group_fwd_mask);
}
-static int set_group_fwd_mask(struct net_bridge *br, unsigned long val)
+static int set_group_fwd_mask(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
if (val & BR_GROUPFWD_RESTRICTED)
return -EINVAL;
@@ -172,7 +205,8 @@ static ssize_t priority_show(struct device *d, struct device_attribute *attr,
(br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]);
}
-static int set_priority(struct net_bridge *br, unsigned long val)
+static int set_priority(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br_stp_set_bridge_priority(br, (u16) val);
return 0;
@@ -308,7 +342,8 @@ static ssize_t group_addr_store(struct device *d,
static DEVICE_ATTR_RW(group_addr);
-static int set_flush(struct net_bridge *br, unsigned long val)
+static int set_flush(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br_fdb_flush(br);
return 0;
@@ -330,9 +365,10 @@ static ssize_t no_linklocal_learn_show(struct device *d,
return sprintf(buf, "%d\n", br_boolopt_get(br, BR_BOOLOPT_NO_LL_LEARN));
}
-static int set_no_linklocal_learn(struct net_bridge *br, unsigned long val)
+static int set_no_linklocal_learn(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
- return br_boolopt_toggle(br, BR_BOOLOPT_NO_LL_LEARN, !!val, NULL);
+ return br_boolopt_toggle(br, BR_BOOLOPT_NO_LL_LEARN, !!val, extack);
}
static ssize_t no_linklocal_learn_store(struct device *d,
@@ -351,11 +387,17 @@ static ssize_t multicast_router_show(struct device *d,
return sprintf(buf, "%d\n", br->multicast_router);
}
+static int set_multicast_router(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_multicast_set_router(br, val);
+}
+
static ssize_t multicast_router_store(struct device *d,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_multicast_set_router);
+ return store_bridge_parm(d, buf, len, set_multicast_router);
}
static DEVICE_ATTR_RW(multicast_router);
@@ -367,11 +409,17 @@ static ssize_t multicast_snooping_show(struct device *d,
return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_ENABLED));
}
+static int toggle_multicast(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_multicast_toggle(br, val);
+}
+
static ssize_t multicast_snooping_store(struct device *d,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_multicast_toggle);
+ return store_bridge_parm(d, buf, len, toggle_multicast);
}
static DEVICE_ATTR_RW(multicast_snooping);
@@ -384,7 +432,8 @@ static ssize_t multicast_query_use_ifaddr_show(struct device *d,
br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR));
}
-static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val)
+static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br_opt_toggle(br, BROPT_MULTICAST_QUERY_USE_IFADDR, !!val);
return 0;
@@ -407,11 +456,17 @@ static ssize_t multicast_querier_show(struct device *d,
return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERIER));
}
+static int set_multicast_querier(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_multicast_set_querier(br, val);
+}
+
static ssize_t multicast_querier_store(struct device *d,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_multicast_set_querier);
+ return store_bridge_parm(d, buf, len, set_multicast_querier);
}
static DEVICE_ATTR_RW(multicast_querier);
@@ -421,10 +476,12 @@ static ssize_t hash_elasticity_show(struct device *d,
return sprintf(buf, "%u\n", RHT_ELASTICITY);
}
-static int set_elasticity(struct net_bridge *br, unsigned long val)
+static int set_elasticity(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
- br_warn(br, "the hash_elasticity option has been deprecated and is always %u\n",
- RHT_ELASTICITY);
+ /* 16 is RHT_ELASTICITY */
+ NL_SET_ERR_MSG_MOD(extack,
+ "the hash_elasticity option has been deprecated and is always 16");
return 0;
}
@@ -443,7 +500,8 @@ static ssize_t hash_max_show(struct device *d, struct device_attribute *attr,
return sprintf(buf, "%u\n", br->hash_max);
}
-static int set_hash_max(struct net_bridge *br, unsigned long val)
+static int set_hash_max(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br->hash_max = val;
return 0;
@@ -465,11 +523,17 @@ static ssize_t multicast_igmp_version_show(struct device *d,
return sprintf(buf, "%u\n", br->multicast_igmp_version);
}
+static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_multicast_set_igmp_version(br, val);
+}
+
static ssize_t multicast_igmp_version_store(struct device *d,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_multicast_set_igmp_version);
+ return store_bridge_parm(d, buf, len, set_multicast_igmp_version);
}
static DEVICE_ATTR_RW(multicast_igmp_version);
@@ -481,7 +545,8 @@ static ssize_t multicast_last_member_count_show(struct device *d,
return sprintf(buf, "%u\n", br->multicast_last_member_count);
}
-static int set_last_member_count(struct net_bridge *br, unsigned long val)
+static int set_last_member_count(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br->multicast_last_member_count = val;
return 0;
@@ -502,7 +567,8 @@ static ssize_t multicast_startup_query_count_show(
return sprintf(buf, "%u\n", br->multicast_startup_query_count);
}
-static int set_startup_query_count(struct net_bridge *br, unsigned long val)
+static int set_startup_query_count(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br->multicast_startup_query_count = val;
return 0;
@@ -524,7 +590,8 @@ static ssize_t multicast_last_member_interval_show(
jiffies_to_clock_t(br->multicast_last_member_interval));
}
-static int set_last_member_interval(struct net_bridge *br, unsigned long val)
+static int set_last_member_interval(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br->multicast_last_member_interval = clock_t_to_jiffies(val);
return 0;
@@ -546,7 +613,8 @@ static ssize_t multicast_membership_interval_show(
jiffies_to_clock_t(br->multicast_membership_interval));
}
-static int set_membership_interval(struct net_bridge *br, unsigned long val)
+static int set_membership_interval(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br->multicast_membership_interval = clock_t_to_jiffies(val);
return 0;
@@ -569,7 +637,8 @@ static ssize_t multicast_querier_interval_show(struct device *d,
jiffies_to_clock_t(br->multicast_querier_interval));
}
-static int set_querier_interval(struct net_bridge *br, unsigned long val)
+static int set_querier_interval(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br->multicast_querier_interval = clock_t_to_jiffies(val);
return 0;
@@ -592,7 +661,8 @@ static ssize_t multicast_query_interval_show(struct device *d,
jiffies_to_clock_t(br->multicast_query_interval));
}
-static int set_query_interval(struct net_bridge *br, unsigned long val)
+static int set_query_interval(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br->multicast_query_interval = clock_t_to_jiffies(val);
return 0;
@@ -615,7 +685,8 @@ static ssize_t multicast_query_response_interval_show(
jiffies_to_clock_t(br->multicast_query_response_interval));
}
-static int set_query_response_interval(struct net_bridge *br, unsigned long val)
+static int set_query_response_interval(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br->multicast_query_response_interval = clock_t_to_jiffies(val);
return 0;
@@ -638,7 +709,8 @@ static ssize_t multicast_startup_query_interval_show(
jiffies_to_clock_t(br->multicast_startup_query_interval));
}
-static int set_startup_query_interval(struct net_bridge *br, unsigned long val)
+static int set_startup_query_interval(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br->multicast_startup_query_interval = clock_t_to_jiffies(val);
return 0;
@@ -662,7 +734,8 @@ static ssize_t multicast_stats_enabled_show(struct device *d,
br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED));
}
-static int set_stats_enabled(struct net_bridge *br, unsigned long val)
+static int set_stats_enabled(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br_opt_toggle(br, BROPT_MULTICAST_STATS_ENABLED, !!val);
return 0;
@@ -687,11 +760,17 @@ static ssize_t multicast_mld_version_show(struct device *d,
return sprintf(buf, "%u\n", br->multicast_mld_version);
}
+static int set_multicast_mld_version(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_multicast_set_mld_version(br, val);
+}
+
static ssize_t multicast_mld_version_store(struct device *d,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_multicast_set_mld_version);
+ return store_bridge_parm(d, buf, len, set_multicast_mld_version);
}
static DEVICE_ATTR_RW(multicast_mld_version);
#endif
@@ -704,7 +783,8 @@ static ssize_t nf_call_iptables_show(
return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IPTABLES));
}
-static int set_nf_call_iptables(struct net_bridge *br, unsigned long val)
+static int set_nf_call_iptables(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br_opt_toggle(br, BROPT_NF_CALL_IPTABLES, !!val);
return 0;
@@ -725,7 +805,8 @@ static ssize_t nf_call_ip6tables_show(
return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_IP6TABLES));
}
-static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val)
+static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br_opt_toggle(br, BROPT_NF_CALL_IP6TABLES, !!val);
return 0;
@@ -746,7 +827,8 @@ static ssize_t nf_call_arptables_show(
return sprintf(buf, "%u\n", br_opt_get(br, BROPT_NF_CALL_ARPTABLES));
}
-static int set_nf_call_arptables(struct net_bridge *br, unsigned long val)
+static int set_nf_call_arptables(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
br_opt_toggle(br, BROPT_NF_CALL_ARPTABLES, !!val);
return 0;
@@ -817,11 +899,17 @@ static ssize_t vlan_stats_enabled_show(struct device *d,
return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_ENABLED));
}
+static int set_vlan_stats_enabled(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_vlan_set_stats(br, val);
+}
+
static ssize_t vlan_stats_enabled_store(struct device *d,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_vlan_set_stats);
+ return store_bridge_parm(d, buf, len, set_vlan_stats_enabled);
}
static DEVICE_ATTR_RW(vlan_stats_enabled);
@@ -833,11 +921,17 @@ static ssize_t vlan_stats_per_port_show(struct device *d,
return sprintf(buf, "%u\n", br_opt_get(br, BROPT_VLAN_STATS_PER_PORT));
}
+static int set_vlan_stats_per_port(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
+{
+ return br_vlan_set_stats_per_port(br, val);
+}
+
static ssize_t vlan_stats_per_port_store(struct device *d,
struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_vlan_set_stats_per_port);
+ return store_bridge_parm(d, buf, len, set_vlan_stats_per_port);
}
static DEVICE_ATTR_RW(vlan_stats_per_port);
#endif
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 7a59cdddd3ce..72e92376eef1 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -17,6 +17,10 @@
#include "br_private.h"
+/* IMPORTANT: new bridge port options must be added with netlink support only
+ * please do not add new sysfs entries
+ */
+
struct brport_attribute {
struct attribute attr;
ssize_t (*show)(struct net_bridge_port *, char *);
@@ -55,9 +59,9 @@ static BRPORT_ATTR(_name, 0644, \
static int store_flag(struct net_bridge_port *p, unsigned long v,
unsigned long mask)
{
- unsigned long flags;
-
- flags = p->flags;
+ struct netlink_ext_ack extack = {0};
+ unsigned long flags = p->flags;
+ int err;
if (v)
flags |= mask;
@@ -65,6 +69,12 @@ static int store_flag(struct net_bridge_port *p, unsigned long v,
flags &= ~mask;
if (flags != p->flags) {
+ err = br_switchdev_set_port_flag(p, flags, mask, &extack);
+ if (err) {
+ netdev_err(p->dev, "%s\n", extack._msg);
+ return err;
+ }
+
p->flags = flags;
br_port_flags_change(p, mask);
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 701cad646b20..8829f621b8ec 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -806,7 +806,8 @@ void br_recalculate_fwd_mask(struct net_bridge *br)
~(1u << br->group_addr[5]);
}
-int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
+int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
struct switchdev_attr attr = {
.orig_dev = br->dev,
@@ -819,7 +820,7 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val)
return 0;
- err = switchdev_port_attr_set(br->dev, &attr);
+ err = switchdev_port_attr_set(br->dev, &attr, extack);
if (err && err != -EOPNOTSUPP)
return err;
@@ -831,11 +832,6 @@ int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
return 0;
}
-int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
-{
- return __br_vlan_filter_toggle(br, val);
-}
-
bool br_vlan_enabled(const struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -854,7 +850,8 @@ int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto)
}
EXPORT_SYMBOL_GPL(br_vlan_get_proto);
-int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
+int __br_vlan_set_proto(struct net_bridge *br, __be16 proto,
+ struct netlink_ext_ack *extack)
{
struct switchdev_attr attr = {
.orig_dev = br->dev,
@@ -871,7 +868,7 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
if (br->vlan_proto == proto)
return 0;
- err = switchdev_port_attr_set(br->dev, &attr);
+ err = switchdev_port_attr_set(br->dev, &attr, extack);
if (err && err != -EOPNOTSUPP)
return err;
@@ -901,7 +898,7 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
err_filt:
attr.u.vlan_protocol = ntohs(oldproto);
- switchdev_port_attr_set(br->dev, &attr);
+ switchdev_port_attr_set(br->dev, &attr, NULL);
list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist)
vlan_vid_del(p->dev, proto, vlan->vid);
@@ -915,12 +912,13 @@ err_filt:
return err;
}
-int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
+int br_vlan_set_proto(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
- if (val != ETH_P_8021Q && val != ETH_P_8021AD)
+ if (!eth_type_vlan(htons(val)))
return -EPROTONOSUPPORT;
- return __br_vlan_set_proto(br, htons(val));
+ return __br_vlan_set_proto(br, htons(val), extack);
}
int br_vlan_set_stats(struct net_bridge *br, unsigned long val)
@@ -1100,7 +1098,8 @@ err_port:
goto out;
}
-int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val)
+int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
u16 pvid = val;
int err = 0;
@@ -1117,7 +1116,7 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val)
err = -EPERM;
goto out;
}
- err = __br_vlan_set_default_pvid(br, pvid, NULL);
+ err = __br_vlan_set_default_pvid(br, pvid, extack);
out:
return err;
}
@@ -1167,7 +1166,7 @@ int nbp_vlan_init(struct net_bridge_port *p, struct netlink_ext_ack *extack)
if (!vg)
goto out;
- ret = switchdev_port_attr_set(p->dev, &attr);
+ ret = switchdev_port_attr_set(p->dev, &attr, extack);
if (ret && ret != -EOPNOTSUPP)
goto err_vlan_enabled;
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 8e8ffac037cd..97805ec424c1 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -87,9 +87,8 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
return nft_meta_get_init(ctx, expr, tb);
}
- priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static struct nft_expr_type nft_meta_bridge_type;
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 79b6a04d8eb6..fadc7c8a3107 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -115,10 +115,7 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
else
skb->ip_summed = CHECKSUM_NONE;
- if (in_interrupt())
- netif_rx(skb);
- else
- netif_rx_ni(skb);
+ netif_rx_any_context(skb);
/* Update statistics. */
priv->netdev->stats.rx_packets++;
diff --git a/net/can/Kconfig b/net/can/Kconfig
index 7c9958df91d3..a9ac5ffab286 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -4,7 +4,6 @@
#
menuconfig CAN
- depends on NET
tristate "CAN bus subsystem support"
help
Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 837bb8af0ec3..cce2af10eb3e 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -304,8 +304,8 @@ static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net,
struct net_device *dev)
{
if (dev) {
- struct can_ml_priv *ml_priv = dev->ml_priv;
- return &ml_priv->dev_rcv_lists;
+ struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+ return &can_ml->dev_rcv_lists;
} else {
return net->can.rx_alldev_list;
}
@@ -790,25 +790,6 @@ void can_proto_unregister(const struct can_proto *cp)
}
EXPORT_SYMBOL(can_proto_unregister);
-/* af_can notifier to create/remove CAN netdevice specific structs */
-static int can_notifier(struct notifier_block *nb, unsigned long msg,
- void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (dev->type != ARPHRD_CAN)
- return NOTIFY_DONE;
-
- switch (msg) {
- case NETDEV_REGISTER:
- WARN(!dev->ml_priv,
- "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
- break;
- }
-
- return NOTIFY_DONE;
-}
-
static int can_pernet_init(struct net *net)
{
spin_lock_init(&net->can.rcvlists_lock);
@@ -876,11 +857,6 @@ static const struct net_proto_family can_family_ops = {
.owner = THIS_MODULE,
};
-/* notifier block for netdevice event */
-static struct notifier_block can_netdev_notifier __read_mostly = {
- .notifier_call = can_notifier,
-};
-
static struct pernet_operations can_pernet_ops __read_mostly = {
.init = can_pernet_init,
.exit = can_pernet_exit,
@@ -911,17 +887,12 @@ static __init int can_init(void)
err = sock_register(&can_family_ops);
if (err)
goto out_sock;
- err = register_netdevice_notifier(&can_netdev_notifier);
- if (err)
- goto out_notifier;
dev_add_pack(&can_packet);
dev_add_pack(&canfd_packet);
return 0;
-out_notifier:
- sock_unregister(PF_CAN);
out_sock:
unregister_pernet_subsys(&can_pernet_ops);
out_pernet:
@@ -935,7 +906,6 @@ static __exit void can_exit(void)
/* protocol unregister */
dev_remove_pack(&canfd_packet);
dev_remove_pack(&can_packet);
- unregister_netdevice_notifier(&can_netdev_notifier);
sock_unregister(PF_CAN);
unregister_pernet_subsys(&can_pernet_ops);
diff --git a/net/can/gw.c b/net/can/gw.c
index 8598d9da0e5f..ba4124805602 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -225,7 +225,7 @@ static void mod_store_ccdlc(struct canfd_frame *cf)
if (ccf->len <= CAN_MAX_DLEN)
return;
- /* potentially broken values are catched in can_can_gw_rcv() */
+ /* potentially broken values are caught in can_can_gw_rcv() */
if (ccf->len > CAN_MAX_RAW_DLC)
return;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 7839c3b9e5be..3ef7f78e553b 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1155,6 +1155,7 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
if (peer)
return -EOPNOTSUPP;
+ memset(addr, 0, sizeof(*addr));
addr->can_family = AF_CAN;
addr->can_ifindex = so->ifindex;
addr->can_addr.tp.rx_id = so->rxid;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index bb914d8b4216..da3a7a7bcff2 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -140,9 +140,9 @@ static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
static inline void j1939_priv_set(struct net_device *ndev,
struct j1939_priv *priv)
{
- struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+ struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
- can_ml_priv->j1939_priv = priv;
+ can_ml->j1939_priv = priv;
}
static void __j1939_priv_release(struct kref *kref)
@@ -211,12 +211,9 @@ static void __j1939_rx_release(struct kref *kref)
/* get pointer to priv without increasing ref counter */
static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
{
- struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+ struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
- if (!can_ml_priv)
- return NULL;
-
- return can_ml_priv->j1939_priv;
+ return can_ml->j1939_priv;
}
static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
@@ -225,9 +222,6 @@ static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
lockdep_assert_held(&j1939_netdev_lock);
- if (ndev->type != ARPHRD_CAN)
- return NULL;
-
priv = j1939_ndev_to_priv(ndev);
if (priv)
j1939_priv_get(priv);
@@ -348,15 +342,16 @@ static int j1939_netdev_notify(struct notifier_block *nb,
unsigned long msg, void *data)
{
struct net_device *ndev = netdev_notifier_info_to_dev(data);
+ struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
struct j1939_priv *priv;
+ if (!can_ml)
+ goto notify_done;
+
priv = j1939_priv_get_by_ndev(ndev);
if (!priv)
goto notify_done;
- if (ndev->type != ARPHRD_CAN)
- goto notify_put;
-
switch (msg) {
case NETDEV_DOWN:
j1939_cancel_active_session(priv, NULL);
@@ -365,7 +360,6 @@ static int j1939_netdev_notify(struct notifier_block *nb,
break;
}
-notify_put:
j1939_priv_put(priv);
notify_done:
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index f23966526a88..56aa66147d5a 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -12,6 +12,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/can/can-ml.h>
#include <linux/can/core.h>
#include <linux/can/skb.h>
#include <linux/errqueue.h>
@@ -453,6 +454,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
j1939_jsk_del(priv, jsk);
j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa);
} else {
+ struct can_ml_priv *can_ml;
struct net_device *ndev;
ndev = dev_get_by_index(net, addr->can_ifindex);
@@ -461,15 +463,8 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
goto out_release_sock;
}
- if (ndev->type != ARPHRD_CAN) {
- dev_put(ndev);
- ret = -ENODEV;
- goto out_release_sock;
- }
-
- if (!ndev->ml_priv) {
- netdev_warn_once(ndev,
- "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
+ can_ml = can_get_ml_priv(ndev);
+ if (!can_ml) {
dev_put(ndev);
ret = -ENODEV;
goto out_release_sock;
diff --git a/net/can/proc.c b/net/can/proc.c
index 5ea8695f507e..b15760b5c1cc 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -322,8 +322,11 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
/* receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv)
- can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv);
+ struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+
+ if (can_ml)
+ can_rcvlist_proc_show_one(m, idx, dev,
+ &can_ml->dev_rcv_lists);
}
rcu_read_unlock();
@@ -375,8 +378,10 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
/* sff receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- dev_rcv_lists = dev->ml_priv;
+ struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+
+ if (can_ml) {
+ dev_rcv_lists = &can_ml->dev_rcv_lists;
can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff,
ARRAY_SIZE(dev_rcv_lists->rx_sff));
}
@@ -406,8 +411,10 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
/* eff receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- dev_rcv_lists = dev->ml_priv;
+ struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+
+ if (can_ml) {
+ dev_rcv_lists = &can_ml->dev_rcv_lists;
can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff,
ARRAY_SIZE(dev_rcv_lists->rx_eff));
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 6ec8aa1d0da4..37b47a39a3ed 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -665,10 +665,18 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
if (ro->count > 0) {
int fsize = ro->count * sizeof(struct can_filter);
- if (len > fsize)
- len = fsize;
- if (copy_to_user(optval, ro->filter, len))
- err = -EFAULT;
+ /* user space buffer too small for filter list? */
+ if (len < fsize) {
+ /* return -ERANGE and needed space in optlen */
+ err = -ERANGE;
+ if (put_user(fsize, optlen))
+ err = -EFAULT;
+ } else {
+ if (len > fsize)
+ len = fsize;
+ if (copy_to_user(optval, ro->filter, len))
+ err = -EFAULT;
+ }
} else {
len = 0;
}
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 9815cfe42af0..ca44c327bace 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -569,6 +569,34 @@ e_range:
return -ERANGE;
}
+static int decode_con_secret(void **p, void *end, u8 *con_secret,
+ int *con_secret_len)
+{
+ int len;
+
+ ceph_decode_32_safe(p, end, len, bad);
+ ceph_decode_need(p, end, len, bad);
+
+ dout("%s len %d\n", __func__, len);
+ if (con_secret) {
+ if (len > CEPH_MAX_CON_SECRET_LEN) {
+ pr_err("connection secret too big %d\n", len);
+ goto bad_memzero;
+ }
+ memcpy(con_secret, *p, len);
+ *con_secret_len = len;
+ }
+ memzero_explicit(*p, len);
+ *p += len;
+ return 0;
+
+bad_memzero:
+ memzero_explicit(*p, len);
+bad:
+ pr_err("failed to decode connection secret\n");
+ return -EINVAL;
+}
+
static int handle_auth_session_key(struct ceph_auth_client *ac,
void **p, void *end,
u8 *session_key, int *session_key_len,
@@ -612,17 +640,9 @@ static int handle_auth_session_key(struct ceph_auth_client *ac,
dout("%s decrypted %d bytes\n", __func__, ret);
dend = dp + ret;
- ceph_decode_32_safe(&dp, dend, len, e_inval);
- if (len > CEPH_MAX_CON_SECRET_LEN) {
- pr_err("connection secret too big %d\n", len);
- return -EINVAL;
- }
-
- dout("%s connection secret len %d\n", __func__, len);
- if (con_secret) {
- memcpy(con_secret, dp, len);
- *con_secret_len = len;
- }
+ ret = decode_con_secret(&dp, dend, con_secret, con_secret_len);
+ if (ret)
+ return ret;
}
/* service tickets */
@@ -828,7 +848,6 @@ static int decrypt_authorizer_reply(struct ceph_crypto_key *secret,
{
void *dp, *dend;
u8 struct_v;
- int len;
int ret;
dp = *p + ceph_x_encrypt_offset();
@@ -843,17 +862,9 @@ static int decrypt_authorizer_reply(struct ceph_crypto_key *secret,
ceph_decode_64_safe(&dp, dend, *nonce_plus_one, e_inval);
dout("%s nonce_plus_one %llu\n", __func__, *nonce_plus_one);
if (struct_v >= 2) {
- ceph_decode_32_safe(&dp, dend, len, e_inval);
- if (len > CEPH_MAX_CON_SECRET_LEN) {
- pr_err("connection secret too big %d\n", len);
- return -EINVAL;
- }
-
- dout("%s connection secret len %d\n", __func__, len);
- if (con_secret) {
- memcpy(con_secret, dp, len);
- *con_secret_len = len;
- }
+ ret = decode_con_secret(&dp, dend, con_secret, con_secret_len);
+ if (ret)
+ return ret;
}
return 0;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 271287c5ec12..97d6ea763e32 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -252,7 +252,6 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
* ceph options
*/
enum {
- Opt_osdtimeout,
Opt_osdkeepalivetimeout,
Opt_mount_timeout,
Opt_osd_idle_ttl,
@@ -307,7 +306,8 @@ static const struct constant_table ceph_param_ms_mode[] = {
static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full),
- fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
+ __fsparam (NULL, "cephx_require_signatures", Opt_cephx_require_signatures,
+ fs_param_neg_with_no|fs_param_deprecated, NULL),
fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages),
fsparam_flag_no ("crc", Opt_crc),
fsparam_string ("crush_location", Opt_crush_location),
@@ -319,8 +319,6 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osd_idle_ttl", Opt_osd_idle_ttl),
fsparam_u32 ("osd_request_timeout", Opt_osd_request_timeout),
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
- __fsparam (fs_param_is_s32, "osdtimeout", Opt_osdtimeout,
- fs_param_deprecated, NULL),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
fsparam_enum ("ms_mode", Opt_ms_mode,
@@ -552,9 +550,6 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
}
break;
- case Opt_osdtimeout:
- warn_plog(&log, "Ignoring osdtimeout");
- break;
case Opt_osdkeepalivetimeout:
/* 0 isn't well defined right now, reject it */
if (result.uint_32 < 1 || result.uint_32 > INT_MAX / 1000)
@@ -596,9 +591,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
break;
case Opt_cephx_require_signatures:
if (!result.negated)
- opt->flags &= ~CEPH_OPT_NOMSGAUTH;
+ warn_plog(&log, "Ignoring cephx_require_signatures");
else
- opt->flags |= CEPH_OPT_NOMSGAUTH;
+ warn_plog(&log, "Ignoring nocephx_require_signatures, use nocephx_sign_messages");
break;
case Opt_cephx_sign_messages:
if (!result.negated)
@@ -686,8 +681,6 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
seq_puts(m, "noshare,");
if (opt->flags & CEPH_OPT_NOCRC)
seq_puts(m, "nocrc,");
- if (opt->flags & CEPH_OPT_NOMSGAUTH)
- seq_puts(m, "nocephx_require_signatures,");
if (opt->flags & CEPH_OPT_NOMSGSIGN)
seq_puts(m, "nocephx_sign_messages,");
if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
@@ -756,7 +749,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT;
- if (!ceph_test_opt(client, NOMSGAUTH))
+ if (!ceph_test_opt(client, NOMSGSIGN))
client->required_features |= CEPH_FEATURE_MSG_AUTH;
/* msgr */
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 4f75df40fb12..92d89b331645 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -96,6 +96,7 @@ int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end)
key->len = ceph_decode_16(p);
ceph_decode_need(p, end, key->len, bad);
ret = set_secret(key, *p);
+ memzero_explicit(*p, key->len);
*p += key->len;
return ret;
@@ -134,7 +135,7 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
{
if (key) {
- kfree(key->key);
+ kfree_sensitive(key->key);
key->key = NULL;
if (key->tfm) {
crypto_free_sync_skcipher(key->tfm);
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 04f653b3c897..2cb5ffdf071a 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -1100,7 +1100,7 @@ static int read_partial_message(struct ceph_connection *con)
if (ret < 0)
return ret;
- BUG_ON(!con->in_msg ^ skip);
+ BUG_ON((!con->in_msg) ^ skip);
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index c1ebb2aa08b5..cc40ce4e02fb 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -689,11 +689,10 @@ static int verify_epilogue_crcs(struct ceph_connection *con, u32 front_crc,
}
static int setup_crypto(struct ceph_connection *con,
- u8 *session_key, int session_key_len,
- u8 *con_secret, int con_secret_len)
+ const u8 *session_key, int session_key_len,
+ const u8 *con_secret, int con_secret_len)
{
unsigned int noio_flag;
- void *p;
int ret;
dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n",
@@ -751,15 +750,14 @@ static int setup_crypto(struct ceph_connection *con,
return ret;
}
- p = con_secret;
- WARN_ON((unsigned long)p & crypto_aead_alignmask(con->v2.gcm_tfm));
- ret = crypto_aead_setkey(con->v2.gcm_tfm, p, CEPH_GCM_KEY_LEN);
+ WARN_ON((unsigned long)con_secret &
+ crypto_aead_alignmask(con->v2.gcm_tfm));
+ ret = crypto_aead_setkey(con->v2.gcm_tfm, con_secret, CEPH_GCM_KEY_LEN);
if (ret) {
pr_err("failed to set gcm key: %d\n", ret);
return ret;
}
- p += CEPH_GCM_KEY_LEN;
WARN_ON(crypto_aead_ivsize(con->v2.gcm_tfm) != CEPH_GCM_IV_LEN);
ret = crypto_aead_setauthsize(con->v2.gcm_tfm, CEPH_GCM_TAG_LEN);
if (ret) {
@@ -777,8 +775,11 @@ static int setup_crypto(struct ceph_connection *con,
aead_request_set_callback(con->v2.gcm_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &con->v2.gcm_wait);
- memcpy(&con->v2.in_gcm_nonce, p, CEPH_GCM_IV_LEN);
- memcpy(&con->v2.out_gcm_nonce, p + CEPH_GCM_IV_LEN, CEPH_GCM_IV_LEN);
+ memcpy(&con->v2.in_gcm_nonce, con_secret + CEPH_GCM_KEY_LEN,
+ CEPH_GCM_IV_LEN);
+ memcpy(&con->v2.out_gcm_nonce,
+ con_secret + CEPH_GCM_KEY_LEN + CEPH_GCM_IV_LEN,
+ CEPH_GCM_IV_LEN);
return 0; /* auth_x, secure mode */
}
@@ -800,7 +801,7 @@ static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs,
desc->tfm = con->v2.hmac_tfm;
ret = crypto_shash_init(desc);
if (ret)
- return ret;
+ goto out;
for (i = 0; i < kvec_cnt; i++) {
WARN_ON((unsigned long)kvecs[i].iov_base &
@@ -808,15 +809,14 @@ static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs,
ret = crypto_shash_update(desc, kvecs[i].iov_base,
kvecs[i].iov_len);
if (ret)
- return ret;
+ goto out;
}
ret = crypto_shash_final(desc, hmac);
- if (ret)
- return ret;
+out:
shash_desc_zero(desc);
- return 0; /* auth_x, both plain and secure modes */
+ return ret; /* auth_x, both plain and secure modes */
}
static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce)
@@ -1333,7 +1333,8 @@ static int prepare_auth_signature(struct ceph_connection *con)
void *buf;
int ret;
- buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, false));
+ buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE,
+ con_secure(con)));
if (!buf)
return -ENOMEM;
@@ -2032,10 +2033,18 @@ bad:
return -EINVAL;
}
+/*
+ * Align session_key and con_secret to avoid GFP_ATOMIC allocation
+ * inside crypto_shash_setkey() and crypto_aead_setkey() called from
+ * setup_crypto(). __aligned(16) isn't guaranteed to work for stack
+ * objects, so do it by hand.
+ */
static int process_auth_done(struct ceph_connection *con, void *p, void *end)
{
- u8 session_key[CEPH_KEY_LEN];
- u8 con_secret[CEPH_MAX_CON_SECRET_LEN];
+ u8 session_key_buf[CEPH_KEY_LEN + 16];
+ u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16];
+ u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16);
+ u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16);
int session_key_len, con_secret_len;
int payload_len;
u64 global_id;
@@ -2063,27 +2072,32 @@ static int process_auth_done(struct ceph_connection *con, void *p, void *end)
if (con->state != CEPH_CON_S_V2_AUTH) {
dout("%s con %p state changed to %d\n", __func__, con,
con->state);
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto out;
}
dout("%s con %p handle_auth_done ret %d\n", __func__, con, ret);
if (ret)
- return ret;
+ goto out;
ret = setup_crypto(con, session_key, session_key_len, con_secret,
con_secret_len);
if (ret)
- return ret;
+ goto out;
reset_out_kvecs(con);
ret = prepare_auth_signature(con);
if (ret) {
pr_err("prepare_auth_signature failed: %d\n", ret);
- return ret;
+ goto out;
}
con->state = CEPH_CON_S_V2_AUTH_SIGNATURE;
- return 0;
+
+out:
+ memzero_explicit(session_key_buf, sizeof(session_key_buf));
+ memzero_explicit(con_secret_buf, sizeof(con_secret_buf));
+ return ret;
bad:
pr_err("failed to decode auth_done\n");
@@ -3427,6 +3441,8 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con)
}
con->v2.con_mode = CEPH_CON_MODE_UNKNOWN;
+ memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN);
+ memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN);
if (con->v2.hmac_tfm) {
crypto_free_shash(con->v2.hmac_tfm);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index b9d54ed9f338..195ceb8afb06 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1433,7 +1433,7 @@ static int mon_handle_auth_bad_method(struct ceph_connection *con,
/*
* handle incoming message
*/
-static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
+static void mon_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
{
struct ceph_mon_client *monc = con->private;
int type = le16_to_cpu(msg->hdr.type);
@@ -1565,21 +1565,21 @@ static void mon_fault(struct ceph_connection *con)
* will come from the messenger workqueue, which is drained prior to
* mon_client destruction.
*/
-static struct ceph_connection *con_get(struct ceph_connection *con)
+static struct ceph_connection *mon_get_con(struct ceph_connection *con)
{
return con;
}
-static void con_put(struct ceph_connection *con)
+static void mon_put_con(struct ceph_connection *con)
{
}
static const struct ceph_connection_operations mon_con_ops = {
- .get = con_get,
- .put = con_put,
- .dispatch = dispatch,
- .fault = mon_fault,
+ .get = mon_get_con,
+ .put = mon_put_con,
.alloc_msg = mon_alloc_msg,
+ .dispatch = mon_dispatch,
+ .fault = mon_fault,
.get_auth_request = mon_get_auth_request,
.handle_auth_reply_more = mon_handle_auth_reply_more,
.handle_auth_done = mon_handle_auth_done,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 61229c5e22cb..ff8624a7c964 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5412,7 +5412,7 @@ void ceph_osdc_cleanup(void)
/*
* handle incoming message
*/
-static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
+static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
{
struct ceph_osd *osd = con->private;
struct ceph_osd_client *osdc = osd->o_osdc;
@@ -5534,9 +5534,9 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
return m;
}
-static struct ceph_msg *alloc_msg(struct ceph_connection *con,
- struct ceph_msg_header *hdr,
- int *skip)
+static struct ceph_msg *osd_alloc_msg(struct ceph_connection *con,
+ struct ceph_msg_header *hdr,
+ int *skip)
{
struct ceph_osd *osd = con->private;
int type = le16_to_cpu(hdr->type);
@@ -5560,7 +5560,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
/*
* Wrappers to refcount containing ceph_osd struct
*/
-static struct ceph_connection *get_osd_con(struct ceph_connection *con)
+static struct ceph_connection *osd_get_con(struct ceph_connection *con)
{
struct ceph_osd *osd = con->private;
if (get_osd(osd))
@@ -5568,7 +5568,7 @@ static struct ceph_connection *get_osd_con(struct ceph_connection *con)
return NULL;
}
-static void put_osd_con(struct ceph_connection *con)
+static void osd_put_con(struct ceph_connection *con)
{
struct ceph_osd *osd = con->private;
put_osd(osd);
@@ -5582,8 +5582,8 @@ static void put_osd_con(struct ceph_connection *con)
* Note: returned pointer is the address of a structure that's
* managed separately. Caller must *not* attempt to free it.
*/
-static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
- int *proto, int force_new)
+static struct ceph_auth_handshake *
+osd_get_authorizer(struct ceph_connection *con, int *proto, int force_new)
{
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
@@ -5599,7 +5599,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
return auth;
}
-static int add_authorizer_challenge(struct ceph_connection *con,
+static int osd_add_authorizer_challenge(struct ceph_connection *con,
void *challenge_buf, int challenge_buf_len)
{
struct ceph_osd *o = con->private;
@@ -5610,7 +5610,7 @@ static int add_authorizer_challenge(struct ceph_connection *con,
challenge_buf, challenge_buf_len);
}
-static int verify_authorizer_reply(struct ceph_connection *con)
+static int osd_verify_authorizer_reply(struct ceph_connection *con)
{
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
@@ -5622,7 +5622,7 @@ static int verify_authorizer_reply(struct ceph_connection *con)
NULL, NULL, NULL, NULL);
}
-static int invalidate_authorizer(struct ceph_connection *con)
+static int osd_invalidate_authorizer(struct ceph_connection *con)
{
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
@@ -5731,18 +5731,18 @@ static int osd_check_message_signature(struct ceph_msg *msg)
}
static const struct ceph_connection_operations osd_con_ops = {
- .get = get_osd_con,
- .put = put_osd_con,
- .dispatch = dispatch,
- .get_authorizer = get_authorizer,
- .add_authorizer_challenge = add_authorizer_challenge,
- .verify_authorizer_reply = verify_authorizer_reply,
- .invalidate_authorizer = invalidate_authorizer,
- .alloc_msg = alloc_msg,
+ .get = osd_get_con,
+ .put = osd_put_con,
+ .alloc_msg = osd_alloc_msg,
+ .dispatch = osd_dispatch,
+ .fault = osd_fault,
.reencode_message = osd_reencode_message,
+ .get_authorizer = osd_get_authorizer,
+ .add_authorizer_challenge = osd_add_authorizer_challenge,
+ .verify_authorizer_reply = osd_verify_authorizer_reply,
+ .invalidate_authorizer = osd_invalidate_authorizer,
.sign_message = osd_sign_message,
.check_message_signature = osd_check_message_signature,
- .fault = osd_fault,
.get_auth_request = osd_get_auth_request,
.handle_auth_reply_more = osd_handle_auth_reply_more,
.handle_auth_done = osd_handle_auth_done,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 81809fa735a7..15ab9ffb27fe 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -721,8 +721,16 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len,
__wsum *csump)
{
- return __skb_datagram_iter(skb, offset, to, len, true,
- csum_and_copy_to_iter, csump);
+ struct csum_state csdata = { .csum = *csump };
+ int ret;
+
+ ret = __skb_datagram_iter(skb, offset, to, len, true,
+ csum_and_copy_to_iter, &csdata);
+ if (ret)
+ return ret;
+
+ *csump = csdata.csum;
+ return 0;
}
/**
diff --git a/net/core/dev.c b/net/core/dev.c
index 8fa739259041..6c5967e80132 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -91,6 +91,7 @@
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/skbuff.h>
+#include <linux/kthread.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/net_namespace.h>
@@ -101,6 +102,7 @@
#include <net/dsa.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
+#include <net/gro.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/checksum.h>
@@ -1493,6 +1495,27 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);
+static int napi_threaded_poll(void *data);
+
+static int napi_kthread_create(struct napi_struct *n)
+{
+ int err = 0;
+
+ /* Create and wake up the kthread once to put it in
+ * TASK_INTERRUPTIBLE mode to avoid the blocked task
+ * warning and work with loadavg.
+ */
+ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d",
+ n->dev->name, n->napi_id);
+ if (IS_ERR(n->thread)) {
+ err = PTR_ERR(n->thread);
+ pr_err("kthread_run failed with err %d\n", err);
+ n->thread = NULL;
+ }
+
+ return err;
+}
+
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -2194,28 +2217,14 @@ static inline void net_timestamp_set(struct sk_buff *skb)
bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
{
- unsigned int len;
-
- if (!(dev->flags & IFF_UP))
- return false;
-
- len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
- if (skb->len <= len)
- return true;
-
- /* if TSO is enabled, we don't care about the length as the packet
- * could be forwarded without being segmented before
- */
- if (skb_is_gso(skb))
- return true;
-
- return false;
+ return __is_skb_forwardable(dev, skb, true);
}
EXPORT_SYMBOL_GPL(is_skb_forwardable);
-int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+static int __dev_forward_skb2(struct net_device *dev, struct sk_buff *skb,
+ bool check_mtu)
{
- int ret = ____dev_forward_skb(dev, skb);
+ int ret = ____dev_forward_skb(dev, skb, check_mtu);
if (likely(!ret)) {
skb->protocol = eth_type_trans(skb, dev);
@@ -2224,6 +2233,11 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
return ret;
}
+
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+ return __dev_forward_skb2(dev, skb, true);
+}
EXPORT_SYMBOL_GPL(__dev_forward_skb);
/**
@@ -2250,6 +2264,11 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
+int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb)
+{
+ return __dev_forward_skb2(dev, skb, false) ?: netif_rx_internal(skb);
+}
+
static inline int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
@@ -3617,11 +3636,22 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
int skb_csum_hwoffload_help(struct sk_buff *skb,
const netdev_features_t features)
{
- if (unlikely(skb->csum_not_inet))
+ if (unlikely(skb_csum_is_sctp(skb)))
return !!(features & NETIF_F_SCTP_CRC) ? 0 :
skb_crc32c_csum_help(skb);
- return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
+ if (features & NETIF_F_HW_CSUM)
+ return 0;
+
+ if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
+ switch (skb->csum_offset) {
+ case offsetof(struct tcphdr, check):
+ case offsetof(struct udphdr, check):
+ return 0;
+ }
+ }
+
+ return skb_checksum_help(skb);
}
EXPORT_SYMBOL(skb_csum_hwoffload_help);
@@ -3878,6 +3908,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
qdisc_skb_cb(skb)->mru = 0;
+ qdisc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
@@ -4083,7 +4114,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
skb_reset_mac_header(skb);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
- __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
+ __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED);
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
@@ -4252,6 +4283,22 @@ int gro_normal_batch __read_mostly = 8;
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
+ struct task_struct *thread;
+
+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
+ /* Paired with smp_mb__before_atomic() in
+ * napi_enable()/dev_set_threaded().
+ * Use READ_ONCE() to guarantee a complete
+ * read on napi->thread. Only call
+ * wake_up_process() when it's not NULL.
+ */
+ thread = READ_ONCE(napi->thread);
+ if (thread) {
+ wake_up_process(thread);
+ return;
+ }
+ }
+
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
@@ -4603,14 +4650,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
+ void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
- void *orig_data, *orig_data_end;
u32 metalen, act = XDP_DROP;
+ u32 mac_len, frame_sz;
__be16 orig_eth_type;
struct ethhdr *eth;
bool orig_bcast;
- int hlen, off;
- u32 mac_len;
+ int off;
/* Reinjected packets coming from act_mirred or similar should
* not get XDP generic processing.
@@ -4642,15 +4689,16 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
* header.
*/
mac_len = skb->data - skb_mac_header(skb);
- hlen = skb_headlen(skb) + mac_len;
- xdp->data = skb->data - mac_len;
- xdp->data_meta = xdp->data;
- xdp->data_end = xdp->data + hlen;
- xdp->data_hard_start = skb->data - skb_headroom(skb);
+ hard_start = skb->data - skb_headroom(skb);
/* SKB "head" area always have tailroom for skb_shared_info */
- xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
- xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ frame_sz = (void *)skb_end_pointer(skb) - hard_start;
+ frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ rxqueue = netif_get_rxqueue(skb);
+ xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq);
+ xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len,
+ skb_headlen(skb) + mac_len, true);
orig_data_end = xdp->data_end;
orig_data = xdp->data;
@@ -4658,9 +4706,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
orig_eth_type = eth->h_proto;
- rxqueue = netif_get_rxqueue(skb);
- xdp->rxq = &rxqueue->xdp_rxq;
-
act = bpf_prog_run_xdp(xdp_prog, xdp);
/* check if bpf_xdp_adjust_head was used */
@@ -4895,8 +4940,6 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
else
__kfree_skb_defer(skb);
}
-
- __kfree_skb_flush();
}
if (sd->output_queue) {
@@ -4962,6 +5005,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
qdisc_skb_cb(skb)->pkt_len = skb->len;
qdisc_skb_cb(skb)->mru = 0;
+ qdisc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
@@ -5151,8 +5195,7 @@ another_round:
skb_reset_mac_len(skb);
}
- if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
- skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+ if (eth_type_vlan(skb->protocol)) {
skb = skb_vlan_untag(skb);
if (unlikely(!skb))
goto out;
@@ -5236,8 +5279,7 @@ check_vlan_id:
* find vlan device.
*/
skb->pkt_type = PACKET_OTHERHOST;
- } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
- skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+ } else if (eth_type_vlan(skb->protocol)) {
/* Outer header is 802.1P with vlan 0, inner header is
* 802.1Q or 802.1AD and vlan_do_receive() above could
* not find vlan dev for vlan id 0.
@@ -5713,7 +5755,7 @@ static void flush_all_backlogs(void)
}
/* we can have in flight packet[s] on the cpus we are not flushing,
- * synchronize_net() in rollback_registered_many() will take care of
+ * synchronize_net() in unregister_netdevice_many() will take care of
* them
*/
for_each_cpu(cpu, &flush_cpus)
@@ -5735,15 +5777,14 @@ static void gro_normal_list(struct napi_struct *napi)
/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
* pass the whole batch up to the stack.
*/
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
{
list_add_tail(&skb->list, &napi->rx_list);
- if (++napi->rx_count >= gro_normal_batch)
+ napi->rx_count += segs;
+ if (napi->rx_count >= gro_normal_batch)
gro_normal_list(napi);
}
-INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
-INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
{
struct packet_offload *ptype;
@@ -5777,7 +5818,7 @@ static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
}
out:
- gro_normal_one(napi, skb);
+ gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
return NET_RX_SUCCESS;
}
@@ -5912,10 +5953,6 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
napi_gro_complete(napi, oldest);
}
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
- struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
- struct sk_buff *));
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@ -6054,31 +6091,20 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
-static void napi_skb_free_stolen_head(struct sk_buff *skb)
-{
- skb_dst_drop(skb);
- skb_ext_put(skb);
- kmem_cache_free(skbuff_head_cache, skb);
-}
-
static gro_result_t napi_skb_finish(struct napi_struct *napi,
struct sk_buff *skb,
gro_result_t ret)
{
switch (ret) {
case GRO_NORMAL:
- gro_normal_one(napi, skb);
- break;
-
- case GRO_DROP:
- kfree_skb(skb);
+ gro_normal_one(napi, skb, 1);
break;
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
napi_skb_free_stolen_head(skb);
else
- __kfree_skb(skb);
+ __kfree_skb_defer(skb);
break;
case GRO_HELD:
@@ -6155,11 +6181,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
__skb_push(skb, ETH_HLEN);
skb->protocol = eth_type_trans(skb, skb->dev);
if (ret == GRO_NORMAL)
- gro_normal_one(napi, skb);
- break;
-
- case GRO_DROP:
- napi_reuse_skb(napi, skb);
+ gro_normal_one(napi, skb, 1);
break;
case GRO_MERGED_FREE:
@@ -6223,9 +6245,6 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
gro_result_t ret;
struct sk_buff *skb = napi_frags_skb(napi);
- if (!skb)
- return GRO_DROP;
-
trace_napi_gro_frags_entry(skb);
ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
@@ -6708,6 +6727,49 @@ static void init_gro_hash(struct napi_struct *napi)
napi->gro_bitmask = 0;
}
+int dev_set_threaded(struct net_device *dev, bool threaded)
+{
+ struct napi_struct *napi;
+ int err = 0;
+
+ if (dev->threaded == threaded)
+ return 0;
+
+ if (threaded) {
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (!napi->thread) {
+ err = napi_kthread_create(napi);
+ if (err) {
+ threaded = false;
+ break;
+ }
+ }
+ }
+ }
+
+ dev->threaded = threaded;
+
+ /* Make sure kthread is created before THREADED bit
+ * is set.
+ */
+ smp_mb__before_atomic();
+
+ /* Setting/unsetting threaded mode on a napi might not immediately
+ * take effect, if the current napi instance is actively being
+ * polled. In this case, the switch between threaded mode and
+ * softirq mode will happen in the next round of napi_schedule().
+ * This should not cause hiccups/stalls to the live traffic.
+ */
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (threaded)
+ set_bit(NAPI_STATE_THREADED, &napi->state);
+ else
+ clear_bit(NAPI_STATE_THREADED, &napi->state);
+ }
+
+ return err;
+}
+
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6735,6 +6797,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
set_bit(NAPI_STATE_NPSVC, &napi->state);
list_add_rcu(&napi->dev_list, &dev->napi_list);
napi_hash_add(napi);
+ /* Create kthread for this napi if dev->threaded is set.
+ * Clear dev->threaded if kthread creation failed so that
+ * threaded mode will not be enabled in napi_enable().
+ */
+ if (dev->threaded && napi_kthread_create(napi))
+ dev->threaded = 0;
}
EXPORT_SYMBOL(netif_napi_add);
@@ -6752,9 +6820,28 @@ void napi_disable(struct napi_struct *n)
clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
clear_bit(NAPI_STATE_DISABLE, &n->state);
+ clear_bit(NAPI_STATE_THREADED, &n->state);
}
EXPORT_SYMBOL(napi_disable);
+/**
+ * napi_enable - enable NAPI scheduling
+ * @n: NAPI context
+ *
+ * Resume NAPI from being scheduled on this context.
+ * Must be paired with napi_disable.
+ */
+void napi_enable(struct napi_struct *n)
+{
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+ smp_mb__before_atomic();
+ clear_bit(NAPI_STATE_SCHED, &n->state);
+ clear_bit(NAPI_STATE_NPSVC, &n->state);
+ if (n->dev->threaded && n->thread)
+ set_bit(NAPI_STATE_THREADED, &n->state);
+}
+EXPORT_SYMBOL(napi_enable);
+
static void flush_gro_hash(struct napi_struct *napi)
{
int i;
@@ -6780,18 +6867,18 @@ void __netif_napi_del(struct napi_struct *napi)
flush_gro_hash(napi);
napi->gro_bitmask = 0;
+
+ if (napi->thread) {
+ kthread_stop(napi->thread);
+ napi->thread = NULL;
+ }
}
EXPORT_SYMBOL(__netif_napi_del);
-static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+static int __napi_poll(struct napi_struct *n, bool *repoll)
{
- void *have;
int work, weight;
- list_del_init(&n->poll_list);
-
- have = netpoll_poll_lock(n);
-
weight = n->weight;
/* This NAPI_STATE_SCHED test is for avoiding a race
@@ -6811,7 +6898,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
n->poll, work, weight);
if (likely(work < weight))
- goto out_unlock;
+ return work;
/* Drivers must not modify the NAPI state if they
* consume the entire weight. In such cases this code
@@ -6820,7 +6907,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
*/
if (unlikely(napi_disable_pending(n))) {
napi_complete(n);
- goto out_unlock;
+ return work;
}
/* The NAPI context has more processing work, but busy-polling
@@ -6833,7 +6920,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
*/
napi_schedule(n);
}
- goto out_unlock;
+ return work;
}
if (n->gro_bitmask) {
@@ -6851,17 +6938,78 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
if (unlikely(!list_empty(&n->poll_list))) {
pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
n->dev ? n->dev->name : "backlog");
- goto out_unlock;
+ return work;
}
- list_add_tail(&n->poll_list, repoll);
+ *repoll = true;
+
+ return work;
+}
+
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+ bool do_repoll = false;
+ void *have;
+ int work;
+
+ list_del_init(&n->poll_list);
+
+ have = netpoll_poll_lock(n);
+
+ work = __napi_poll(n, &do_repoll);
+
+ if (do_repoll)
+ list_add_tail(&n->poll_list, repoll);
-out_unlock:
netpoll_poll_unlock(have);
return work;
}
+static int napi_thread_wait(struct napi_struct *napi)
+{
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ WARN_ON(!list_empty(&napi->poll_list));
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+
+ schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+ __set_current_state(TASK_RUNNING);
+ return -1;
+}
+
+static int napi_threaded_poll(void *data)
+{
+ struct napi_struct *napi = data;
+ void *have;
+
+ while (!napi_thread_wait(napi)) {
+ for (;;) {
+ bool repoll = false;
+
+ local_bh_disable();
+
+ have = netpoll_poll_lock(napi);
+ __napi_poll(napi, &repoll);
+ netpoll_poll_unlock(have);
+
+ local_bh_enable();
+
+ if (!repoll)
+ break;
+
+ cond_resched();
+ }
+ }
+ return 0;
+}
+
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -6880,7 +7028,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
if (list_empty(&list)) {
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
- goto out;
+ return;
break;
}
@@ -6907,8 +7055,6 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
-out:
- __kfree_skb_flush();
}
struct netdev_adjacent {
@@ -8120,6 +8266,39 @@ struct net_device *netdev_get_xmit_slave(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_get_xmit_slave);
+static struct net_device *netdev_sk_get_lower_dev(struct net_device *dev,
+ struct sock *sk)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_sk_get_lower_dev)
+ return NULL;
+ return ops->ndo_sk_get_lower_dev(dev, sk);
+}
+
+/**
+ * netdev_sk_get_lowest_dev - Get the lowest device in chain given device and socket
+ * @dev: device
+ * @sk: the socket
+ *
+ * %NULL is returned if no lower device is found.
+ */
+
+struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev,
+ struct sock *sk)
+{
+ struct net_device *lower;
+
+ lower = netdev_sk_get_lower_dev(dev, sk);
+ while (lower) {
+ dev = lower;
+ lower = netdev_sk_get_lower_dev(dev, sk);
+ }
+
+ return dev;
+}
+EXPORT_SYMBOL(netdev_sk_get_lowest_dev);
+
static void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -8742,6 +8921,48 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
}
EXPORT_SYMBOL(dev_set_mac_address);
+static DECLARE_RWSEM(dev_addr_sem);
+
+int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
+ struct netlink_ext_ack *extack)
+{
+ int ret;
+
+ down_write(&dev_addr_sem);
+ ret = dev_set_mac_address(dev, sa, extack);
+ up_write(&dev_addr_sem);
+ return ret;
+}
+EXPORT_SYMBOL(dev_set_mac_address_user);
+
+int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
+{
+ size_t size = sizeof(sa->sa_data);
+ struct net_device *dev;
+ int ret = 0;
+
+ down_read(&dev_addr_sem);
+ rcu_read_lock();
+
+ dev = dev_get_by_name_rcu(net, dev_name);
+ if (!dev) {
+ ret = -ENODEV;
+ goto unlock;
+ }
+ if (!dev->addr_len)
+ memset(sa->sa_data, 0, size);
+ else
+ memcpy(sa->sa_data, dev->dev_addr,
+ min_t(size_t, size, dev->addr_len));
+ sa->sa_family = dev->type;
+
+unlock:
+ rcu_read_unlock();
+ up_read(&dev_addr_sem);
+ return ret;
+}
+EXPORT_SYMBOL(dev_get_mac_address);
+
/**
* dev_change_carrier - Change device carrier
* @dev: device
@@ -9441,106 +9662,6 @@ static void net_set_todo(struct net_device *dev)
dev_net(dev)->dev_unreg_count++;
}
-static void rollback_registered_many(struct list_head *head)
-{
- struct net_device *dev, *tmp;
- LIST_HEAD(close_head);
-
- BUG_ON(dev_boot_phase);
- ASSERT_RTNL();
-
- list_for_each_entry_safe(dev, tmp, head, unreg_list) {
- /* Some devices call without registering
- * for initialization unwind. Remove those
- * devices and proceed with the remaining.
- */
- if (dev->reg_state == NETREG_UNINITIALIZED) {
- pr_debug("unregister_netdevice: device %s/%p never was registered\n",
- dev->name, dev);
-
- WARN_ON(1);
- list_del(&dev->unreg_list);
- continue;
- }
- dev->dismantle = true;
- BUG_ON(dev->reg_state != NETREG_REGISTERED);
- }
-
- /* If device is running, close it first. */
- list_for_each_entry(dev, head, unreg_list)
- list_add_tail(&dev->close_list, &close_head);
- dev_close_many(&close_head, true);
-
- list_for_each_entry(dev, head, unreg_list) {
- /* And unlink it from device chain. */
- unlist_netdevice(dev);
-
- dev->reg_state = NETREG_UNREGISTERING;
- }
- flush_all_backlogs();
-
- synchronize_net();
-
- list_for_each_entry(dev, head, unreg_list) {
- struct sk_buff *skb = NULL;
-
- /* Shutdown queueing discipline. */
- dev_shutdown(dev);
-
- dev_xdp_uninstall(dev);
-
- /* Notify protocols, that we are about to destroy
- * this device. They should clean all the things.
- */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
- skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL, NULL, 0);
-
- /*
- * Flush the unicast and multicast chains
- */
- dev_uc_flush(dev);
- dev_mc_flush(dev);
-
- netdev_name_node_alt_flush(dev);
- netdev_name_node_free(dev->name_node);
-
- if (dev->netdev_ops->ndo_uninit)
- dev->netdev_ops->ndo_uninit(dev);
-
- if (skb)
- rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
-
- /* Notifier chain MUST detach us all upper devices. */
- WARN_ON(netdev_has_any_upper_dev(dev));
- WARN_ON(netdev_has_any_lower_dev(dev));
-
- /* Remove entries from kobject tree */
- netdev_unregister_kobject(dev);
-#ifdef CONFIG_XPS
- /* Remove XPS queueing entries */
- netif_reset_xps_queues_gt(dev, 0);
-#endif
- }
-
- synchronize_net();
-
- list_for_each_entry(dev, head, unreg_list)
- dev_put(dev);
-}
-
-static void rollback_registered(struct net_device *dev)
-{
- LIST_HEAD(single);
-
- list_add(&dev->unreg_list, &single);
- rollback_registered_many(&single);
- list_del(&single);
-}
-
static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
struct net_device *upper, netdev_features_t features)
{
@@ -9661,9 +9782,20 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
- if ((features & NETIF_F_HW_TLS_TX) && !(features & NETIF_F_HW_CSUM)) {
- netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
- features &= ~NETIF_F_HW_TLS_TX;
+ if (features & NETIF_F_HW_TLS_TX) {
+ bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) ==
+ (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ bool hw_csum = features & NETIF_F_HW_CSUM;
+
+ if (!ip_csum && !hw_csum) {
+ netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n");
+ features &= ~NETIF_F_HW_TLS_TX;
+ }
+ }
+
+ if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
+ netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n");
+ features &= ~NETIF_F_HW_TLS_RX;
}
return features;
@@ -10002,7 +10134,7 @@ int register_netdevice(struct net_device *dev)
dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
dev->features |= NETIF_F_SOFT_FEATURES;
- if (dev->netdev_ops->ndo_udp_tunnel_add) {
+ if (dev->udp_tunnel_nic_info) {
dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
}
@@ -10077,17 +10209,10 @@ int register_netdevice(struct net_device *dev)
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
ret = notifier_to_errno(ret);
if (ret) {
- rollback_registered(dev);
- rcu_barrier();
-
- dev->reg_state = NETREG_UNREGISTERED;
- /* We should put the kobject that hold in
- * netdev_unregister_kobject(), otherwise
- * the net device cannot be freed when
- * driver calls free_netdev(), because the
- * kobject is being hold.
- */
- kobject_put(&dev->dev.kobj);
+ /* Expect explicit free_netdev() on failure */
+ dev->needs_free_netdev = false;
+ unregister_netdevice_queue(dev, NULL);
+ goto out;
}
/*
* Prevent userspace races by waiting until the network
@@ -10631,6 +10756,17 @@ void free_netdev(struct net_device *dev)
struct napi_struct *p, *n;
might_sleep();
+
+ /* When called immediately after register_netdevice() failed the unwind
+ * handling may still be dismantling the device. Handle that case by
+ * deferring the free.
+ */
+ if (dev->reg_state == NETREG_UNREGISTERING) {
+ ASSERT_RTNL();
+ dev->needs_free_netdev = true;
+ return;
+ }
+
netif_free_tx_queues(dev);
netif_free_rx_queues(dev);
@@ -10697,9 +10833,10 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
if (head) {
list_move_tail(&dev->unreg_list, head);
} else {
- rollback_registered(dev);
- /* Finish processing unregister after unlock */
- net_set_todo(dev);
+ LIST_HEAD(single);
+
+ list_add(&dev->unreg_list, &single);
+ unregister_netdevice_many(&single);
}
}
EXPORT_SYMBOL(unregister_netdevice_queue);
@@ -10713,14 +10850,100 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
*/
void unregister_netdevice_many(struct list_head *head)
{
- struct net_device *dev;
+ struct net_device *dev, *tmp;
+ LIST_HEAD(close_head);
+
+ BUG_ON(dev_boot_phase);
+ ASSERT_RTNL();
+
+ if (list_empty(head))
+ return;
+
+ list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+ /* Some devices call without registering
+ * for initialization unwind. Remove those
+ * devices and proceed with the remaining.
+ */
+ if (dev->reg_state == NETREG_UNINITIALIZED) {
+ pr_debug("unregister_netdevice: device %s/%p never was registered\n",
+ dev->name, dev);
+
+ WARN_ON(1);
+ list_del(&dev->unreg_list);
+ continue;
+ }
+ dev->dismantle = true;
+ BUG_ON(dev->reg_state != NETREG_REGISTERED);
+ }
+
+ /* If device is running, close it first. */
+ list_for_each_entry(dev, head, unreg_list)
+ list_add_tail(&dev->close_list, &close_head);
+ dev_close_many(&close_head, true);
+
+ list_for_each_entry(dev, head, unreg_list) {
+ /* And unlink it from device chain. */
+ unlist_netdevice(dev);
+
+ dev->reg_state = NETREG_UNREGISTERING;
+ }
+ flush_all_backlogs();
+
+ synchronize_net();
+
+ list_for_each_entry(dev, head, unreg_list) {
+ struct sk_buff *skb = NULL;
- if (!list_empty(head)) {
- rollback_registered_many(head);
- list_for_each_entry(dev, head, unreg_list)
- net_set_todo(dev);
- list_del(head);
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+ dev_xdp_uninstall(dev);
+
+ /* Notify protocols, that we are about to destroy
+ * this device. They should clean all the things.
+ */
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+
+ if (!dev->rtnl_link_ops ||
+ dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
+ GFP_KERNEL, NULL, 0);
+
+ /*
+ * Flush the unicast and multicast chains
+ */
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
+
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
+
+ if (skb)
+ rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
+
+ /* Notifier chain MUST detach us all upper devices. */
+ WARN_ON(netdev_has_any_upper_dev(dev));
+ WARN_ON(netdev_has_any_lower_dev(dev));
+
+ /* Remove entries from kobject tree */
+ netdev_unregister_kobject(dev);
+#ifdef CONFIG_XPS
+ /* Remove XPS queueing entries */
+ netif_reset_xps_queues_gt(dev, 0);
+#endif
+ }
+
+ synchronize_net();
+
+ list_for_each_entry(dev, head, unreg_list) {
+ dev_put(dev);
+ net_set_todo(dev);
}
+
+ list_del(head);
}
EXPORT_SYMBOL(unregister_netdevice_many);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index db8a0ff86f36..478d032f34ac 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -123,17 +123,6 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
ifr->ifr_mtu = dev->mtu;
return 0;
- case SIOCGIFHWADDR:
- if (!dev->addr_len)
- memset(ifr->ifr_hwaddr.sa_data, 0,
- sizeof(ifr->ifr_hwaddr.sa_data));
- else
- memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
- min(sizeof(ifr->ifr_hwaddr.sa_data),
- (size_t)dev->addr_len));
- ifr->ifr_hwaddr.sa_family = dev->type;
- return 0;
-
case SIOCGIFSLAVE:
err = -EINVAL;
break;
@@ -274,7 +263,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
case SIOCSIFHWADDR:
if (dev->addr_len > sizeof(struct sockaddr))
return -EINVAL;
- return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL);
+ return dev_set_mac_address_user(dev, &ifr->ifr_hwaddr, NULL);
case SIOCSIFHWBROADCAST:
if (ifr->ifr_hwaddr.sa_family != dev->type)
@@ -418,6 +407,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
*/
switch (cmd) {
+ case SIOCGIFHWADDR:
+ dev_load(net, ifr->ifr_name);
+ ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name);
+ if (colon)
+ *colon = ':';
+ return ret;
/*
* These ioctl calls:
* - can be done by all.
@@ -427,7 +422,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
case SIOCGIFFLAGS:
case SIOCGIFMETRIC:
case SIOCGIFMTU:
- case SIOCGIFHWADDR:
case SIOCGIFSLAVE:
case SIOCGIFMAP:
case SIOCGIFINDEX:
diff --git a/net/core/devlink.c b/net/core/devlink.c
index ee828e4b1007..737b61c2976e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -87,6 +87,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
+ [DEVLINK_PORT_FN_ATTR_STATE] =
+ NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
+ DEVLINK_PORT_FN_STATE_ACTIVE),
};
static LIST_HEAD(devlink_list);
@@ -690,6 +693,15 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
return -EMSGSIZE;
break;
+ case DEVLINK_PORT_FLAVOUR_PCI_SF:
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+ attrs->pci_sf.controller) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
+ attrs->pci_sf.pf) ||
+ nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_SF_NUMBER,
+ attrs->pci_sf.sf))
+ return -EMSGSIZE;
+ break;
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
case DEVLINK_PORT_FLAVOUR_CPU:
case DEVLINK_PORT_FLAVOUR_DSA:
@@ -713,42 +725,105 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
}
static int
+devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops,
+ struct devlink_port *port, struct sk_buff *msg,
+ struct netlink_ext_ack *extack, bool *msg_updated)
+{
+ u8 hw_addr[MAX_ADDR_LEN];
+ int hw_addr_len;
+ int err;
+
+ if (!ops->port_function_hw_addr_get)
+ return 0;
+
+ err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
+ if (err)
+ return err;
+ *msg_updated = true;
+ return 0;
+}
+
+static bool
+devlink_port_fn_state_valid(enum devlink_port_fn_state state)
+{
+ return state == DEVLINK_PORT_FN_STATE_INACTIVE ||
+ state == DEVLINK_PORT_FN_STATE_ACTIVE;
+}
+
+static bool
+devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
+{
+ return opstate == DEVLINK_PORT_FN_OPSTATE_DETACHED ||
+ opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
+}
+
+static int
+devlink_port_fn_state_fill(struct devlink *devlink,
+ const struct devlink_ops *ops,
+ struct devlink_port *port, struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
+{
+ enum devlink_port_fn_opstate opstate;
+ enum devlink_port_fn_state state;
+ int err;
+
+ if (!ops->port_fn_state_get)
+ return 0;
+
+ err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ if (!devlink_port_fn_state_valid(state)) {
+ WARN_ON_ONCE(1);
+ NL_SET_ERR_MSG_MOD(extack, "Invalid state read from driver");
+ return -EINVAL;
+ }
+ if (!devlink_port_fn_opstate_valid(opstate)) {
+ WARN_ON_ONCE(1);
+ NL_SET_ERR_MSG_MOD(extack,
+ "Invalid operational state read from driver");
+ return -EINVAL;
+ }
+ if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) ||
+ nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_OPSTATE, opstate))
+ return -EMSGSIZE;
+ *msg_updated = true;
+ return 0;
+}
+
+static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
struct devlink *devlink = port->devlink;
const struct devlink_ops *ops;
struct nlattr *function_attr;
- bool empty_nest = true;
- int err = 0;
+ bool msg_updated = false;
+ int err;
function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION);
if (!function_attr)
return -EMSGSIZE;
ops = devlink->ops;
- if (ops->port_function_hw_addr_get) {
- int hw_addr_len;
- u8 hw_addr[MAX_ADDR_LEN];
-
- err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
- if (err == -EOPNOTSUPP) {
- /* Port function attributes are optional for a port. If port doesn't
- * support function attribute, returning -EOPNOTSUPP is not an error.
- */
- err = 0;
- goto out;
- } else if (err) {
- goto out;
- }
- err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr);
- if (err)
- goto out;
- empty_nest = false;
- }
-
+ err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg,
+ extack, &msg_updated);
+ if (err)
+ goto out;
+ err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack,
+ &msg_updated);
out:
- if (err || empty_nest)
+ if (err || !msg_updated)
nla_nest_cancel(msg, function_attr);
else
nla_nest_end(msg, function_attr);
@@ -986,7 +1061,6 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *
const struct devlink_ops *ops;
const u8 *hw_addr;
int hw_addr_len;
- int err;
hw_addr = nla_data(attr);
hw_addr_len = nla_len(attr);
@@ -1011,12 +1085,25 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *
return -EOPNOTSUPP;
}
- err = ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
- if (err)
- return err;
+ return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+}
- devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
- return 0;
+static int devlink_port_fn_state_set(struct devlink *devlink,
+ struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ enum devlink_port_fn_state state;
+ const struct devlink_ops *ops;
+
+ state = nla_get_u8(attr);
+ ops = devlink->ops;
+ if (!ops->port_fn_state_set) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Function does not support state setting");
+ return -EOPNOTSUPP;
+ }
+ return ops->port_fn_state_set(devlink, port, state, extack);
}
static int
@@ -1034,9 +1121,21 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
}
attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
- if (attr)
+ if (attr) {
err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+ if (err)
+ return err;
+ }
+ /* Keep this as the last function attribute set, so that when
+ * multiple port function attributes are set along with state,
+ * Those can be applied first before activating the state.
+ */
+ attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
+ if (attr)
+ err = devlink_port_fn_state_set(devlink, port, attr, extack);
+ if (!err)
+ devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
return err;
}
@@ -1136,6 +1235,111 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
return devlink_port_unsplit(devlink, port_index, info->extack);
}
+static int devlink_port_new_notifiy(struct devlink *devlink,
+ unsigned int port_index,
+ struct genl_info *info)
+{
+ struct devlink_port *devlink_port;
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ mutex_lock(&devlink->lock);
+ devlink_port = devlink_port_get_by_index(devlink, port_index);
+ if (!devlink_port) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ err = devlink_nl_port_fill(msg, devlink, devlink_port,
+ DEVLINK_CMD_NEW, info->snd_portid,
+ info->snd_seq, 0, NULL);
+ if (err)
+ goto out;
+
+ err = genlmsg_reply(msg, info);
+ mutex_unlock(&devlink->lock);
+ return err;
+
+out:
+ mutex_unlock(&devlink->lock);
+ nlmsg_free(msg);
+ return err;
+}
+
+static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink_port_new_attrs new_attrs = {};
+ struct devlink *devlink = info->user_ptr[0];
+ unsigned int new_port_index;
+ int err;
+
+ if (!devlink->ops->port_new || !devlink->ops->port_del)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
+ !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) {
+ NL_SET_ERR_MSG_MOD(extack, "Port flavour or PCI PF are not specified");
+ return -EINVAL;
+ }
+ new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]);
+ new_attrs.pfnum =
+ nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]);
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ /* Port index of the new port being created by driver. */
+ new_attrs.port_index =
+ nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+ new_attrs.port_index_valid = true;
+ }
+ if (info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) {
+ new_attrs.controller =
+ nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]);
+ new_attrs.controller_valid = true;
+ }
+ if (new_attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_SF &&
+ info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) {
+ new_attrs.sfnum = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]);
+ new_attrs.sfnum_valid = true;
+ }
+
+ err = devlink->ops->port_new(devlink, &new_attrs, extack,
+ &new_port_index);
+ if (err)
+ return err;
+
+ err = devlink_port_new_notifiy(devlink, new_port_index, info);
+ if (err && err != -ENODEV) {
+ /* Fail to send the response; destroy newly created port. */
+ devlink->ops->port_del(devlink, new_port_index, extack);
+ }
+ return err;
+}
+
+static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct devlink *devlink = info->user_ptr[0];
+ unsigned int port_index;
+
+ if (!devlink->ops->port_del)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ NL_SET_ERR_MSG_MOD(extack, "Port index is not specified");
+ return -EINVAL;
+ }
+ port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ return devlink->ops->port_del(devlink, port_index, extack);
+}
+
static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
enum devlink_command cmd, u32 portid,
@@ -4146,7 +4350,7 @@ out:
static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink_param_item *param_item;
struct sk_buff *msg;
int err;
@@ -4175,7 +4379,7 @@ static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- struct devlink_port *devlink_port = info->user_ptr[0];
+ struct devlink_port *devlink_port = info->user_ptr[1];
return __devlink_nl_cmd_param_set_doit(devlink_port->devlink,
devlink_port->index,
@@ -7594,6 +7798,10 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_ACTION_MAX),
[DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK),
+ [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@@ -7634,6 +7842,18 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
+ .cmd = DEVLINK_CMD_PORT_NEW,
+ .doit = devlink_nl_cmd_port_new_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
+ },
+ {
+ .cmd = DEVLINK_CMD_PORT_DEL,
+ .doit = devlink_nl_cmd_port_del_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
+ },
+ {
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_get_doit,
@@ -8372,6 +8592,32 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
+/**
+ * devlink_port_attrs_pci_sf_set - Set PCI SF port attributes
+ *
+ * @devlink_port: devlink port
+ * @controller: associated controller number for the devlink port instance
+ * @pf: associated PF for the devlink port instance
+ * @sf: associated SF of a PF for the devlink port instance
+ */
+void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, u32 sf)
+{
+ struct devlink_port_attrs *attrs = &devlink_port->attrs;
+ int ret;
+
+ if (WARN_ON(devlink_port->registered))
+ return;
+ ret = __devlink_port_attrs_set(devlink_port,
+ DEVLINK_PORT_FLAVOUR_PCI_SF);
+ if (ret)
+ return;
+ attrs->pci_sf.controller = controller;
+ attrs->pci_sf.pf = pf;
+ attrs->pci_sf.sf = sf;
+}
+EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
+
static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
char *name, size_t len)
{
@@ -8420,6 +8666,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
n = snprintf(name, len, "pf%uvf%u",
attrs->pci_vf.pf, attrs->pci_vf.vf);
break;
+ case DEVLINK_PORT_FLAVOUR_PCI_SF:
+ n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
+ attrs->pci_sf.sf);
+ break;
}
if (n >= len)
@@ -8617,6 +8867,10 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
* @resource_id: resource's id
* @parent_resource_id: resource's parent id
* @size_params: size parameters
+ *
+ * Generic resources should reuse the same names across drivers.
+ * Please see the generic resources list at:
+ * Documentation/networking/devlink/devlink-resource.rst
*/
int devlink_resource_register(struct devlink *devlink,
const char *resource_name,
@@ -9508,6 +9762,7 @@ static const struct devlink_trap devlink_trap_generic[] = {
DEVLINK_TRAP(GTP_PARSING, DROP),
DEVLINK_TRAP(ESP_PARSING, DROP),
DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP),
+ DEVLINK_TRAP(DMAC_FILTER, DROP),
};
#define DEVLINK_TRAP_GROUP(_id) \
diff --git a/net/core/filter.c b/net/core/filter.c
index 255aeee72402..adfdad234674 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2083,13 +2083,13 @@ static const struct bpf_func_proto bpf_csum_level_proto = {
static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
{
- return dev_forward_skb(dev, skb);
+ return dev_forward_skb_nomtu(dev, skb);
}
static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
struct sk_buff *skb)
{
- int ret = ____dev_forward_skb(dev, skb);
+ int ret = ____dev_forward_skb(dev, skb, false);
if (likely(!ret)) {
skb->dev = dev;
@@ -2480,7 +2480,7 @@ int skb_do_redirect(struct sk_buff *skb)
goto out_drop;
dev = ops->ndo_get_peer_dev(dev);
if (unlikely(!dev ||
- !is_skb_forwardable(dev, skb) ||
+ !(dev->flags & IFF_UP) ||
net_eq(net, dev_net(dev))))
goto out_drop;
skb->dev = dev;
@@ -3552,11 +3552,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
return 0;
}
-static u32 __bpf_skb_max_len(const struct sk_buff *skb)
-{
- return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
- SKB_MAX_ALLOC;
-}
+#define BPF_SKB_MAX_LEN SKB_MAX_ALLOC
BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
u32, mode, u64, flags)
@@ -3605,7 +3601,7 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
{
u32 len_cur, len_diff_abs = abs(len_diff);
u32 len_min = bpf_skb_net_base_len(skb);
- u32 len_max = __bpf_skb_max_len(skb);
+ u32 len_max = BPF_SKB_MAX_LEN;
__be16 proto = skb->protocol;
bool shrink = len_diff < 0;
u32 off;
@@ -3688,7 +3684,7 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
u64 flags)
{
- u32 max_len = __bpf_skb_max_len(skb);
+ u32 max_len = BPF_SKB_MAX_LEN;
u32 min_len = __bpf_skb_min_len(skb);
int ret;
@@ -3764,7 +3760,7 @@ static const struct bpf_func_proto sk_skb_change_tail_proto = {
static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
u64 flags)
{
- u32 max_len = __bpf_skb_max_len(skb);
+ u32 max_len = BPF_SKB_MAX_LEN;
u32 new_len = skb->len + head_room;
int ret;
@@ -4631,6 +4627,18 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_1(bpf_get_socket_ptr_cookie, struct sock *, sk)
+{
+ return sk ? sock_gen_cookie(sk) : 0;
+}
+
+const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = {
+ .func = bpf_get_socket_ptr_cookie,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+};
+
BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
{
return __sock_gen_cookie(ctx->sk);
@@ -4645,11 +4653,9 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
static u64 __bpf_get_netns_cookie(struct sock *sk)
{
-#ifdef CONFIG_NET_NS
- return __net_gen_cookie(sk ? sk->sk_net.net : &init_net);
-#else
- return 0;
-#endif
+ const struct net *net = sk ? sock_net(sk) : &init_net;
+
+ return net->net_cookie;
}
BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
@@ -4770,6 +4776,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
ifindex = dev->ifindex;
dev_put(dev);
}
+ fallthrough;
+ case SO_BINDTOIFINDEX:
+ if (optname == SO_BINDTOIFINDEX)
+ ifindex = val;
ret = sock_bindtoindex(sk, ifindex, false);
break;
case SO_KEEPALIVE:
@@ -4932,8 +4942,25 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
sock_owned_by_me(sk);
+ if (level == SOL_SOCKET) {
+ if (optlen != sizeof(int))
+ goto err_clear;
+
+ switch (optname) {
+ case SO_MARK:
+ *((int *)optval) = sk->sk_mark;
+ break;
+ case SO_PRIORITY:
+ *((int *)optval) = sk->sk_priority;
+ break;
+ case SO_BINDTOIFINDEX:
+ *((int *)optval) = sk->sk_bound_dev_if;
+ break;
+ default:
+ goto err_clear;
+ }
#ifdef CONFIG_INET
- if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
+ } else if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
struct inet_connection_sock *icsk;
struct tcp_sock *tp;
@@ -4987,11 +5014,11 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
goto err_clear;
}
#endif
+#endif
} else {
goto err_clear;
}
return 0;
-#endif
err_clear:
memset(optval, 0, optlen);
return -EINVAL;
@@ -5272,12 +5299,14 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
const struct neighbour *neigh,
- const struct net_device *dev)
+ const struct net_device *dev, u32 mtu)
{
memcpy(params->dmac, neigh->ha, ETH_ALEN);
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
+ if (mtu)
+ params->mtu_result = mtu; /* union with tot_len */
return 0;
}
@@ -5293,8 +5322,8 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
struct net_device *dev;
struct fib_result res;
struct flowi4 fl4;
+ u32 mtu = 0;
int err;
- u32 mtu;
dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev))
@@ -5361,8 +5390,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) {
mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
- if (params->tot_len > mtu)
+ if (params->tot_len > mtu) {
+ params->mtu_result = mtu; /* union with tot_len */
return BPF_FIB_LKUP_RET_FRAG_NEEDED;
+ }
}
nhc = res.nhc;
@@ -5396,7 +5427,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;
- return bpf_fib_set_fwd_params(params, neigh, dev);
+ return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
}
#endif
@@ -5413,7 +5444,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
struct flowi6 fl6;
int strict = 0;
int oif, err;
- u32 mtu;
+ u32 mtu = 0;
/* link local addresses are never forwarded */
if (rt6_need_strict(dst) || rt6_need_strict(src))
@@ -5488,8 +5519,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) {
mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
- if (params->tot_len > mtu)
+ if (params->tot_len > mtu) {
+ params->mtu_result = mtu; /* union with tot_len */
return BPF_FIB_LKUP_RET_FRAG_NEEDED;
+ }
}
if (res.nh->fib_nh_lws)
@@ -5509,7 +5542,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (!neigh)
return BPF_FIB_LKUP_RET_NO_NEIGH;
- return bpf_fib_set_fwd_params(params, neigh, dev);
+ return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
}
#endif
@@ -5552,6 +5585,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
{
struct net *net = dev_net(skb->dev);
int rc = -EAFNOSUPPORT;
+ bool check_mtu = false;
if (plen < sizeof(*params))
return -EINVAL;
@@ -5559,25 +5593,33 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
return -EINVAL;
+ if (params->tot_len)
+ check_mtu = true;
+
switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
- rc = bpf_ipv4_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu);
break;
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- rc = bpf_ipv6_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu);
break;
#endif
}
- if (!rc) {
+ if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) {
struct net_device *dev;
+ /* When tot_len isn't provided by the user, check the skb against
+ * the MTU of the net_device resulting from the FIB lookup
+ */
dev = dev_get_by_index_rcu(net, params->ifindex);
if (!is_skb_forwardable(dev, skb))
rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
+
+ params->mtu_result = dev->mtu; /* union with tot_len */
}
return rc;
@@ -5593,6 +5635,116 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
.arg4_type = ARG_ANYTHING,
};
+static struct net_device *__dev_via_ifindex(struct net_device *dev_curr,
+ u32 ifindex)
+{
+ struct net *netns = dev_net(dev_curr);
+
+ /* Non-redirect use-cases can use ifindex=0 and save ifindex lookup */
+ if (ifindex == 0)
+ return dev_curr;
+
+ return dev_get_by_index_rcu(netns, ifindex);
+}
+
+BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
+ u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
+{
+ int ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
+ struct net_device *dev = skb->dev;
+ int skb_len, dev_len;
+ int mtu;
+
+ if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
+ return -EINVAL;
+
+ if (unlikely(flags & BPF_MTU_CHK_SEGS && len_diff))
+ return -EINVAL;
+
+ dev = __dev_via_ifindex(dev, ifindex);
+ if (unlikely(!dev))
+ return -ENODEV;
+
+ mtu = READ_ONCE(dev->mtu);
+
+ dev_len = mtu + dev->hard_header_len;
+ skb_len = skb->len + len_diff; /* minus result pass check */
+ if (skb_len <= dev_len) {
+ ret = BPF_MTU_CHK_RET_SUCCESS;
+ goto out;
+ }
+ /* At this point, skb->len exceeds the MTU, but as it includes the length
+ * of all segments, each segment can still be below the MTU. The SKB can
+ * possibly get re-segmented in the transmit path (see validate_xmit_skb).
+ * Thus, the user must choose whether segs are to be MTU checked.
+ */
+ if (skb_is_gso(skb)) {
+ ret = BPF_MTU_CHK_RET_SUCCESS;
+
+ if (flags & BPF_MTU_CHK_SEGS &&
+ !skb_gso_validate_network_len(skb, mtu))
+ ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
+ }
+out:
+ /* BPF verifier guarantees valid pointer */
+ *mtu_len = mtu;
+
+ return ret;
+}
+
+BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
+ u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
+{
+ struct net_device *dev = xdp->rxq->dev;
+ int xdp_len = xdp->data_end - xdp->data;
+ int ret = BPF_MTU_CHK_RET_SUCCESS;
+ int mtu, dev_len;
+
+ /* XDP variant doesn't support multi-buffer segment check (yet) */
+ if (unlikely(flags))
+ return -EINVAL;
+
+ dev = __dev_via_ifindex(dev, ifindex);
+ if (unlikely(!dev))
+ return -ENODEV;
+
+ mtu = READ_ONCE(dev->mtu);
+
+ /* Add L2-header as dev MTU is L3 size */
+ dev_len = mtu + dev->hard_header_len;
+
+ xdp_len += len_diff; /* minus result pass check */
+ if (xdp_len > dev_len)
+ ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
+
+ /* BPF verifier guarantees valid pointer */
+ *mtu_len = mtu;
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
+ .func = bpf_skb_check_mtu,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_INT,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
+ .func = bpf_xdp_check_mtu,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_INT,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
{
@@ -7002,6 +7154,14 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_INET4_GETPEERNAME:
+ case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_INET4_GETSOCKNAME:
+ case BPF_CGROUP_INET6_GETSOCKNAME:
return &bpf_sock_addr_setsockopt_proto;
default:
return NULL;
@@ -7012,6 +7172,14 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_UDP4_RECVMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_INET4_GETPEERNAME:
+ case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_INET4_GETSOCKNAME:
+ case BPF_CGROUP_INET6_GETSOCKNAME:
return &bpf_sock_addr_getsockopt_proto;
default:
return NULL;
@@ -7162,6 +7330,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_uid_proto;
case BPF_FUNC_fib_lookup:
return &bpf_skb_fib_lookup_proto;
+ case BPF_FUNC_check_mtu:
+ return &bpf_skb_check_mtu_proto;
case BPF_FUNC_sk_fullsock:
return &bpf_sk_fullsock_proto;
case BPF_FUNC_sk_storage_get:
@@ -7231,6 +7401,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_adjust_tail_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
+ case BPF_FUNC_check_mtu:
+ return &bpf_xdp_check_mtu_proto;
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_udp:
return &bpf_xdp_sk_lookup_udp_proto;
@@ -8795,7 +8967,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
target_size));
break;
case offsetof(struct bpf_sock, rx_queue_mapping):
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
si->dst_reg, si->src_reg,
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 6f1adba6695f..2ef2224b3bff 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -23,6 +23,7 @@
#include <linux/if_ether.h>
#include <linux/mpls.h>
#include <linux/tcp.h>
+#include <linux/ptp_classify.h>
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
#include <uapi/linux/batadv_packet.h>
@@ -236,9 +237,8 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
void
skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
- void *target_container,
- u16 *ctinfo_map,
- size_t mapsize)
+ void *target_container, u16 *ctinfo_map,
+ size_t mapsize, bool post_ct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct flow_dissector_key_ct *key;
@@ -250,13 +250,19 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
return;
ct = nf_ct_get(skb, &ctinfo);
- if (!ct)
+ if (!ct && !post_ct)
return;
key = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_CT,
target_container);
+ if (!ct) {
+ key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+ TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+ return;
+ }
+
if (ctinfo < mapsize)
key->ct_state = ctinfo_map[ctinfo];
#if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)
@@ -1050,6 +1056,9 @@ proto_again:
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
+ __skb_flow_dissect_ipv4(skb, flow_dissector,
+ target_container, data, iph);
+
if (ip_is_fragment(iph)) {
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
@@ -1066,9 +1075,6 @@ proto_again:
}
}
- __skb_flow_dissect_ipv4(skb, flow_dissector,
- target_container, data, iph);
-
break;
}
case htons(ETH_P_IPV6): {
@@ -1251,6 +1257,21 @@ proto_again:
&proto, &nhoff, hlen, flags);
break;
+ case htons(ETH_P_1588): {
+ struct ptp_header *hdr, _hdr;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
+ hlen, &_hdr);
+ if (!hdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
+ nhoff += ntohs(hdr->message_length);
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
default:
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 80dbf2f4016e..8e582e29a41e 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -80,11 +80,11 @@ static void est_timer(struct timer_list *t)
u64 rate, brate;
est_fetch_counters(est, &b);
- brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
- brate -= (est->avbps >> est->ewma_log);
+ brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
+ brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
- rate = (b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
- rate -= (est->avpps >> est->ewma_log);
+ rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
+ rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
write_seqcount_begin(&est->seq);
est->avbps += brate;
@@ -143,6 +143,9 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
if (parm->interval < -2 || parm->interval > 3)
return -EINVAL;
+ if (parm->ewma_log == 0 || parm->ewma_log >= 31)
+ return -EINVAL;
+
est = kzalloc(sizeof(*est), GFP_KERNEL);
if (!est)
return -ENOBUFS;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 9500d28a43b0..e2982b3970b8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -41,7 +41,6 @@
#include <trace/events/neigh.h>
-#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...) \
do { \
@@ -1245,13 +1244,14 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
old = neigh->nud_state;
err = -EPERM;
- if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
- (old & (NUD_NOARP | NUD_PERMANENT)))
- goto out;
if (neigh->dead) {
NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
+ new = old;
goto out;
}
+ if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
+ (old & (NUD_NOARP | NUD_PERMANENT)))
+ goto out;
ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
@@ -1569,10 +1569,8 @@ static void neigh_proxy_process(struct timer_list *t)
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
struct sk_buff *skb)
{
- unsigned long now = jiffies;
-
- unsigned long sched_next = now + (prandom_u32() %
- NEIGH_VAR(p, PROXY_DELAY));
+ unsigned long sched_next = jiffies +
+ prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY));
if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
kfree_skb(skb);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 999b70c59761..307628fdf380 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -538,6 +538,45 @@ static ssize_t phys_switch_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_switch_id);
+static ssize_t threaded_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev))
+ ret = sprintf(buf, fmt_dec, netdev->threaded);
+
+ rtnl_unlock();
+ return ret;
+}
+
+static int modify_napi_threaded(struct net_device *dev, unsigned long val)
+{
+ int ret;
+
+ if (list_empty(&dev->napi_list))
+ return -EOPNOTSUPP;
+
+ if (val != 0 && val != 1)
+ return -EOPNOTSUPP;
+
+ ret = dev_set_threaded(dev, val);
+
+ return ret;
+}
+
+static ssize_t threaded_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return netdev_store(dev, attr, buf, len, modify_napi_threaded);
+}
+static DEVICE_ATTR_RW(threaded);
+
static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
@@ -570,6 +609,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_proto_down.attr,
&dev_attr_carrier_up_count.attr,
&dev_attr_carrier_down_count.attr,
+ &dev_attr_threaded.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);
@@ -1136,18 +1176,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
char *buf)
{
struct net_device *dev = queue->dev;
+ int num_tc, tc;
int index;
- int tc;
if (!netif_is_multiqueue(dev))
return -ENOENT;
+ if (!rtnl_trylock())
+ return restart_syscall();
+
index = get_netdev_queue_index(queue);
/* If queue belongs to subordinate dev use its TC mapping */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+ num_tc = dev->num_tc;
tc = netdev_txq_to_tc(dev, index);
+
+ rtnl_unlock();
+
if (tc < 0)
return -EINVAL;
@@ -1158,8 +1205,8 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
* belongs to the root device it will be reported with just the
* traffic class, so just "0" for TC 0 for example.
*/
- return dev->num_tc < 0 ? sprintf(buf, "%d%d\n", tc, dev->num_tc) :
- sprintf(buf, "%d\n", tc);
+ return num_tc < 0 ? sprintf(buf, "%d%d\n", tc, num_tc) :
+ sprintf(buf, "%d\n", tc);
}
#ifdef CONFIG_XPS
@@ -1317,8 +1364,8 @@ static const struct attribute_group dql_group = {
static ssize_t xps_cpus_show(struct netdev_queue *queue,
char *buf)
{
+ int cpu, len, ret, num_tc = 1, tc = 0;
struct net_device *dev = queue->dev;
- int cpu, len, num_tc = 1, tc = 0;
struct xps_dev_maps *dev_maps;
cpumask_var_t mask;
unsigned long index;
@@ -1328,22 +1375,31 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
index = get_netdev_queue_index(queue);
+ if (!rtnl_trylock())
+ return restart_syscall();
+
if (dev->num_tc) {
/* Do not allow XPS on subordinate device directly */
num_tc = dev->num_tc;
- if (num_tc < 0)
- return -EINVAL;
+ if (num_tc < 0) {
+ ret = -EINVAL;
+ goto err_rtnl_unlock;
+ }
/* If queue belongs to subordinate dev use its map */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
tc = netdev_txq_to_tc(dev, index);
- if (tc < 0)
- return -EINVAL;
+ if (tc < 0) {
+ ret = -EINVAL;
+ goto err_rtnl_unlock;
+ }
}
- if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_rtnl_unlock;
+ }
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_cpus_map);
@@ -1366,9 +1422,15 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
}
rcu_read_unlock();
+ rtnl_unlock();
+
len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
free_cpumask_var(mask);
return len < PAGE_SIZE ? len : -EINVAL;
+
+err_rtnl_unlock:
+ rtnl_unlock();
+ return ret;
}
static ssize_t xps_cpus_store(struct netdev_queue *queue,
@@ -1396,7 +1458,13 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
return err;
}
+ if (!rtnl_trylock()) {
+ free_cpumask_var(mask);
+ return restart_syscall();
+ }
+
err = netif_set_xps_queue(dev, mask, index);
+ rtnl_unlock();
free_cpumask_var(mask);
@@ -1408,22 +1476,29 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
{
+ int j, len, ret, num_tc = 1, tc = 0;
struct net_device *dev = queue->dev;
struct xps_dev_maps *dev_maps;
unsigned long *mask, index;
- int j, len, num_tc = 1, tc = 0;
index = get_netdev_queue_index(queue);
+ if (!rtnl_trylock())
+ return restart_syscall();
+
if (dev->num_tc) {
num_tc = dev->num_tc;
tc = netdev_txq_to_tc(dev, index);
- if (tc < 0)
- return -EINVAL;
+ if (tc < 0) {
+ ret = -EINVAL;
+ goto err_rtnl_unlock;
+ }
}
mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
- if (!mask)
- return -ENOMEM;
+ if (!mask) {
+ ret = -ENOMEM;
+ goto err_rtnl_unlock;
+ }
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_rxqs_map);
@@ -1449,10 +1524,16 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
out_no_maps:
rcu_read_unlock();
+ rtnl_unlock();
+
len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
bitmap_free(mask);
return len < PAGE_SIZE ? len : -EINVAL;
+
+err_rtnl_unlock:
+ rtnl_unlock();
+ return ret;
}
static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
@@ -1478,10 +1559,17 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
return err;
}
+ if (!rtnl_trylock()) {
+ bitmap_free(mask);
+ return restart_syscall();
+ }
+
cpus_read_lock();
err = __netif_set_xps_queue(dev, mask, index, true);
cpus_read_unlock();
+ rtnl_unlock();
+
bitmap_free(mask);
return err ? : len;
}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2ef3b4557f40..43b6ac4c4439 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -72,18 +72,6 @@ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
DEFINE_COOKIE(net_cookie);
-u64 __net_gen_cookie(struct net *net)
-{
- while (1) {
- u64 res = atomic64_read(&net->net_cookie);
-
- if (res)
- return res;
- res = gen_cookie_next(&net_cookie);
- atomic64_cmpxchg(&net->net_cookie, 0, res);
- }
-}
-
static struct net_generic *net_alloc_generic(void)
{
struct net_generic *ng;
@@ -332,6 +320,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
refcount_set(&net->ns.count, 1);
refcount_set(&net->passive, 1);
get_random_bytes(&net->hash_mix, sizeof(u32));
+ preempt_disable();
+ net->net_cookie = gen_cookie_next(&net_cookie);
+ preempt_enable();
net->dev_base_seq = 1;
net->user_ns = user_ns;
idr_init(&net->netns_ids);
@@ -1103,10 +1094,6 @@ static int __init net_ns_init(void)
rcu_assign_pointer(init_net.gen, ng);
- preempt_disable();
- __net_gen_cookie(&init_net);
- preempt_enable();
-
down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 960948290001..c310c7c1cef7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
-#include <net/dsa.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
@@ -658,15 +657,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
int netpoll_setup(struct netpoll *np)
{
- struct net_device *ndev = NULL, *dev = NULL;
- struct net *net = current->nsproxy->net_ns;
+ struct net_device *ndev = NULL;
struct in_device *in_dev;
int err;
rtnl_lock();
- if (np->dev_name[0])
+ if (np->dev_name[0]) {
+ struct net *net = current->nsproxy->net_ns;
ndev = __dev_get_by_name(net, np->dev_name);
-
+ }
if (!ndev) {
np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
err = -ENODEV;
@@ -674,19 +673,6 @@ int netpoll_setup(struct netpoll *np)
}
dev_hold(ndev);
- /* bring up DSA management network devices up first */
- for_each_netdev(net, dev) {
- if (!netdev_uses_dsa(dev))
- continue;
-
- err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
- if (err < 0) {
- np_err(np, "%s failed to open %s\n",
- np->dev_name, dev->name);
- goto put;
- }
- }
-
if (netdev_master_upper_dev_get(ndev)) {
np_err(np, "%s is a slave device, aborting\n", np->dev_name);
err = -EBUSY;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index f3c690b8c8e3..ad8b0707af04 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -350,14 +350,6 @@ static bool page_pool_recycle_in_cache(struct page *page,
return true;
}
-/* page is NOT reusable when:
- * 1) allocated when system is under some pressure. (page_is_pfmemalloc)
- */
-static bool pool_page_reusable(struct page_pool *pool, struct page *page)
-{
- return !page_is_pfmemalloc(page);
-}
-
/* If the page refcnt == 1, this will try to recycle the page.
* if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for
* the configured size min(dma_sync_size, pool->max_len).
@@ -373,9 +365,11 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
* regular page allocator APIs.
*
* refcnt == 1 means page_pool owns page, and can recycle it.
+ *
+ * A page is NOT reusable if it was allocated while the system was
+ * under some pressure (page_is_pfmemalloc).
*/
- if (likely(page_ref_count(page) == 1 &&
- pool_page_reusable(pool, page))) {
+ if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) {
/* Read barrier done in page_ref_count / READ_ONCE */
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 105978604ffd..3fba429f1f57 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3464,7 +3464,7 @@ static int pktgen_thread_worker(void *arg)
struct pktgen_dev *pkt_dev = NULL;
int cpu = t->cpu;
- BUG_ON(smp_processor_id() != cpu);
+ WARN_ON(smp_processor_id() != cpu);
init_waitqueue_head(&t->queue);
complete(&t->start_done);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bb0596c41b3e..0edc0b2baaa4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -55,7 +55,7 @@
#include <net/net_namespace.h>
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 36
+#define RTNL_SLAVE_MAX_TYPE 40
struct rtnl_link {
rtnl_doit_func doit;
@@ -2660,7 +2660,7 @@ static int do_setlink(const struct sk_buff *skb,
sa->sa_family = dev->type;
memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
dev->addr_len);
- err = dev_set_mac_address(dev, sa, extack);
+ err = dev_set_mac_address_user(dev, sa, extack);
kfree(sa);
if (err)
goto errout;
@@ -3439,26 +3439,15 @@ replay:
dev->ifindex = ifm->ifi_index;
- if (ops->newlink) {
+ if (ops->newlink)
err = ops->newlink(link_net ? : net, dev, tb, data, extack);
- /* Drivers should call free_netdev() in ->destructor
- * and unregister it on failure after registration
- * so that device could be finally freed in rtnl_unlock.
- */
- if (err < 0) {
- /* If device is not registered at all, free it now */
- if (dev->reg_state == NETREG_UNINITIALIZED ||
- dev->reg_state == NETREG_UNREGISTERED)
- free_netdev(dev);
- goto out;
- }
- } else {
+ else
err = register_netdevice(dev);
- if (err < 0) {
- free_netdev(dev);
- goto out;
- }
+ if (err < 0) {
+ free_netdev(dev);
+ goto out;
}
+
err = rtnl_configure_link(dev, ifm);
if (err < 0)
goto out_unregister;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f62cae3f75d8..545a472273a5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -119,151 +119,75 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
skb_panic(skb, sz, addr, __func__);
}
-/*
- * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
- * the caller if emergency pfmemalloc reserves are being used. If it is and
- * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
- * may be used. Otherwise, the packet data may be discarded until enough
- * memory is free
- */
-#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
- __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
-
-static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
- unsigned long ip, bool *pfmemalloc)
-{
- void *obj;
- bool ret_pfmemalloc = false;
+#define NAPI_SKB_CACHE_SIZE 64
+#define NAPI_SKB_CACHE_BULK 16
+#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
- /*
- * Try a regular allocation, when that fails and we're not entitled
- * to the reserves, fail.
- */
- obj = kmalloc_node_track_caller(size,
- flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
- node);
- if (obj || !(gfp_pfmemalloc_allowed(flags)))
- goto out;
+struct napi_alloc_cache {
+ struct page_frag_cache page;
+ unsigned int skb_count;
+ void *skb_cache[NAPI_SKB_CACHE_SIZE];
+};
- /* Try again but now we are using pfmemalloc reserves */
- ret_pfmemalloc = true;
- obj = kmalloc_node_track_caller(size, flags, node);
+static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
-out:
- if (pfmemalloc)
- *pfmemalloc = ret_pfmemalloc;
+static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align_mask)
+{
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- return obj;
+ return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask);
}
-/* Allocate a new skbuff. We do this ourselves so we can fill in a few
- * 'private' fields and also do memory statistics to find all the
- * [BEEP] leaks.
- *
- */
-
-/**
- * __alloc_skb - allocate a network buffer
- * @size: size to allocate
- * @gfp_mask: allocation mask
- * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
- * instead of head cache and allocate a cloned (child) skb.
- * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
- * allocations in case the data is required for writeback
- * @node: numa node to allocate memory on
- *
- * Allocate a new &sk_buff. The returned buffer has no headroom and a
- * tail room of at least size bytes. The object has a reference count
- * of one. The return is the buffer. On a failure the return is %NULL.
- *
- * Buffers may only be allocated from interrupts using a @gfp_mask of
- * %GFP_ATOMIC.
- */
-struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
- int flags, int node)
+void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
- struct kmem_cache *cache;
- struct skb_shared_info *shinfo;
- struct sk_buff *skb;
- u8 *data;
- bool pfmemalloc;
-
- cache = (flags & SKB_ALLOC_FCLONE)
- ? skbuff_fclone_cache : skbuff_head_cache;
-
- if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
- gfp_mask |= __GFP_MEMALLOC;
-
- /* Get the HEAD */
- skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
- if (!skb)
- goto out;
- prefetchw(skb);
-
- /* We do our best to align skb_shared_info on a separate cache
- * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
- * aligned memory blocks, unless SLUB/SLAB debug is enabled.
- * Both skb->head and skb_shared_info are cache line aligned.
- */
- size = SKB_DATA_ALIGN(size);
- size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
- if (!data)
- goto nodata;
- /* kmalloc(size) might give us more room than requested.
- * Put skb_shared_info exactly at the end of allocated zone,
- * to allow max possible filling before reallocation.
- */
- size = SKB_WITH_OVERHEAD(ksize(data));
- prefetchw(data + size);
-
- /*
- * Only clear those fields we need to clear, not those that we will
- * actually initialise below. Hence, don't put any more fields after
- * the tail pointer in struct sk_buff!
- */
- memset(skb, 0, offsetof(struct sk_buff, tail));
- /* Account for allocated memory : skb + skb->head */
- skb->truesize = SKB_TRUESIZE(size);
- skb->pfmemalloc = pfmemalloc;
- refcount_set(&skb->users, 1);
- skb->head = data;
- skb->data = data;
- skb_reset_tail_pointer(skb);
- skb->end = skb->tail + size;
- skb->mac_header = (typeof(skb->mac_header))~0U;
- skb->transport_header = (typeof(skb->transport_header))~0U;
+ fragsz = SKB_DATA_ALIGN(fragsz);
- /* make sure we initialize shinfo sequentially */
- shinfo = skb_shinfo(skb);
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
- atomic_set(&shinfo->dataref, 1);
+ return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
+}
+EXPORT_SYMBOL(__napi_alloc_frag_align);
- if (flags & SKB_ALLOC_FCLONE) {
- struct sk_buff_fclones *fclones;
+void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
+{
+ struct page_frag_cache *nc;
+ void *data;
- fclones = container_of(skb, struct sk_buff_fclones, skb1);
+ fragsz = SKB_DATA_ALIGN(fragsz);
+ if (in_irq() || irqs_disabled()) {
+ nc = this_cpu_ptr(&netdev_alloc_cache);
+ data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
+ } else {
+ local_bh_disable();
+ data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask);
+ local_bh_enable();
+ }
+ return data;
+}
+EXPORT_SYMBOL(__netdev_alloc_frag_align);
- skb->fclone = SKB_FCLONE_ORIG;
- refcount_set(&fclones->fclone_ref, 1);
+static struct sk_buff *napi_skb_cache_get(void)
+{
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ struct sk_buff *skb;
- fclones->skb2.fclone = SKB_FCLONE_CLONE;
- }
+ if (unlikely(!nc->skb_count))
+ nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache,
+ GFP_ATOMIC,
+ NAPI_SKB_CACHE_BULK,
+ nc->skb_cache);
+ if (unlikely(!nc->skb_count))
+ return NULL;
- skb_set_kcov_handle(skb, kcov_common_handle());
+ skb = nc->skb_cache[--nc->skb_count];
+ kasan_unpoison_object_data(skbuff_head_cache, skb);
-out:
return skb;
-nodata:
- kmem_cache_free(cache, skb);
- skb = NULL;
- goto out;
}
-EXPORT_SYMBOL(__alloc_skb);
/* Caller must provide SKB that is memset cleared */
-static struct sk_buff *__build_skb_around(struct sk_buff *skb,
- void *data, unsigned int frag_size)
+static void __build_skb_around(struct sk_buff *skb, void *data,
+ unsigned int frag_size)
{
struct skb_shared_info *shinfo;
unsigned int size = frag_size ? : ksize(data);
@@ -286,8 +210,6 @@ static struct sk_buff *__build_skb_around(struct sk_buff *skb,
atomic_set(&shinfo->dataref, 1);
skb_set_kcov_handle(skb, kcov_common_handle());
-
- return skb;
}
/**
@@ -318,8 +240,9 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
return NULL;
memset(skb, 0, offsetof(struct sk_buff, tail));
+ __build_skb_around(skb, data, frag_size);
- return __build_skb_around(skb, data, frag_size);
+ return skb;
}
/* build_skb() is wrapper over __build_skb(), that specifically
@@ -352,9 +275,9 @@ struct sk_buff *build_skb_around(struct sk_buff *skb,
if (unlikely(!skb))
return NULL;
- skb = __build_skb_around(skb, data, frag_size);
+ __build_skb_around(skb, data, frag_size);
- if (skb && frag_size) {
+ if (frag_size) {
skb->head_frag = 1;
if (page_is_pfmemalloc(virt_to_head_page(data)))
skb->pfmemalloc = 1;
@@ -363,56 +286,178 @@ struct sk_buff *build_skb_around(struct sk_buff *skb,
}
EXPORT_SYMBOL(build_skb_around);
-#define NAPI_SKB_CACHE_SIZE 64
+/**
+ * __napi_build_skb - build a network buffer
+ * @data: data buffer provided by caller
+ * @frag_size: size of data, or 0 if head was kmalloced
+ *
+ * Version of __build_skb() that uses NAPI percpu caches to obtain
+ * skbuff_head instead of inplace allocation.
+ *
+ * Returns a new &sk_buff on success, %NULL on allocation failure.
+ */
+static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size)
+{
+ struct sk_buff *skb;
-struct napi_alloc_cache {
- struct page_frag_cache page;
- unsigned int skb_count;
- void *skb_cache[NAPI_SKB_CACHE_SIZE];
-};
+ skb = napi_skb_cache_get();
+ if (unlikely(!skb))
+ return NULL;
-static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ __build_skb_around(skb, data, frag_size);
+
+ return skb;
+}
-static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+/**
+ * napi_build_skb - build a network buffer
+ * @data: data buffer provided by caller
+ * @frag_size: size of data, or 0 if head was kmalloced
+ *
+ * Version of __napi_build_skb() that takes care of skb->head_frag
+ * and skb->pfmemalloc when the data is a page or page fragment.
+ *
+ * Returns a new &sk_buff on success, %NULL on allocation failure.
+ */
+struct sk_buff *napi_build_skb(void *data, unsigned int frag_size)
{
- struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ struct sk_buff *skb = __napi_build_skb(data, frag_size);
+
+ if (likely(skb) && frag_size) {
+ skb->head_frag = 1;
+ skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
+ }
- return page_frag_alloc(&nc->page, fragsz, gfp_mask);
+ return skb;
}
+EXPORT_SYMBOL(napi_build_skb);
-void *napi_alloc_frag(unsigned int fragsz)
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free
+ */
+static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
+ bool *pfmemalloc)
{
- fragsz = SKB_DATA_ALIGN(fragsz);
+ void *obj;
+ bool ret_pfmemalloc = false;
+
+ /*
+ * Try a regular allocation, when that fails and we're not entitled
+ * to the reserves, fail.
+ */
+ obj = kmalloc_node_track_caller(size,
+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+ node);
+ if (obj || !(gfp_pfmemalloc_allowed(flags)))
+ goto out;
+
+ /* Try again but now we are using pfmemalloc reserves */
+ ret_pfmemalloc = true;
+ obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+ if (pfmemalloc)
+ *pfmemalloc = ret_pfmemalloc;
- return __napi_alloc_frag(fragsz, GFP_ATOMIC);
+ return obj;
}
-EXPORT_SYMBOL(napi_alloc_frag);
+
+/* Allocate a new skbuff. We do this ourselves so we can fill in a few
+ * 'private' fields and also do memory statistics to find all the
+ * [BEEP] leaks.
+ *
+ */
/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
+ * __alloc_skb - allocate a network buffer
+ * @size: size to allocate
+ * @gfp_mask: allocation mask
+ * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ * instead of head cache and allocate a cloned (child) skb.
+ * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ * allocations in case the data is required for writeback
+ * @node: numa node to allocate memory on
*
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
+ * Allocate a new &sk_buff. The returned buffer has no headroom and a
+ * tail room of at least size bytes. The object has a reference count
+ * of one. The return is the buffer. On a failure the return is %NULL.
+ *
+ * Buffers may only be allocated from interrupts using a @gfp_mask of
+ * %GFP_ATOMIC.
*/
-void *netdev_alloc_frag(unsigned int fragsz)
+struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
+ int flags, int node)
{
- struct page_frag_cache *nc;
- void *data;
+ struct kmem_cache *cache;
+ struct sk_buff *skb;
+ u8 *data;
+ bool pfmemalloc;
- fragsz = SKB_DATA_ALIGN(fragsz);
- if (in_irq() || irqs_disabled()) {
- nc = this_cpu_ptr(&netdev_alloc_cache);
- data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
- } else {
- local_bh_disable();
- data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
- local_bh_enable();
+ cache = (flags & SKB_ALLOC_FCLONE)
+ ? skbuff_fclone_cache : skbuff_head_cache;
+
+ if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+ gfp_mask |= __GFP_MEMALLOC;
+
+ /* Get the HEAD */
+ if ((flags & (SKB_ALLOC_FCLONE | SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI &&
+ likely(node == NUMA_NO_NODE || node == numa_mem_id()))
+ skb = napi_skb_cache_get();
+ else
+ skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node);
+ if (unlikely(!skb))
+ return NULL;
+ prefetchw(skb);
+
+ /* We do our best to align skb_shared_info on a separate cache
+ * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+ * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+ * Both skb->head and skb_shared_info are cache line aligned.
+ */
+ size = SKB_DATA_ALIGN(size);
+ size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
+ if (unlikely(!data))
+ goto nodata;
+ /* kmalloc(size) might give us more room than requested.
+ * Put skb_shared_info exactly at the end of allocated zone,
+ * to allow max possible filling before reallocation.
+ */
+ size = SKB_WITH_OVERHEAD(ksize(data));
+ prefetchw(data + size);
+
+ /*
+ * Only clear those fields we need to clear, not those that we will
+ * actually initialise below. Hence, don't put any more fields after
+ * the tail pointer in struct sk_buff!
+ */
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ __build_skb_around(skb, data, 0);
+ skb->pfmemalloc = pfmemalloc;
+
+ if (flags & SKB_ALLOC_FCLONE) {
+ struct sk_buff_fclones *fclones;
+
+ fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+ skb->fclone = SKB_FCLONE_ORIG;
+ refcount_set(&fclones->fclone_ref, 1);
+
+ fclones->skb2.fclone = SKB_FCLONE_CLONE;
}
- return data;
+
+ return skb;
+
+nodata:
+ kmem_cache_free(cache, skb);
+ return NULL;
}
-EXPORT_SYMBOL(netdev_alloc_frag);
+EXPORT_SYMBOL(__alloc_skb);
/**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
@@ -437,7 +482,11 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
len += NET_SKB_PAD;
- if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ /* If requested length is either too small or too big,
+ * we use kmalloc() for skb->head allocation.
+ */
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
+ len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
if (!skb)
@@ -501,20 +550,26 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
gfp_t gfp_mask)
{
- struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ struct napi_alloc_cache *nc;
struct sk_buff *skb;
void *data;
len += NET_SKB_PAD + NET_IP_ALIGN;
- if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ /* If requested length is either too small or too big,
+ * we use kmalloc() for skb->head allocation.
+ */
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
+ len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
- skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
+ skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
+ NUMA_NO_NODE);
if (!skb)
goto skb_fail;
goto skb_success;
}
+ nc = this_cpu_ptr(&napi_alloc_cache);
len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
len = SKB_DATA_ALIGN(len);
@@ -525,7 +580,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
if (unlikely(!data))
return NULL;
- skb = __build_skb(data, len);
+ skb = __napi_build_skb(data, len);
if (unlikely(!skb)) {
skb_free_frag(data);
return NULL;
@@ -605,13 +660,14 @@ static void skb_release_data(struct sk_buff *skb)
&shinfo->dataref))
return;
+ skb_zcopy_clear(skb, true);
+
for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i]);
if (shinfo->frag_list)
kfree_skb_list(shinfo->frag_list);
- skb_zcopy_clear(skb, true);
skb_free_head(skb);
}
@@ -855,43 +911,36 @@ void __consume_stateless_skb(struct sk_buff *skb)
kfree_skbmem(skb);
}
-void __kfree_skb_flush(void)
-{
- struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
-
- /* flush skb_cache if containing objects */
- if (nc->skb_count) {
- kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
- nc->skb_cache);
- nc->skb_count = 0;
- }
-}
-
-static inline void _kfree_skb_defer(struct sk_buff *skb)
+static void napi_skb_cache_put(struct sk_buff *skb)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ u32 i;
- /* drop skb->head and call any destructors for packet */
- skb_release_all(skb);
-
- /* record skb to CPU local list */
+ kasan_poison_object_data(skbuff_head_cache, skb);
nc->skb_cache[nc->skb_count++] = skb;
-#ifdef CONFIG_SLUB
- /* SLUB writes into objects when freeing */
- prefetchw(skb);
-#endif
-
- /* flush skb_cache if it is filled */
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
- kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
- nc->skb_cache);
- nc->skb_count = 0;
+ for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
+ kasan_unpoison_object_data(skbuff_head_cache,
+ nc->skb_cache[i]);
+
+ kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_HALF,
+ nc->skb_cache + NAPI_SKB_CACHE_HALF);
+ nc->skb_count = NAPI_SKB_CACHE_HALF;
}
}
+
void __kfree_skb_defer(struct sk_buff *skb)
{
- _kfree_skb_defer(skb);
+ skb_release_all(skb);
+ napi_skb_cache_put(skb);
+}
+
+void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+ skb_dst_drop(skb);
+ skb_ext_put(skb);
+ napi_skb_cache_put(skb);
}
void napi_consume_skb(struct sk_buff *skb, int budget)
@@ -916,7 +965,8 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
}
- _kfree_skb_defer(skb);
+ skb_release_all(skb);
+ napi_skb_cache_put(skb);
}
EXPORT_SYMBOL(napi_consume_skb);
@@ -1093,7 +1143,7 @@ void mm_unaccount_pinned_pages(struct mmpin *mmp)
}
EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
-struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
+struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
{
struct ubuf_info *uarg;
struct sk_buff *skb;
@@ -1113,25 +1163,26 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
return NULL;
}
- uarg->callback = sock_zerocopy_callback;
+ uarg->callback = msg_zerocopy_callback;
uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
uarg->len = 1;
uarg->bytelen = size;
uarg->zerocopy = 1;
+ uarg->flags = SKBFL_ZEROCOPY_FRAG;
refcount_set(&uarg->refcnt, 1);
sock_hold(sk);
return uarg;
}
-EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
+EXPORT_SYMBOL_GPL(msg_zerocopy_alloc);
static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
{
return container_of((void *)uarg, struct sk_buff, cb);
}
-struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
- struct ubuf_info *uarg)
+struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
+ struct ubuf_info *uarg)
{
if (uarg) {
const u32 byte_limit = 1 << 19; /* limit to a few TSO */
@@ -1163,16 +1214,16 @@ struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
/* no extra ref when appending to datagram (MSG_MORE) */
if (sk->sk_type == SOCK_STREAM)
- sock_zerocopy_get(uarg);
+ net_zcopy_get(uarg);
return uarg;
}
}
new_alloc:
- return sock_zerocopy_alloc(sk, size);
+ return msg_zerocopy_alloc(sk, size);
}
-EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
+EXPORT_SYMBOL_GPL(msg_zerocopy_realloc);
static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
{
@@ -1194,7 +1245,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
return true;
}
-void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
+static void __msg_zerocopy_callback(struct ubuf_info *uarg)
{
struct sk_buff *tail, *skb = skb_from_uarg(uarg);
struct sock_exterr_skb *serr;
@@ -1222,7 +1273,7 @@ void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
serr->ee.ee_data = hi;
serr->ee.ee_info = lo;
- if (!success)
+ if (!uarg->zerocopy)
serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
q = &sk->sk_error_queue;
@@ -1241,32 +1292,28 @@ release:
consume_skb(skb);
sock_put(sk);
}
-EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
-void sock_zerocopy_put(struct ubuf_info *uarg)
+void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
+ bool success)
{
- if (uarg && refcount_dec_and_test(&uarg->refcnt)) {
- if (uarg->callback)
- uarg->callback(uarg, uarg->zerocopy);
- else
- consume_skb(skb_from_uarg(uarg));
- }
+ uarg->zerocopy = uarg->zerocopy & success;
+
+ if (refcount_dec_and_test(&uarg->refcnt))
+ __msg_zerocopy_callback(uarg);
}
-EXPORT_SYMBOL_GPL(sock_zerocopy_put);
+EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
-void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
+void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
- if (uarg) {
- struct sock *sk = skb_from_uarg(uarg)->sk;
+ struct sock *sk = skb_from_uarg(uarg)->sk;
- atomic_dec(&sk->sk_zckey);
- uarg->len--;
+ atomic_dec(&sk->sk_zckey);
+ uarg->len--;
- if (have_uref)
- sock_zerocopy_put(uarg);
- }
+ if (have_uref)
+ msg_zerocopy_callback(NULL, uarg, true);
}
-EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
+EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
{
@@ -1330,7 +1377,7 @@ static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
* @skb: the skb to modify
* @gfp_mask: allocation priority
*
- * This must be called on SKBTX_DEV_ZEROCOPY skb.
+ * This must be called on skb with SKBFL_ZEROCOPY_ENABLE.
* It will copy all frags into kernel and drop the reference
* to userspace pages.
*
@@ -3267,8 +3314,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
{
int pos = skb_headlen(skb);
- skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
- SKBTX_SHARED_FRAG;
+ skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
skb_zerocopy_clone(skb1, skb, 0);
if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
@@ -3283,7 +3329,19 @@ EXPORT_SYMBOL(skb_split);
*/
static int skb_prepare_for_shift(struct sk_buff *skb)
{
- return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ int ret = 0;
+
+ if (skb_cloned(skb)) {
+ /* Save and restore truesize: pskb_expand_head() may reallocate
+ * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
+ * cannot change truesize at this point.
+ */
+ unsigned int save_truesize = skb->truesize;
+
+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ skb->truesize = save_truesize;
+ }
+ return ret;
}
/**
@@ -3442,6 +3500,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
st->root_skb = st->cur_skb = skb;
st->frag_idx = st->stepped_offset = 0;
st->frag_data = NULL;
+ st->frag_off = 0;
}
EXPORT_SYMBOL(skb_prepare_seq_read);
@@ -3496,14 +3555,27 @@ next_skb:
st->stepped_offset += skb_headlen(st->cur_skb);
while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
+ unsigned int pg_idx, pg_off, pg_sz;
+
frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
- block_limit = skb_frag_size(frag) + st->stepped_offset;
+ pg_idx = 0;
+ pg_off = skb_frag_off(frag);
+ pg_sz = skb_frag_size(frag);
+
+ if (skb_frag_must_loop(skb_frag_page(frag))) {
+ pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT;
+ pg_off = offset_in_page(pg_off + st->frag_off);
+ pg_sz = min_t(unsigned int, pg_sz - st->frag_off,
+ PAGE_SIZE - pg_off);
+ }
+
+ block_limit = pg_sz + st->stepped_offset;
if (abs_offset < block_limit) {
if (!st->frag_data)
- st->frag_data = kmap_atomic(skb_frag_page(frag));
+ st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx);
- *data = (u8 *) st->frag_data + skb_frag_off(frag) +
+ *data = (u8 *)st->frag_data + pg_off +
(abs_offset - st->stepped_offset);
return block_limit - abs_offset;
@@ -3514,8 +3586,12 @@ next_skb:
st->frag_data = NULL;
}
- st->frag_idx++;
- st->stepped_offset += skb_frag_size(frag);
+ st->stepped_offset += pg_sz;
+ st->frag_off += pg_sz;
+ if (st->frag_off == skb_frag_size(frag)) {
+ st->frag_off = 0;
+ st->frag_idx++;
+ }
}
if (st->frag_data) {
@@ -3655,7 +3731,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int delta_truesize = 0;
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
- struct sk_buff *nskb;
+ struct sk_buff *nskb, *tmp;
+ int err;
skb_push(skb, -skb_network_offset(skb) + offset);
@@ -3665,11 +3742,28 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
nskb = list_skb;
list_skb = list_skb->next;
+ err = 0;
+ if (skb_shared(nskb)) {
+ tmp = skb_clone(nskb, GFP_ATOMIC);
+ if (tmp) {
+ consume_skb(nskb);
+ nskb = tmp;
+ err = skb_unclone(nskb, GFP_ATOMIC);
+ } else {
+ err = -ENOMEM;
+ }
+ }
+
if (!tail)
skb->next = nskb;
else
tail->next = nskb;
+ if (unlikely(err)) {
+ nskb->next = list_skb;
+ goto err_linearize;
+ }
+
tail = nskb;
delta_len += nskb->len;
@@ -3856,12 +3950,8 @@ normal:
}
hsize = skb_headlen(head_skb) - offset;
- if (hsize < 0)
- hsize = 0;
- if (hsize > len || !sg)
- hsize = len;
- if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
+ if (hsize <= 0 && i >= nfrags && skb_headlen(list_skb) &&
(skb_headlen(list_skb) == len || sg)) {
BUG_ON(skb_headlen(list_skb) > len);
@@ -3904,6 +3994,11 @@ normal:
skb_release_head_state(nskb);
__skb_push(nskb, doffset);
} else {
+ if (hsize < 0)
+ hsize = 0;
+ if (hsize > len || !sg)
+ hsize = len;
+
nskb = __alloc_skb(hsize + doffset + headroom,
GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
NUMA_NO_NODE);
@@ -3957,8 +4052,8 @@ normal:
skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
- skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
- SKBTX_SHARED_FRAG;
+ skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
+ SKBFL_SHARED_FRAG;
if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
@@ -4678,6 +4773,7 @@ err:
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
void __skb_tstamp_tx(struct sk_buff *orig_skb,
+ const struct sk_buff *ack_skb,
struct skb_shared_hwtstamps *hwtstamps,
struct sock *sk, int tstype)
{
@@ -4700,7 +4796,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk->sk_protocol == IPPROTO_TCP &&
sk->sk_type == SOCK_STREAM) {
- skb = tcp_get_timestamping_opt_stats(sk, orig_skb);
+ skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
+ ack_skb);
opt_stats = true;
} else
#endif
@@ -4729,7 +4826,7 @@ EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
void skb_tstamp_tx(struct sk_buff *orig_skb,
struct skb_shared_hwtstamps *hwtstamps)
{
- return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
+ return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk,
SCM_TSTAMP_SND);
}
EXPORT_SYMBOL_GPL(skb_tstamp_tx);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 25cdbb20f3a0..1261512d6807 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -669,14 +669,13 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
kfree(psock);
}
-void sk_psock_destroy(struct rcu_head *rcu)
+static void sk_psock_destroy(struct rcu_head *rcu)
{
struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu);
INIT_WORK(&psock->gc, sk_psock_destroy_deferred);
schedule_work(&psock->gc);
}
-EXPORT_SYMBOL_GPL(sk_psock_destroy);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
diff --git a/net/core/sock.c b/net/core/sock.c
index bbcd4b97eddd..0ed98f20448a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -526,11 +526,17 @@ discard_and_relse:
}
EXPORT_SYMBOL(__sk_receive_skb);
+INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
+ u32));
+INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
+ u32));
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = __sk_dst_get(sk);
- if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+ if (dst && dst->obsolete &&
+ INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
+ dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
sk->sk_dst_pending_confirm = 0;
RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
@@ -546,7 +552,9 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = sk_dst_get(sk);
- if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+ if (dst && dst->obsolete &&
+ INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
+ dst, cookie) == NULL) {
sk_dst_reset(sk);
dst_release(dst);
return NULL;
@@ -1657,6 +1665,16 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
#ifdef CONFIG_SECURITY_NETWORK
void *sptr = nsk->sk_security;
#endif
+
+ /* If we move sk_tx_queue_mapping out of the private section,
+ * we must check if sk_tx_queue_clear() is called after
+ * sock_copy() in sk_clone_lock().
+ */
+ BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
+ offsetof(struct sock, sk_dontcopy_begin) ||
+ offsetof(struct sock, sk_tx_queue_mapping) >=
+ offsetof(struct sock, sk_dontcopy_end));
+
memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
@@ -1690,7 +1708,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
if (!try_module_get(prot->owner))
goto out_free_sec;
- sk_tx_queue_clear(sk);
}
return sk;
@@ -1876,123 +1893,120 @@ static void sk_init_common(struct sock *sk)
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
struct proto *prot = READ_ONCE(sk->sk_prot);
- struct sock *newsk;
+ struct sk_filter *filter;
bool is_charged = true;
+ struct sock *newsk;
newsk = sk_prot_alloc(prot, priority, sk->sk_family);
- if (newsk != NULL) {
- struct sk_filter *filter;
+ if (!newsk)
+ goto out;
- sock_copy(newsk, sk);
+ sock_copy(newsk, sk);
- newsk->sk_prot_creator = prot;
+ newsk->sk_prot_creator = prot;
- /* SANITY */
- if (likely(newsk->sk_net_refcnt))
- get_net(sock_net(newsk));
- sk_node_init(&newsk->sk_node);
- sock_lock_init(newsk);
- bh_lock_sock(newsk);
- newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
- newsk->sk_backlog.len = 0;
+ /* SANITY */
+ if (likely(newsk->sk_net_refcnt))
+ get_net(sock_net(newsk));
+ sk_node_init(&newsk->sk_node);
+ sock_lock_init(newsk);
+ bh_lock_sock(newsk);
+ newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
+ newsk->sk_backlog.len = 0;
- atomic_set(&newsk->sk_rmem_alloc, 0);
- /*
- * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
- */
- refcount_set(&newsk->sk_wmem_alloc, 1);
- atomic_set(&newsk->sk_omem_alloc, 0);
- sk_init_common(newsk);
+ atomic_set(&newsk->sk_rmem_alloc, 0);
- newsk->sk_dst_cache = NULL;
- newsk->sk_dst_pending_confirm = 0;
- newsk->sk_wmem_queued = 0;
- newsk->sk_forward_alloc = 0;
- atomic_set(&newsk->sk_drops, 0);
- newsk->sk_send_head = NULL;
- newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
- atomic_set(&newsk->sk_zckey, 0);
+ /* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
+ refcount_set(&newsk->sk_wmem_alloc, 1);
- sock_reset_flag(newsk, SOCK_DONE);
+ atomic_set(&newsk->sk_omem_alloc, 0);
+ sk_init_common(newsk);
- /* sk->sk_memcg will be populated at accept() time */
- newsk->sk_memcg = NULL;
+ newsk->sk_dst_cache = NULL;
+ newsk->sk_dst_pending_confirm = 0;
+ newsk->sk_wmem_queued = 0;
+ newsk->sk_forward_alloc = 0;
+ atomic_set(&newsk->sk_drops, 0);
+ newsk->sk_send_head = NULL;
+ newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+ atomic_set(&newsk->sk_zckey, 0);
- cgroup_sk_clone(&newsk->sk_cgrp_data);
+ sock_reset_flag(newsk, SOCK_DONE);
- rcu_read_lock();
- filter = rcu_dereference(sk->sk_filter);
- if (filter != NULL)
- /* though it's an empty new sock, the charging may fail
- * if sysctl_optmem_max was changed between creation of
- * original socket and cloning
- */
- is_charged = sk_filter_charge(newsk, filter);
- RCU_INIT_POINTER(newsk->sk_filter, filter);
- rcu_read_unlock();
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
- if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
- /* We need to make sure that we don't uncharge the new
- * socket if we couldn't charge it in the first place
- * as otherwise we uncharge the parent's filter.
- */
- if (!is_charged)
- RCU_INIT_POINTER(newsk->sk_filter, NULL);
- sk_free_unlock_clone(newsk);
- newsk = NULL;
- goto out;
- }
- RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+ cgroup_sk_clone(&newsk->sk_cgrp_data);
- if (bpf_sk_storage_clone(sk, newsk)) {
- sk_free_unlock_clone(newsk);
- newsk = NULL;
- goto out;
- }
+ rcu_read_lock();
+ filter = rcu_dereference(sk->sk_filter);
+ if (filter != NULL)
+ /* though it's an empty new sock, the charging may fail
+ * if sysctl_optmem_max was changed between creation of
+ * original socket and cloning
+ */
+ is_charged = sk_filter_charge(newsk, filter);
+ RCU_INIT_POINTER(newsk->sk_filter, filter);
+ rcu_read_unlock();
- /* Clear sk_user_data if parent had the pointer tagged
- * as not suitable for copying when cloning.
+ if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+ /* We need to make sure that we don't uncharge the new
+ * socket if we couldn't charge it in the first place
+ * as otherwise we uncharge the parent's filter.
*/
- if (sk_user_data_is_nocopy(newsk))
- newsk->sk_user_data = NULL;
+ if (!is_charged)
+ RCU_INIT_POINTER(newsk->sk_filter, NULL);
+ sk_free_unlock_clone(newsk);
+ newsk = NULL;
+ goto out;
+ }
+ RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
- newsk->sk_err = 0;
- newsk->sk_err_soft = 0;
- newsk->sk_priority = 0;
- newsk->sk_incoming_cpu = raw_smp_processor_id();
- if (likely(newsk->sk_net_refcnt))
- sock_inuse_add(sock_net(newsk), 1);
+ if (bpf_sk_storage_clone(sk, newsk)) {
+ sk_free_unlock_clone(newsk);
+ newsk = NULL;
+ goto out;
+ }
- /*
- * Before updating sk_refcnt, we must commit prior changes to memory
- * (Documentation/RCU/rculist_nulls.rst for details)
- */
- smp_wmb();
- refcount_set(&newsk->sk_refcnt, 2);
+ /* Clear sk_user_data if parent had the pointer tagged
+ * as not suitable for copying when cloning.
+ */
+ if (sk_user_data_is_nocopy(newsk))
+ newsk->sk_user_data = NULL;
- /*
- * Increment the counter in the same struct proto as the master
- * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
- * is the same as sk->sk_prot->socks, as this field was copied
- * with memcpy).
- *
- * This _changes_ the previous behaviour, where
- * tcp_create_openreq_child always was incrementing the
- * equivalent to tcp_prot->socks (inet_sock_nr), so this have
- * to be taken into account in all callers. -acme
- */
- sk_refcnt_debug_inc(newsk);
- sk_set_socket(newsk, NULL);
- sk_tx_queue_clear(newsk);
- RCU_INIT_POINTER(newsk->sk_wq, NULL);
+ newsk->sk_err = 0;
+ newsk->sk_err_soft = 0;
+ newsk->sk_priority = 0;
+ newsk->sk_incoming_cpu = raw_smp_processor_id();
+ if (likely(newsk->sk_net_refcnt))
+ sock_inuse_add(sock_net(newsk), 1);
- if (newsk->sk_prot->sockets_allocated)
- sk_sockets_allocated_inc(newsk);
+ /* Before updating sk_refcnt, we must commit prior changes to memory
+ * (Documentation/RCU/rculist_nulls.rst for details)
+ */
+ smp_wmb();
+ refcount_set(&newsk->sk_refcnt, 2);
- if (sock_needs_netstamp(sk) &&
- newsk->sk_flags & SK_FLAGS_TIMESTAMP)
- net_enable_timestamp();
- }
+ /* Increment the counter in the same struct proto as the master
+ * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
+ * is the same as sk->sk_prot->socks, as this field was copied
+ * with memcpy).
+ *
+ * This _changes_ the previous behaviour, where
+ * tcp_create_openreq_child always was incrementing the
+ * equivalent to tcp_prot->socks (inet_sock_nr), so this have
+ * to be taken into account in all callers. -acme
+ */
+ sk_refcnt_debug_inc(newsk);
+ sk_set_socket(newsk, NULL);
+ sk_tx_queue_clear(newsk);
+ RCU_INIT_POINTER(newsk->sk_wq, NULL);
+
+ if (newsk->sk_prot->sockets_allocated)
+ sk_sockets_allocated_inc(newsk);
+
+ if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+ net_enable_timestamp();
out:
return newsk;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 64b5ec14ff50..d758fb83c884 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -602,7 +602,7 @@ int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
- fput(sock->file);
+ sockfd_put(sock);
return ret;
}
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index bbdd3c7b6cb5..b065f0a103ed 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -293,7 +293,7 @@ select_by_hash:
i = j = reciprocal_scale(hash, socks);
while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
i++;
- if (i >= reuse->num_socks)
+ if (i >= socks)
i = 0;
if (i == j)
goto out;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d86d8d11cfe4..4567de519603 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -309,7 +309,6 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
#endif
static struct ctl_table net_core_table[] = {
-#ifdef CONFIG_NET
{
.procname = "wmem_max",
.data = &sysctl_wmem_max,
@@ -507,7 +506,6 @@ static struct ctl_table net_core_table[] = {
.proc_handler = set_default_qdisc
},
#endif
-#endif /* CONFIG_NET */
{
.procname = "netdev_budget",
.data = &netdev_budget,
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 3a8c9ab4ecbe..05354976c1fc 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -513,3 +513,73 @@ void xdp_warn(const char *msg, const char *func, const int line)
WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
};
EXPORT_SYMBOL_GPL(xdp_warn);
+
+int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
+{
+ n_skb = kmem_cache_alloc_bulk(skbuff_head_cache, gfp,
+ n_skb, skbs);
+ if (unlikely(!n_skb))
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_alloc_skb_bulk);
+
+struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
+ struct sk_buff *skb,
+ struct net_device *dev)
+{
+ unsigned int headroom, frame_size;
+ void *hard_start;
+
+ /* Part of headroom was reserved to xdpf */
+ headroom = sizeof(*xdpf) + xdpf->headroom;
+
+ /* Memory size backing xdp_frame data already have reserved
+ * room for build_skb to place skb_shared_info in tailroom.
+ */
+ frame_size = xdpf->frame_sz;
+
+ hard_start = xdpf->data - headroom;
+ skb = build_skb_around(skb, hard_start, frame_size);
+ if (unlikely(!skb))
+ return NULL;
+
+ skb_reserve(skb, headroom);
+ __skb_put(skb, xdpf->len);
+ if (xdpf->metasize)
+ skb_metadata_set(skb, xdpf->metasize);
+
+ /* Essential SKB info: protocol and skb->dev */
+ skb->protocol = eth_type_trans(skb, dev);
+
+ /* Optional SKB info, currently missing:
+ * - HW checksum info (skb->ip_summed)
+ * - HW RX hash (skb_set_hash)
+ * - RX ring dev queue index (skb_record_rx_queue)
+ */
+
+ /* Until page_pool get SKB return path, release DMA here */
+ xdp_release_frame(xdpf);
+
+ /* Allow SKB to reuse area used by xdp_frame */
+ xdp_scrub_frame(xdpf);
+
+ return skb;
+}
+EXPORT_SYMBOL_GPL(__xdp_build_skb_from_frame);
+
+struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
+ struct net_device *dev)
+{
+ struct sk_buff *skb;
+
+ skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return NULL;
+
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+
+ return __xdp_build_skb_from_frame(xdpf, skb, dev);
+}
+EXPORT_SYMBOL_GPL(xdp_build_skb_from_frame);
diff --git a/net/dcb/Makefile b/net/dcb/Makefile
index 3016e5a7716a..2c0fa16ee2a9 100644
--- a/net/dcb/Makefile
+++ b/net/dcb/Makefile
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_DCB) += dcbnl.o dcbevent.o
+obj-y += dcbnl.o dcbevent.o
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 084e159a12ba..653e3bc9c87b 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1765,6 +1765,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
fn = &reply_funcs[dcb->cmd];
if (!fn->cb)
return -EOPNOTSUPP;
+ if (fn->type == RTM_SETDCB && !netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
if (!tb[DCB_ATTR_IFNAME])
return -EINVAL;
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 305f56804832..54086bb05c42 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -371,7 +371,7 @@ static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
fval->sp.vec = kmemdup(val, len, gfp_any());
if (fval->sp.vec == NULL) {
fval->sp.len = 0;
- return -ENOBUFS;
+ return -ENOMEM;
}
}
return 0;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 4cac31d22a50..2193ae529e75 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1035,7 +1035,7 @@ source_ok:
fld.saddr = dnet_select_source(dev_out, 0,
RT_SCOPE_HOST);
if (!fld.daddr)
- goto out;
+ goto done;
}
fld.flowidn_oif = LOOPBACK_IFINDEX;
res.type = RTN_LOCAL;
diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig
index 255df9b6e9e8..155b06163409 100644
--- a/net/dns_resolver/Kconfig
+++ b/net/dns_resolver/Kconfig
@@ -4,7 +4,7 @@
#
config DNS_RESOLVER
tristate "DNS Resolver support"
- depends on NET && KEYS
+ depends on KEYS
help
Saying Y here will include support for the DNS Resolver key type
which can be used to make upcalls to perform DNS lookups in
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index dfecd7b22fd7..3589224c8da9 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -9,6 +9,7 @@ menuconfig NET_DSA
tristate "Distributed Switch Architecture"
depends on HAVE_NET_DSA
depends on BRIDGE || BRIDGE=n
+ depends on HSR || HSR=n
select GRO_CELLS
select NET_SWITCHDEV
select PHYLINK
@@ -105,11 +106,26 @@ config NET_DSA_TAG_RTL4_A
the Realtek RTL8366RB.
config NET_DSA_TAG_OCELOT
- tristate "Tag driver for Ocelot family of switches"
+ tristate "Tag driver for Ocelot family of switches, using NPI port"
select PACKING
help
- Say Y or M if you want to enable support for tagging frames for the
- Ocelot switches (VSC7511, VSC7512, VSC7513, VSC7514, VSC9959).
+ Say Y or M if you want to enable NPI tagging for the Ocelot switches
+ (VSC7511, VSC7512, VSC7513, VSC7514, VSC9953, VSC9959). In this mode,
+ the frames over the Ethernet CPU port are prepended with a
+ hardware-defined injection/extraction frame header. Flow control
+ (PAUSE frames) over the CPU port is not supported when operating in
+ this mode.
+
+config NET_DSA_TAG_OCELOT_8021Q
+ tristate "Tag driver for Ocelot family of switches, using VLAN"
+ select NET_DSA_TAG_8021Q
+ help
+ Say Y or M if you want to enable support for tagging frames with a
+ custom VLAN-based header. Frames that require timestamping, such as
+ PTP, are not delivered over Ethernet but over register-based MMIO.
+ Flow control over the CPU port is functional in this mode. When using
+ this mode, fewer TCAM resources (VCAP IS1, IS2, ES0) are available for
+ use with tc-flower.
config NET_DSA_TAG_QCA
tristate "Tag driver for Qualcomm Atheros QCA8K switches"
@@ -139,4 +155,10 @@ config NET_DSA_TAG_TRAILER
Say Y or M if you want to enable support for tagging frames at
with a trailed. e.g. Marvell 88E6060.
+config NET_DSA_TAG_XRS700X
+ tristate "Tag driver for XRS700x switches"
+ help
+ Say Y or M if you want to enable support for tagging frames for
+ Arrow SpeedChips XRS700x switches that use a single byte tag trailer.
+
endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 0fb2b75a7ae3..44bc79952b8b 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -15,6 +15,8 @@ obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o
obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o
+obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o
obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o
obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
+obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index a1b1dc8a4d87..84cad1be9ce4 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -84,6 +84,32 @@ const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops)
return ops->name;
};
+/* Look up a registered tagging protocol driver by name.
+ *
+ * @buf is compared with sysfs_streq() against the name of each entry on
+ * dsa_tag_drivers_list while holding dsa_tag_drivers_lock. Returns the
+ * matching dsa_device_ops, or ERR_PTR(-ENOPROTOOPT) if no entry matches or
+ * if try_module_get() fails for the matching entry.
+ *
+ * Function takes a reference on the module owning the tagger,
+ * so dsa_tag_driver_put must be called afterwards.
+ */
+const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf)
+{
+ const struct dsa_device_ops *ops = ERR_PTR(-ENOPROTOOPT);
+ struct dsa_tag_driver *dsa_tag_driver;
+
+ mutex_lock(&dsa_tag_drivers_lock);
+ list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) {
+ const struct dsa_device_ops *tmp = dsa_tag_driver->ops;
+
+ if (!sysfs_streq(buf, tmp->name))
+ continue;
+
+ /* Name matched, but no module reference could be taken:
+ * leave ops at its error value and stop searching.
+ */
+ if (!try_module_get(dsa_tag_driver->owner))
+ break;
+
+ ops = tmp;
+ break;
+ }
+ mutex_unlock(&dsa_tag_drivers_lock);
+
+ return ops;
+}
+
const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol)
{
struct dsa_tag_driver *dsa_tag_driver;
@@ -219,11 +245,21 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
}
skb = nskb;
- p = netdev_priv(skb->dev);
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
+ if (unlikely(!dsa_slave_dev_check(skb->dev))) {
+ /* Packet is to be injected directly on an upper
+ * device, e.g. a team/bond, so skip all DSA-port
+ * specific actions.
+ */
+ netif_rx(skb);
+ return 0;
+ }
+
+ p = netdev_priv(skb->dev);
+
if (unlikely(cpu_dp->ds->untag_bridge_pvid)) {
nskb = dsa_untag_bridge_pvid(skb);
if (!nskb) {
@@ -309,28 +345,6 @@ bool dsa_schedule_work(struct work_struct *work)
return queue_work(dsa_owq, work);
}
-static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain);
-
-int register_dsa_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_register(&dsa_notif_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_dsa_notifier);
-
-int unregister_dsa_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&dsa_notif_chain, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_dsa_notifier);
-
-int call_dsa_notifiers(unsigned long val, struct net_device *dev,
- struct dsa_notifier_info *info)
-{
- info->dev = dev;
- return atomic_notifier_call_chain(&dsa_notif_chain, val, info);
-}
-EXPORT_SYMBOL_GPL(call_dsa_notifiers);
-
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
{
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 183003e45762..4d4956ed303b 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -21,6 +21,108 @@
static DEFINE_MUTEX(dsa2_mutex);
LIST_HEAD(dsa_tree_list);
+/**
+ * dsa_tree_notify - Execute code for all switches in a DSA switch tree.
+ * @dst: collection of struct dsa_switch devices to notify.
+ * @e: event, must be of type DSA_NOTIFIER_*
+ * @v: event-specific value.
+ *
+ * Given a struct dsa_switch_tree, this can be used to run a function once for
+ * each member DSA switch. The other alternative of traversing the tree is only
+ * through its ports list, which does not uniquely list the switches.
+ *
+ * Return: the result of calling the tree's raw notifier chain (dst->nh),
+ * converted with notifier_to_errno().
+ */
+int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v)
+{
+ struct raw_notifier_head *nh = &dst->nh;
+ int err;
+
+ err = raw_notifier_call_chain(nh, e, v);
+
+ return notifier_to_errno(err);
+}
+
+/**
+ * dsa_broadcast - Notify all DSA trees in the system.
+ * @e: event, must be of type DSA_NOTIFIER_*
+ * @v: event-specific value.
+ *
+ * Can be used to notify the switching fabric of events such as cross-chip
+ * bridging between disjoint trees (such as islands of tagger-compatible
+ * switches bridged by an incompatible middle switch).
+ *
+ * Trees are visited in dsa_tree_list order and the walk stops at the first
+ * tree for which dsa_tree_notify() reports an error.
+ *
+ * NOTE(review): dsa_tree_list is walked without taking a lock in this
+ * function; presumably callers provide the serialization - confirm.
+ *
+ * Return: 0 if no tree reported an error (or the list is empty), otherwise
+ * the first error returned by dsa_tree_notify().
+ */
+int dsa_broadcast(unsigned long e, void *v)
+{
+ struct dsa_switch_tree *dst;
+ int err = 0;
+
+ list_for_each_entry(dst, &dsa_tree_list, list) {
+ err = dsa_tree_notify(dst, e, v);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * dsa_lag_map() - Map LAG netdev to a linear LAG ID
+ * @dst: Tree in which to record the mapping.
+ * @lag: Netdev that is to be mapped to an ID.
+ *
+ * dsa_lag_id/dsa_lag_dev can then be used to translate between the
+ * two spaces. The size of the mapping space is determined by the
+ * driver by setting ds->num_lag_ids. It is perfectly legal to leave
+ * it unset if it is not needed, in which case these functions become
+ * no-ops.
+ *
+ * The first unused ID in [0, dst->lags_len) is assigned to @lag. If @lag
+ * already has an ID, or if no ID is free, nothing is changed and no error
+ * is reported to the caller.
+ */
+void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag)
+{
+ unsigned int id;
+
+ if (dsa_lag_id(dst, lag) >= 0)
+ /* Already mapped */
+ return;
+
+ for (id = 0; id < dst->lags_len; id++) {
+ if (!dsa_lag_dev(dst, id)) {
+ dst->lags[id] = lag;
+ return;
+ }
+ }
+
+ /* No IDs left, which is OK. Some drivers do not need it. The
+ * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id
+ * returns an error for this device when joining the LAG. The
+ * driver can then return -EOPNOTSUPP back to DSA, which will
+ * fall back to a software LAG.
+ */
+}
+
+/**
+ * dsa_lag_unmap() - Remove a LAG ID mapping
+ * @dst: Tree in which the mapping is recorded.
+ * @lag: Netdev that was mapped.
+ *
+ * As there may be multiple users of the mapping, it is only removed
+ * if there are no other references to it.
+ *
+ * The function returns as soon as dsa_lag_foreach_port() produces a single
+ * port for @lag; only when that loop body is never entered is the entry of
+ * dst->lags that points at @lag reset to NULL.
+ */
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
+{
+ struct dsa_port *dp;
+ unsigned int id;
+
+ dsa_lag_foreach_port(dp, dst, lag)
+ /* There are remaining users of this mapping */
+ return;
+
+ dsa_lags_foreach_id(id, dst) {
+ if (dsa_lag_dev(dst, id) == lag) {
+ dst->lags[id] = NULL;
+ break;
+ }
+ }
+}
+
struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
{
struct dsa_switch_tree *dst;
@@ -77,6 +179,8 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
static void dsa_tree_free(struct dsa_switch_tree *dst)
{
+ if (dst->tag_ops)
+ dsa_tag_driver_put(dst->tag_ops);
list_del(&dst->list);
kfree(dst);
}
@@ -353,15 +457,18 @@ static int dsa_port_devlink_setup(struct dsa_port *dp)
static void dsa_port_teardown(struct dsa_port *dp)
{
+ struct devlink_port *dlp = &dp->devlink_port;
+
if (!dp->setup)
return;
+ devlink_port_type_clear(dlp);
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
break;
case DSA_PORT_TYPE_CPU:
dsa_port_disable(dp);
- dsa_tag_driver_put(dp->tag_ops);
dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_DSA:
@@ -400,8 +507,165 @@ static int dsa_devlink_info_get(struct devlink *dl,
return -EOPNOTSUPP;
}
+/* devlink .sb_pool_get handler: resolve the dsa_switch behind @dl and
+ * forward the request to its devlink_sb_pool_get() op. Returns -EOPNOTSUPP
+ * if the switch driver does not provide that op.
+ */
+static int dsa_devlink_sb_pool_get(struct devlink *dl,
+ unsigned int sb_index, u16 pool_index,
+ struct devlink_sb_pool_info *pool_info)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_pool_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index,
+ pool_info);
+}
+
+/* devlink .sb_pool_set handler: resolve the dsa_switch behind @dl and
+ * forward the request to its devlink_sb_pool_set() op. Returns -EOPNOTSUPP
+ * if the switch driver does not provide that op.
+ */
+static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index,
+ u16 pool_index, u32 size,
+ enum devlink_sb_threshold_type threshold_type,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_pool_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size,
+ threshold_type, extack);
+}
+
+/* devlink .sb_port_pool_get handler: resolve the dsa_switch and port index
+ * behind @dlp and forward the request to the driver's
+ * devlink_sb_port_pool_get() op. Returns -EOPNOTSUPP if the switch driver
+ * does not provide that op.
+ */
+static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp,
+ unsigned int sb_index, u16 pool_index,
+ u32 *p_threshold)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_port_pool_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index,
+ pool_index, p_threshold);
+}
+
+/* devlink .sb_port_pool_set handler: resolve the dsa_switch and port index
+ * behind @dlp and forward the request to the driver's
+ * devlink_sb_port_pool_set() op. Returns -EOPNOTSUPP if the switch driver
+ * does not provide that op.
+ */
+static int dsa_devlink_sb_port_pool_set(struct devlink_port *dlp,
+ unsigned int sb_index, u16 pool_index,
+ u32 threshold,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_port_pool_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index,
+ pool_index, threshold, extack);
+}
+
+/* devlink .sb_tc_pool_bind_get handler: resolve the dsa_switch and port
+ * index behind @dlp and forward the request to the driver's
+ * devlink_sb_tc_pool_bind_get() op. Returns -EOPNOTSUPP if the switch
+ * driver does not provide that op.
+ */
+static int
+dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 *p_pool_index, u32 *p_threshold)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_tc_pool_bind_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index,
+ tc_index, pool_type,
+ p_pool_index, p_threshold);
+}
+
+/* devlink .sb_tc_pool_bind_set handler: resolve the dsa_switch and port
+ * index behind @dlp and forward the request to the driver's
+ * devlink_sb_tc_pool_bind_set() op. Returns -EOPNOTSUPP if the switch
+ * driver does not provide that op.
+ */
+static int
+dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u16 pool_index, u32 threshold,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_tc_pool_bind_set)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index,
+ tc_index, pool_type,
+ pool_index, threshold,
+ extack);
+}
+
+/* devlink .sb_occ_snapshot handler: resolve the dsa_switch behind @dl and
+ * forward the request to its devlink_sb_occ_snapshot() op. Returns
+ * -EOPNOTSUPP if the switch driver does not provide that op.
+ */
+static int dsa_devlink_sb_occ_snapshot(struct devlink *dl,
+ unsigned int sb_index)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_occ_snapshot)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_occ_snapshot(ds, sb_index);
+}
+
+/* devlink .sb_occ_max_clear handler: resolve the dsa_switch behind @dl and
+ * forward the request to its devlink_sb_occ_max_clear() op. Returns
+ * -EOPNOTSUPP if the switch driver does not provide that op.
+ */
+static int dsa_devlink_sb_occ_max_clear(struct devlink *dl,
+ unsigned int sb_index)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (!ds->ops->devlink_sb_occ_max_clear)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_occ_max_clear(ds, sb_index);
+}
+
+/* devlink .sb_occ_port_pool_get handler: resolve the dsa_switch and port
+ * index behind @dlp and forward the request to the driver's
+ * devlink_sb_occ_port_pool_get() op. Returns -EOPNOTSUPP if the switch
+ * driver does not provide that op.
+ */
+static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp,
+ unsigned int sb_index,
+ u16 pool_index, u32 *p_cur,
+ u32 *p_max)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_occ_port_pool_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index,
+ pool_index, p_cur, p_max);
+}
+
+/* devlink .sb_occ_tc_port_bind_get handler: resolve the dsa_switch and port
+ * index behind @dlp and forward the request to the driver's
+ * devlink_sb_occ_tc_port_bind_get() op. Returns -EOPNOTSUPP if the switch
+ * driver does not provide that op.
+ */
+static int
+dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp,
+ unsigned int sb_index, u16 tc_index,
+ enum devlink_sb_pool_type pool_type,
+ u32 *p_cur, u32 *p_max)
+{
+ struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp);
+ int port = dsa_devlink_port_to_port(dlp);
+
+ if (!ds->ops->devlink_sb_occ_tc_port_bind_get)
+ return -EOPNOTSUPP;
+
+ return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port,
+ sb_index, tc_index,
+ pool_type, p_cur,
+ p_max);
+}
+
static const struct devlink_ops dsa_devlink_ops = {
- .info_get = dsa_devlink_info_get,
+ .info_get = dsa_devlink_info_get,
+ .sb_pool_get = dsa_devlink_sb_pool_get,
+ .sb_pool_set = dsa_devlink_sb_pool_set,
+ .sb_port_pool_get = dsa_devlink_sb_port_pool_get,
+ .sb_port_pool_set = dsa_devlink_sb_port_pool_set,
+ .sb_tc_pool_bind_get = dsa_devlink_sb_tc_pool_bind_get,
+ .sb_tc_pool_bind_set = dsa_devlink_sb_tc_pool_bind_set,
+ .sb_occ_snapshot = dsa_devlink_sb_occ_snapshot,
+ .sb_occ_max_clear = dsa_devlink_sb_occ_max_clear,
+ .sb_occ_port_pool_get = dsa_devlink_sb_occ_port_pool_get,
+ .sb_occ_tc_port_bind_get = dsa_devlink_sb_occ_tc_port_bind_get,
};
static int dsa_switch_setup(struct dsa_switch *ds)
@@ -448,6 +712,8 @@ static int dsa_switch_setup(struct dsa_switch *ds)
if (err)
goto unregister_devlink_ports;
+ ds->configure_vlan_while_not_filtering = true;
+
err = ds->ops->setup(ds);
if (err < 0)
goto unregister_notifier;
@@ -458,20 +724,23 @@ static int dsa_switch_setup(struct dsa_switch *ds)
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus) {
err = -ENOMEM;
- goto unregister_notifier;
+ goto teardown;
}
dsa_slave_mii_bus_init(ds);
err = mdiobus_register(ds->slave_mii_bus);
if (err < 0)
- goto unregister_notifier;
+ goto teardown;
}
ds->setup = true;
return 0;
+teardown:
+ if (ds->ops->teardown)
+ ds->ops->teardown(ds);
unregister_notifier:
dsa_switch_unregister_notifier(ds);
unregister_devlink_ports:
@@ -578,6 +847,32 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst)
dsa_master_teardown(dp->master);
}
+/* Allocate the tree-wide LAG ID table.
+ *
+ * The table is sized to the largest num_lag_ids found among the switches
+ * owning the ports on dst->ports. If that maximum is 0, nothing is
+ * allocated and dst->lags / dst->lags_len are left as they were.
+ *
+ * Returns 0 on success (including the nothing-to-allocate case) or -ENOMEM
+ * if the zero-initialised table cannot be allocated.
+ */
+static int dsa_tree_setup_lags(struct dsa_switch_tree *dst)
+{
+ unsigned int len = 0;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->ds->num_lag_ids > len)
+ len = dp->ds->num_lag_ids;
+ }
+
+ if (!len)
+ return 0;
+
+ dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL);
+ if (!dst->lags)
+ return -ENOMEM;
+
+ dst->lags_len = len;
+ return 0;
+}
+
+/* Free the LAG ID table allocated by dsa_tree_setup_lags().
+ *
+ * The pointer and length are reset as well: dsa_tree_setup_lags() leaves
+ * both untouched when no switch requests LAG IDs, so without the reset a
+ * later setup of the same tree could keep a dangling dst->lags together
+ * with a stale, non-zero dst->lags_len.
+ */
+static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst)
+{
+ kfree(dst->lags);
+ dst->lags = NULL;
+ dst->lags_len = 0;
+}
+
static int dsa_tree_setup(struct dsa_switch_tree *dst)
{
bool complete;
@@ -605,12 +900,18 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
if (err)
goto teardown_switches;
+ err = dsa_tree_setup_lags(dst);
+ if (err)
+ goto teardown_master;
+
dst->setup = true;
pr_info("DSA: tree %d setup\n", dst->index);
return 0;
+teardown_master:
+ dsa_tree_teardown_master(dst);
teardown_switches:
dsa_tree_teardown_switches(dst);
teardown_default_cpu:
@@ -626,6 +927,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
if (!dst->setup)
return;
+ dsa_tree_teardown_lags(dst);
+
dsa_tree_teardown_master(dst);
dsa_tree_teardown_switches(dst);
@@ -642,6 +945,57 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dst->setup = false;
}
+/* Since the dsa/tagging sysfs device attribute is per master, the assumption
+ * is that all DSA switches within a tree share the same tagger, otherwise
+ * they would have formed disjoint trees (different "dsa,member" values).
+ *
+ * Notifies the switches of @dst about the new @tag_ops through
+ * DSA_NOTIFIER_TAG_PROTO and, on success, records it in dst->tag_ops.
+ *
+ * Returns 0 on success; the result of restart_syscall() if the rtnl lock
+ * cannot be taken immediately; -EBUSY if @master or the slave netdev of any
+ * user port in the tree is up; or the error reported by the notifier, in
+ * which case the switches are notified once more with @old_tag_ops.
+ */
+int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ const struct dsa_device_ops *tag_ops,
+ const struct dsa_device_ops *old_tag_ops)
+{
+ struct dsa_notifier_tag_proto_info info;
+ struct dsa_port *dp;
+ int err = -EBUSY;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ /* At the moment we don't allow changing the tag protocol under
+ * traffic. The rtnl_mutex also happens to serialize concurrent
+ * attempts to change the tagging protocol. If we ever lift the IFF_UP
+ * restriction, there needs to be another mutex which serializes this.
+ */
+ if (master->flags & IFF_UP)
+ goto out_unlock;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (!dsa_is_user_port(dp->ds, dp->index))
+ continue;
+
+ if (dp->slave->flags & IFF_UP)
+ goto out_unlock;
+ }
+
+ info.tag_ops = tag_ops;
+ err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
+ if (err)
+ goto out_unwind_tagger;
+
+ dst->tag_ops = tag_ops;
+
+ rtnl_unlock();
+
+ return 0;
+
+out_unwind_tagger:
+ /* Best-effort rollback: the result of this second notification is
+ * not checked, the original error is what gets returned.
+ */
+ info.tag_ops = old_tag_ops;
+ dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info);
+out_unlock:
+ rtnl_unlock();
+ return err;
+}
+
static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
{
struct dsa_switch_tree *dst = ds->dst;
@@ -712,24 +1066,33 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
{
struct dsa_switch *ds = dp->ds;
struct dsa_switch_tree *dst = ds->dst;
- const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
tag_protocol = dsa_get_tag_protocol(dp, master);
- tag_ops = dsa_tag_driver_get(tag_protocol);
- if (IS_ERR(tag_ops)) {
- if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
- return -EPROBE_DEFER;
- dev_warn(ds->dev, "No tagger for this switch\n");
- dp->master = NULL;
- return PTR_ERR(tag_ops);
+ if (dst->tag_ops) {
+ if (dst->tag_ops->proto != tag_protocol) {
+ dev_err(ds->dev,
+ "A DSA switch tree can have only one tagging protocol\n");
+ return -EINVAL;
+ }
+ /* In the case of multiple CPU ports per switch, the tagging
+ * protocol is still reference-counted only per switch tree, so
+ * nothing to do here.
+ */
+ } else {
+ dst->tag_ops = dsa_tag_driver_get(tag_protocol);
+ if (IS_ERR(dst->tag_ops)) {
+ if (PTR_ERR(dst->tag_ops) == -ENOPROTOOPT)
+ return -EPROBE_DEFER;
+ dev_warn(ds->dev, "No tagger for this switch\n");
+ dp->master = NULL;
+ return PTR_ERR(dst->tag_ops);
+ }
}
dp->master = master;
dp->type = DSA_PORT_TYPE_CPU;
- dp->filter = tag_ops->filter;
- dp->rcv = tag_ops->rcv;
- dp->tag_ops = tag_ops;
+ dsa_port_set_tag_protocol(dp, dst->tag_ops);
dp->dst = dst;
return 0;
@@ -783,6 +1146,8 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
goto out_put_node;
if (reg >= ds->num_ports) {
+ dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n",
+ port, reg, ds->num_ports);
err = -EINVAL;
goto out_put_node;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 7c96aae9062c..2eeaa42f2e08 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -20,16 +20,25 @@ enum {
DSA_NOTIFIER_BRIDGE_LEAVE,
DSA_NOTIFIER_FDB_ADD,
DSA_NOTIFIER_FDB_DEL,
+ DSA_NOTIFIER_HSR_JOIN,
+ DSA_NOTIFIER_HSR_LEAVE,
+ DSA_NOTIFIER_LAG_CHANGE,
+ DSA_NOTIFIER_LAG_JOIN,
+ DSA_NOTIFIER_LAG_LEAVE,
DSA_NOTIFIER_MDB_ADD,
DSA_NOTIFIER_MDB_DEL,
DSA_NOTIFIER_VLAN_ADD,
DSA_NOTIFIER_VLAN_DEL,
DSA_NOTIFIER_MTU,
+ DSA_NOTIFIER_TAG_PROTO,
+ DSA_NOTIFIER_MRP_ADD,
+ DSA_NOTIFIER_MRP_DEL,
+ DSA_NOTIFIER_MRP_ADD_RING_ROLE,
+ DSA_NOTIFIER_MRP_DEL_RING_ROLE,
};
/* DSA_NOTIFIER_AGEING_TIME */
struct dsa_notifier_ageing_time_info {
- struct switchdev_trans *trans;
unsigned int ageing_time;
};
@@ -52,17 +61,25 @@ struct dsa_notifier_fdb_info {
/* DSA_NOTIFIER_MDB_* */
struct dsa_notifier_mdb_info {
const struct switchdev_obj_port_mdb *mdb;
- struct switchdev_trans *trans;
int sw_index;
int port;
};
+/* DSA_NOTIFIER_LAG_* (the LAG_CHANGE, LAG_JOIN and LAG_LEAVE events) */
+struct dsa_notifier_lag_info {
+ struct net_device *lag;
+ /* NOTE(review): sw_index/port presumably identify the switch and the
+ * port the event refers to - confirm against the notifier callers.
+ */
+ int sw_index;
+ int port;
+
+ struct netdev_lag_upper_info *info;
+};
+
/* DSA_NOTIFIER_VLAN_* */
struct dsa_notifier_vlan_info {
const struct switchdev_obj_port_vlan *vlan;
- struct switchdev_trans *trans;
int sw_index;
int port;
+ struct netlink_ext_ack *extack;
};
/* DSA_NOTIFIER_MTU */
@@ -73,6 +90,44 @@ struct dsa_notifier_mtu_info {
int mtu;
};
+/* DSA_NOTIFIER_TAG_PROTO */
+struct dsa_notifier_tag_proto_info {
+ /* Tagger ops passed along with the event; filled in by
+ * dsa_tree_change_tag_proto() before notifying the tree.
+ */
+ const struct dsa_device_ops *tag_ops;
+};
+
+/* DSA_NOTIFIER_MRP_* (presumably the MRP_ADD and MRP_DEL events, as the
+ * ring-role events have their own info struct - confirm against callers)
+ */
+struct dsa_notifier_mrp_info {
+ const struct switchdev_obj_mrp *mrp;
+ int sw_index;
+ int port;
+};
+
+/* DSA_NOTIFIER_MRP_*_RING_ROLE (presumably the MRP_ADD_RING_ROLE and
+ * MRP_DEL_RING_ROLE events - confirm against callers)
+ */
+struct dsa_notifier_mrp_ring_role_info {
+ const struct switchdev_obj_ring_role_mrp *mrp;
+ int sw_index;
+ int port;
+};
+
+/* Work item that bundles a work_struct with the switch, port and event it
+ * applies to, plus the FDB address/VID used by the FDB events.
+ */
+struct dsa_switchdev_event_work {
+ struct dsa_switch *ds;
+ int port;
+ struct work_struct work;
+ unsigned long event;
+ /* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and
+ * SWITCHDEV_FDB_DEL_TO_DEVICE
+ */
+ unsigned char addr[ETH_ALEN];
+ u16 vid;
+};
+
+/* DSA_NOTIFIER_HSR_* (the HSR_JOIN and HSR_LEAVE events) */
+struct dsa_notifier_hsr_info {
+ struct net_device *hsr;
+ /* NOTE(review): sw_index/port presumably identify the switch and the
+ * port the event refers to - confirm against the notifier callers.
+ */
+ int sw_index;
+ int port;
+};
+
struct dsa_slave_priv {
/* Copy of CPU port xmit for faster access in slave transmit hot path */
struct sk_buff * (*xmit)(struct sk_buff *skb,
@@ -94,19 +149,11 @@ struct dsa_slave_priv {
/* dsa.c */
const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol);
void dsa_tag_driver_put(const struct dsa_device_ops *ops);
+const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
bool dsa_schedule_work(struct work_struct *work);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
-int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid,
- u16 flags,
- struct netlink_ext_ack *extack);
-int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid);
-
/* master.c */
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
void dsa_master_teardown(struct net_device *dev);
@@ -127,19 +174,24 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
}
/* port.c */
-int dsa_port_set_state(struct dsa_port *dp, u8 state,
- struct switchdev_trans *trans);
+void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
+ const struct dsa_device_ops *tag_ops);
+int dsa_port_set_state(struct dsa_port *dp, u8 state);
int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
+int dsa_port_lag_change(struct dsa_port *dp,
+ struct netdev_lag_lower_state_info *linfo);
+int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
+ struct netdev_lag_upper_info *uinfo);
+void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
- struct switchdev_trans *trans);
+ struct netlink_ext_ack *extack);
bool dsa_port_skip_vlan_configuration(struct dsa_port *dp);
-int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
- struct switchdev_trans *trans);
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock);
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
bool propagate_upstream);
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
@@ -148,35 +200,84 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
int dsa_port_mdb_add(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans);
+ const struct switchdev_obj_port_mdb *mdb);
int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags,
- struct switchdev_trans *trans);
-int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags,
- struct switchdev_trans *trans);
+int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack);
+int dsa_port_bridge_flags(const struct dsa_port *dp,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack);
int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
- struct switchdev_trans *trans);
+ struct netlink_ext_ack *extack);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans);
+ struct netlink_ext_ack *extack);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_mrp_add(const struct dsa_port *dp,
+ const struct switchdev_obj_mrp *mrp);
+int dsa_port_mrp_del(const struct dsa_port *dp,
+ const struct switchdev_obj_mrp *mrp);
+int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
+ const struct switchdev_obj_ring_role_mrp *mrp);
+int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
+ const struct switchdev_obj_ring_role_mrp *mrp);
int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
+int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
+void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
+/* Returns true if @dev is a net_device whose switchdev configuration port
+ * @dp offloads: the port's own slave, its bridge_dev or its lag_dev.
+ * Returns false for any other device.
+ */
+static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
+ struct net_device *dev)
+{
+ /* Switchdev offloading can be configured on: */
+
+ if (dev == dp->slave)
+ /* DSA ports directly connected to a bridge, and event
+ * was emitted for the ports themselves.
+ */
+ return true;
+
+ if (dp->bridge_dev == dev)
+ /* DSA ports connected to a bridge, and event was emitted
+ * for the bridge.
+ */
+ return true;
+
+ if (dp->lag_dev == dev)
+ /* DSA ports connected to a bridge via a LAG */
+ return true;
+
+ return false;
+}
+
+/* Returns true if any port of this tree offloads the given net_device.
+ *
+ * Walks dst->ports and stops at the first port for which
+ * dsa_port_offloads_netdev() is true; returns false if there is none.
+ */
+static inline bool dsa_tree_offloads_netdev(struct dsa_switch_tree *dst,
+ struct net_device *dev)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_offloads_netdev(dp, dev))
+ return true;
+
+ return false;
+}
+
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
int dsa_slave_create(struct dsa_port *dp);
void dsa_slave_destroy(struct net_device *slave_dev);
-bool dsa_slave_dev_check(const struct net_device *dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
+void dsa_slave_setup_tagger(struct net_device *slave);
+int dsa_slave_change_mtu(struct net_device *dev, int new_mtu);
static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
{
@@ -257,6 +358,15 @@ int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
/* dsa2.c */
+void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag);
+void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag);
+int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
+int dsa_broadcast(unsigned long e, void *v);
+int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
+ struct net_device *master,
+ const struct dsa_device_ops *tag_ops,
+ const struct dsa_device_ops *old_tag_ops);
+
extern struct list_head dsa_tree_list;
#endif
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 5a0f6fec4271..052a977914a6 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -280,7 +280,44 @@ static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
return sprintf(buf, "%s\n",
dsa_tag_protocol_to_str(cpu_dp->tag_ops));
}
-static DEVICE_ATTR_RO(tagging);
+
+/* sysfs store handler for the "tagging" device attribute.
+ *
+ * Looks up the tagger named in @buf and, unless it is the one the CPU port
+ * already uses, asks dsa_tree_change_tag_proto() to switch the whole tree
+ * over to it. Returns @count on success, the lookup error if no usable
+ * tagger is found, or the error from dsa_tree_change_tag_proto().
+ *
+ * dsa_find_tagger_by_name() returns with a reference held on the tagger, so
+ * exactly one dsa_tag_driver_put() is issued on every path past the lookup:
+ * on the new tagger when the change fails, otherwise on the old one (which
+ * is the very same object when nothing needs to change).
+ */
+static ssize_t tagging_store(struct device *d, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ const struct dsa_device_ops *new_tag_ops, *old_tag_ops;
+ struct net_device *dev = to_net_dev(d);
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ int err;
+
+ old_tag_ops = cpu_dp->tag_ops;
+ new_tag_ops = dsa_find_tagger_by_name(buf);
+ /* Bad tagger name, or module is not loaded? */
+ if (IS_ERR(new_tag_ops))
+ return PTR_ERR(new_tag_ops);
+
+ if (new_tag_ops == old_tag_ops)
+ /* Drop the temporarily held duplicate reference, since
+ * the DSA switch tree uses this tagger.
+ */
+ goto out;
+
+ err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, dev, new_tag_ops,
+ old_tag_ops);
+ if (err) {
+ /* On failure the old tagger is restored, so we don't need the
+ * driver for the new one.
+ */
+ dsa_tag_driver_put(new_tag_ops);
+ return err;
+ }
+
+ /* On success we no longer need the module for the old tagging protocol
+ */
+out:
+ dsa_tag_driver_put(old_tag_ops);
+ return count;
+}
+static DEVICE_ATTR_RW(tagging);
static struct attribute *dsa_slave_attrs[] = {
&dev_attr_tagging.attr,
@@ -309,8 +346,18 @@ static struct lock_class_key dsa_master_addr_list_lock_key;
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead;
+ struct dsa_switch *ds = cpu_dp->ds;
+ struct device_link *consumer_link;
int ret;
+ /* The DSA master must use SET_NETDEV_DEV for this to work. */
+ consumer_link = device_link_add(ds->dev, dev->dev.parent,
+ DL_FLAG_AUTOREMOVE_CONSUMER);
+ if (!consumer_link)
+ netdev_err(dev,
+ "Failed to create a device link to DSA switch %s\n",
+ dev_name(ds->dev));
+
rtnl_lock();
ret = dev_set_mtu(dev, mtu);
rtnl_unlock();
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 73569c9af3cc..c9c6d7ab3f47 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -13,44 +13,32 @@
#include "dsa_priv.h"
-static int dsa_broadcast(unsigned long e, void *v)
-{
- struct dsa_switch_tree *dst;
- int err = 0;
-
- list_for_each_entry(dst, &dsa_tree_list, list) {
- struct raw_notifier_head *nh = &dst->nh;
-
- err = raw_notifier_call_chain(nh, e, v);
- err = notifier_to_errno(err);
- if (err)
- break;
- }
-
- return err;
-}
-
+/**
+ * dsa_port_notify - Notify the switching fabric of changes to a port
+ * @dp: port on which change occurred
+ * @e: event, must be of type DSA_NOTIFIER_*
+ * @v: event-specific value.
+ *
+ * Notify all switches in the DSA tree that this port's switch belongs to,
+ * including this switch itself, of an event. Allows the other switches to
+ * reconfigure themselves for cross-chip operations. Can also be used to
+ * reconfigure ports without net_devices (CPU ports, DSA links) whenever
+ * a user port's state changes.
+ */
static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
{
- struct raw_notifier_head *nh = &dp->ds->dst->nh;
- int err;
-
- err = raw_notifier_call_chain(nh, e, v);
-
- return notifier_to_errno(err);
+ return dsa_tree_notify(dp->ds->dst, e, v);
}
-int dsa_port_set_state(struct dsa_port *dp, u8 state,
- struct switchdev_trans *trans)
+int dsa_port_set_state(struct dsa_port *dp, u8 state)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
- if (switchdev_trans_ph_prepare(trans))
- return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
+ if (!ds->ops->port_stp_state_set)
+ return -EOPNOTSUPP;
- if (ds->ops->port_stp_state_set)
- ds->ops->port_stp_state_set(ds, port, state);
+ ds->ops->port_stp_state_set(ds, port, state);
if (ds->ops->port_fast_age) {
/* Fast age FDB entries or flush appropriate forwarding database
@@ -75,7 +63,7 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
{
int err;
- err = dsa_port_set_state(dp, state, NULL);
+ err = dsa_port_set_state(dp, state);
if (err)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
@@ -134,6 +122,28 @@ void dsa_port_disable(struct dsa_port *dp)
rtnl_unlock();
}
+static void dsa_port_change_brport_flags(struct dsa_port *dp,
+ bool bridge_offload)
+{
+ struct switchdev_brport_flags flags;
+ int flag;
+
+ flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
+ if (bridge_offload)
+ flags.val = flags.mask;
+ else
+ flags.val = flags.mask & ~BR_LEARNING;
+
+ for_each_set_bit(flag, &flags.mask, 32) {
+ struct switchdev_brport_flags tmp;
+
+ tmp.val = flags.val & BIT(flag);
+ tmp.mask = BIT(flag);
+
+ dsa_port_bridge_flags(dp, tmp, NULL);
+ }
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -144,10 +154,10 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
};
int err;
- /* Set the flooding mode before joining the port in the switch */
- err = dsa_port_bridge_flags(dp, BR_FLOOD | BR_MCAST_FLOOD, NULL);
- if (err)
- return err;
+ /* Notify the port driver to set its configurable flags in a way that
+ * matches the initial settings of a bridge port.
+ */
+ dsa_port_change_brport_flags(dp, true);
/* Here the interface is already bridged. Reflect the current
* configuration so that drivers can program their chips accordingly.
@@ -158,7 +168,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
/* The bridging is rolled back on error */
if (err) {
- dsa_port_bridge_flags(dp, 0, NULL);
+ dsa_port_change_brport_flags(dp, false);
dp->bridge_dev = NULL;
}
@@ -184,8 +194,18 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
if (err)
pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
- /* Port is leaving the bridge, disable flooding */
- dsa_port_bridge_flags(dp, 0, NULL);
+ /* Configure the port for standalone mode (no address learning,
+ * flood everything).
+ * The bridge only emits SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS events
+ * when the user requests it through netlink or sysfs, but not
+ * automatically at port join or leave, so we need to handle resetting
+ * the brport flags ourselves. But we even prefer it that way, because
+ * otherwise, some setups might never get the notification they need,
+ * for example, when a port leaves a LAG that offloads the bridge,
+ * it becomes standalone, but as far as the bridge is concerned, no
+ * port ever left.
+ */
+ dsa_port_change_brport_flags(dp, false);
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
* so allow it to be in BR_STATE_FORWARDING to be kept functional
@@ -193,9 +213,89 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
}
+int dsa_port_lag_change(struct dsa_port *dp,
+ struct netdev_lag_lower_state_info *linfo)
+{
+ struct dsa_notifier_lag_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ };
+ bool tx_enabled;
+
+ if (!dp->lag_dev)
+ return 0;
+
+ /* On statically configured aggregates (e.g. loadbalance
+ * without LACP) ports will always be tx_enabled, even if the
+ * link is down. Thus we require both link_up and tx_enabled
+ * in order to include it in the tx set.
+ */
+ tx_enabled = linfo->link_up && linfo->tx_enabled;
+
+ if (tx_enabled == dp->lag_tx_enabled)
+ return 0;
+
+ dp->lag_tx_enabled = tx_enabled;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info);
+}
+
+int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
+ struct netdev_lag_upper_info *uinfo)
+{
+ struct dsa_notifier_lag_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .lag = lag,
+ .info = uinfo,
+ };
+ int err;
+
+ dsa_lag_map(dp->ds->dst, lag);
+ dp->lag_dev = lag;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info);
+ if (err) {
+ dp->lag_dev = NULL;
+ dsa_lag_unmap(dp->ds->dst, lag);
+ }
+
+ return err;
+}
+
+void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
+{
+ struct dsa_notifier_lag_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .lag = lag,
+ };
+ int err;
+
+ if (!dp->lag_dev)
+ return;
+
+ /* Port might have been part of a LAG that in turn was
+ * attached to a bridge.
+ */
+ if (dp->bridge_dev)
+ dsa_port_bridge_leave(dp, dp->bridge_dev);
+
+ dp->lag_tx_enabled = false;
+ dp->lag_dev = NULL;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
+ if (err)
+ pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n",
+ err);
+
+ dsa_lag_unmap(dp->ds->dst, lag);
+}
+
/* Must be called under rcu_read_lock() */
static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
- bool vlan_filtering)
+ bool vlan_filtering,
+ struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
int err, i;
@@ -225,8 +325,8 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
*/
err = br_vlan_get_info(br, vid, &br_info);
if (err == 0) {
- dev_err(ds->dev, "Must remove upper %s first\n",
- upper_dev->name);
+ NL_SET_ERR_MSG_MOD(extack,
+ "Must first remove VLAN uppers having VIDs also present in bridge");
return false;
}
}
@@ -252,7 +352,8 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
if (other_bridge == dp->bridge_dev)
continue;
if (br_vlan_enabled(other_bridge) != vlan_filtering) {
- dev_err(ds->dev, "VLAN filtering is a global setting\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "VLAN filtering is a global setting");
return false;
}
}
@@ -260,42 +361,37 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
}
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
- struct switchdev_trans *trans)
+ struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
+ bool apply;
int err;
- if (switchdev_trans_ph_prepare(trans)) {
- bool apply;
-
- if (!ds->ops->port_vlan_filtering)
- return -EOPNOTSUPP;
+ if (!ds->ops->port_vlan_filtering)
+ return -EOPNOTSUPP;
- /* We are called from dsa_slave_switchdev_blocking_event(),
- * which is not under rcu_read_lock(), unlike
- * dsa_slave_switchdev_event().
- */
- rcu_read_lock();
- apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering);
- rcu_read_unlock();
- if (!apply)
- return -EINVAL;
- }
+ /* We are called from dsa_slave_switchdev_blocking_event(),
+ * which is not under rcu_read_lock(), unlike
+ * dsa_slave_switchdev_event().
+ */
+ rcu_read_lock();
+ apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering, extack);
+ rcu_read_unlock();
+ if (!apply)
+ return -EINVAL;
if (dsa_port_is_vlan_filtering(dp) == vlan_filtering)
return 0;
err = ds->ops->port_vlan_filtering(ds, dp->index, vlan_filtering,
- trans);
+ extack);
if (err)
return err;
- if (switchdev_trans_ph_commit(trans)) {
- if (ds->vlan_filtering_is_global)
- ds->vlan_filtering = vlan_filtering;
- else
- dp->vlan_filtering = vlan_filtering;
- }
+ if (ds->vlan_filtering_is_global)
+ ds->vlan_filtering = vlan_filtering;
+ else
+ dp->vlan_filtering = vlan_filtering;
return 0;
}
@@ -314,63 +410,57 @@ bool dsa_port_skip_vlan_configuration(struct dsa_port *dp)
!br_vlan_enabled(dp->bridge_dev));
}
-int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
- struct switchdev_trans *trans)
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock)
{
unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock);
unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
- struct dsa_notifier_ageing_time_info info = {
- .ageing_time = ageing_time,
- .trans = trans,
- };
+ struct dsa_notifier_ageing_time_info info;
+ int err;
+
+ info.ageing_time = ageing_time;
- if (switchdev_trans_ph_prepare(trans))
- return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
+ err = dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
+ if (err)
+ return err;
dp->ageing_time = ageing_time;
- return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
+ return 0;
}
-int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags,
- struct switchdev_trans *trans)
+int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
- if (!ds->ops->port_egress_floods ||
- (flags & ~(BR_FLOOD | BR_MCAST_FLOOD)))
+ if (!ds->ops->port_pre_bridge_flags)
return -EINVAL;
- return 0;
+ return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack);
}
-int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags,
- struct switchdev_trans *trans)
+int dsa_port_bridge_flags(const struct dsa_port *dp,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
- int port = dp->index;
- int err = 0;
- if (switchdev_trans_ph_prepare(trans))
- return 0;
-
- if (ds->ops->port_egress_floods)
- err = ds->ops->port_egress_floods(ds, port, flags & BR_FLOOD,
- flags & BR_MCAST_FLOOD);
+ if (!ds->ops->port_bridge_flags)
+ return -EINVAL;
- return err;
+ return ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
}
int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
- struct switchdev_trans *trans)
+ struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
- int port = dp->index;
- if (switchdev_trans_ph_prepare(trans))
- return ds->ops->port_egress_floods ? 0 : -EOPNOTSUPP;
+ if (!ds->ops->port_set_mrouter)
+ return -EOPNOTSUPP;
- return ds->ops->port_egress_floods(ds, port, true, mrouter);
+ return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack);
}
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
@@ -425,13 +515,11 @@ int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
}
int dsa_port_mdb_add(const struct dsa_port *dp,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
+ const struct switchdev_obj_port_mdb *mdb)
{
struct dsa_notifier_mdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
- .trans = trans,
.mdb = mdb,
};
@@ -452,13 +540,13 @@ int dsa_port_mdb_del(const struct dsa_port *dp,
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
+ struct netlink_ext_ack *extack)
{
struct dsa_notifier_vlan_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
- .trans = trans,
.vlan = vlan,
+ .extack = extack,
};
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
@@ -476,6 +564,62 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
}
+int dsa_port_mrp_add(const struct dsa_port *dp,
+ const struct switchdev_obj_mrp *mrp)
+{
+ struct dsa_notifier_mrp_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .mrp = mrp,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_MRP_ADD, &info);
+}
+
+int dsa_port_mrp_del(const struct dsa_port *dp,
+ const struct switchdev_obj_mrp *mrp)
+{
+ struct dsa_notifier_mrp_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .mrp = mrp,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_MRP_DEL, &info);
+}
+
+int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
+ const struct switchdev_obj_ring_role_mrp *mrp)
+{
+ struct dsa_notifier_mrp_ring_role_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .mrp = mrp,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_MRP_ADD_RING_ROLE, &info);
+}
+
+int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
+ const struct switchdev_obj_ring_role_mrp *mrp)
+{
+ struct dsa_notifier_mrp_ring_role_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .mrp = mrp,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_MRP_DEL_RING_ROLE, &info);
+}
+
+void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
+ const struct dsa_device_ops *tag_ops)
+{
+ cpu_dp->filter = tag_ops->filter;
+ cpu_dp->rcv = tag_ops->rcv;
+ cpu_dp->tag_ops = tag_ops;
+}
+
static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
{
struct device_node *phy_dn;
@@ -810,3 +954,37 @@ int dsa_port_get_phy_sset_count(struct dsa_port *dp)
return ret;
}
EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count);
+
+int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
+{
+ struct dsa_notifier_hsr_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .hsr = hsr,
+ };
+ int err;
+
+ dp->hsr_dev = hsr;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_JOIN, &info);
+ if (err)
+ dp->hsr_dev = NULL;
+
+ return err;
+}
+
+void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
+{
+ struct dsa_notifier_hsr_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .hsr = hsr,
+ };
+ int err;
+
+ dp->hsr_dev = NULL;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info);
+ if (err)
+ pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n");
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 4a0498bf6c65..491e3761b5f4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -17,6 +17,7 @@
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
+#include <linux/if_hsr.h>
#include <linux/netpoll.h>
#include <linux/ptp_classify.h>
@@ -68,8 +69,11 @@ static int dsa_slave_open(struct net_device *dev)
struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
- if (!(master->flags & IFF_UP))
- return -ENETDOWN;
+ err = dev_open(master, NULL);
+ if (err < 0) {
+ netdev_err(dev, "failed to open master %s\n", master->name);
+ goto out;
+ }
if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) {
err = dev_uc_add(master, dev->dev_addr);
@@ -269,31 +273,34 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
static int dsa_slave_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
- struct switchdev_trans *trans)
+ struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
int ret;
+ if (!dsa_port_offloads_netdev(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
- ret = dsa_port_set_state(dp, attr->u.stp_state, trans);
+ ret = dsa_port_set_state(dp, attr->u.stp_state);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
- trans);
+ extack);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
- ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans);
+ ret = dsa_port_ageing_time(dp, attr->u.ageing_time);
break;
case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags,
- trans);
+ extack);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
- ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, trans);
+ ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
- ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, trans);
+ ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
break;
default:
ret = -EOPNOTSUPP;
@@ -318,7 +325,7 @@ dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave,
continue;
vid = vlan_dev_vlan_id(upper_dev);
- if (vid >= vlan->vid_begin && vid <= vlan->vid_end)
+ if (vid == vlan->vid)
return -EBUSY;
}
@@ -327,33 +334,38 @@ dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave,
static int dsa_slave_vlan_add(struct net_device *dev,
const struct switchdev_obj *obj,
- struct switchdev_trans *trans)
+ struct netlink_ext_ack *extack)
{
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan vlan;
- int vid, err;
+ int err;
- if (obj->orig_dev != dev)
+ if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
return -EOPNOTSUPP;
- if (dsa_port_skip_vlan_configuration(dp))
+ if (dsa_port_skip_vlan_configuration(dp)) {
+ NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN");
return 0;
+ }
vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
/* Deny adding a bridge VLAN when there is already an 802.1Q upper with
* the same VID.
*/
- if (trans->ph_prepare && br_vlan_enabled(dp->bridge_dev)) {
+ if (br_vlan_enabled(dp->bridge_dev)) {
rcu_read_lock();
err = dsa_slave_vlan_check_for_8021q_uppers(dev, &vlan);
rcu_read_unlock();
- if (err)
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Port already has a VLAN upper with this VID");
return err;
+ }
}
- err = dsa_port_vlan_add(dp, &vlan, trans);
+ err = dsa_port_vlan_add(dp, &vlan, extack);
if (err)
return err;
@@ -363,47 +375,45 @@ static int dsa_slave_vlan_add(struct net_device *dev,
*/
vlan.flags &= ~BRIDGE_VLAN_INFO_PVID;
- err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans);
+ err = dsa_port_vlan_add(dp->cpu_dp, &vlan, extack);
if (err)
return err;
- for (vid = vlan.vid_begin; vid <= vlan.vid_end; vid++) {
- err = vlan_vid_add(master, htons(ETH_P_8021Q), vid);
- if (err)
- return err;
- }
-
- return 0;
+ return vlan_vid_add(master, htons(ETH_P_8021Q), vlan.vid);
}
static int dsa_slave_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
- struct switchdev_trans *trans,
struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
- /* For the prepare phase, ensure the full set of changes is feasable in
- * one go in order to signal a failure properly. If an operation is not
- * supported, return -EOPNOTSUPP.
- */
-
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_MDB:
- if (obj->orig_dev != dev)
+ if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
return -EOPNOTSUPP;
- err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
+ err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_HOST_MDB:
/* DSA can directly translate this to a normal MDB add,
* but on the CPU port.
*/
- err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj),
- trans);
+ err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_slave_vlan_add(dev, obj, trans);
+ err = dsa_slave_vlan_add(dev, obj, extack);
+ break;
+ case SWITCHDEV_OBJ_ID_MRP:
+ if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ return -EOPNOTSUPP;
+ err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj));
+ break;
+ case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
+ if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ return -EOPNOTSUPP;
+ err = dsa_port_mrp_add_ring_role(dp,
+ SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
break;
default:
err = -EOPNOTSUPP;
@@ -419,9 +429,9 @@ static int dsa_slave_vlan_del(struct net_device *dev,
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan *vlan;
- int vid, err;
+ int err;
- if (obj->orig_dev != dev)
+ if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
return -EOPNOTSUPP;
if (dsa_port_skip_vlan_configuration(dp))
@@ -436,8 +446,7 @@ static int dsa_slave_vlan_del(struct net_device *dev,
if (err)
return err;
- for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++)
- vlan_vid_del(master, htons(ETH_P_8021Q), vid);
+ vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid);
return 0;
}
@@ -450,7 +459,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_MDB:
- if (obj->orig_dev != dev)
+ if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
return -EOPNOTSUPP;
err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
@@ -463,6 +472,17 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
case SWITCHDEV_OBJ_ID_PORT_VLAN:
err = dsa_slave_vlan_del(dev, obj);
break;
+ case SWITCHDEV_OBJ_ID_MRP:
+ if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ return -EOPNOTSUPP;
+ err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj));
+ break;
+ case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
+ if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ return -EOPNOTSUPP;
+ err = dsa_port_mrp_del_ring_role(dp,
+ SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
+ break;
default:
err = -EOPNOTSUPP;
break;
@@ -1289,35 +1309,29 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan vlan = {
.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .vid_begin = vid,
- .vid_end = vid,
+ .vid = vid,
/* This API only allows programming tagged, non-PVID VIDs */
.flags = 0,
};
- struct switchdev_trans trans;
+ struct netlink_ext_ack extack = {0};
int ret;
/* User port... */
- trans.ph_prepare = true;
- ret = dsa_port_vlan_add(dp, &vlan, &trans);
- if (ret)
- return ret;
-
- trans.ph_prepare = false;
- ret = dsa_port_vlan_add(dp, &vlan, &trans);
- if (ret)
+ ret = dsa_port_vlan_add(dp, &vlan, &extack);
+ if (ret) {
+ if (extack._msg)
+ netdev_err(dev, "%s\n", extack._msg);
return ret;
+ }
/* And CPU port... */
- trans.ph_prepare = true;
- ret = dsa_port_vlan_add(dp->cpu_dp, &vlan, &trans);
- if (ret)
- return ret;
-
- trans.ph_prepare = false;
- ret = dsa_port_vlan_add(dp->cpu_dp, &vlan, &trans);
- if (ret)
+ ret = dsa_port_vlan_add(dp->cpu_dp, &vlan, &extack);
+ if (ret) {
+ if (extack._msg)
+ netdev_err(dev, "CPU port %d: %s\n", dp->cpu_dp->index,
+ extack._msg);
return ret;
+ }
return vlan_vid_add(master, proto, vid);
}
@@ -1328,8 +1342,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan vlan = {
- .vid_begin = vid,
- .vid_end = vid,
+ .vid = vid,
/* This API only allows programming tagged, non-PVID VIDs */
.flags = 0,
};
@@ -1457,7 +1470,7 @@ out:
dsa_hw_port_list_free(&hw_port_list);
}
-static int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
+int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -1575,20 +1588,20 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
};
/* legacy way, bypassing the bridge *****************************************/
-int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid,
- u16 flags,
- struct netlink_ext_ack *extack)
+static int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid,
+ u16 flags,
+ struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
return dsa_port_fdb_add(dp, addr, vid);
}
-int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid)
+static int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -1602,6 +1615,18 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
return dp->ds->devlink ? &dp->devlink_port : NULL;
}
+static void dsa_slave_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *s)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->get_stats64)
+ ds->ops->get_stats64(ds, dp->index, s);
+ else
+ dev_get_tstats64(dev, s);
+}
+
static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_open = dsa_slave_open,
.ndo_stop = dsa_slave_close,
@@ -1621,7 +1646,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
#endif
.ndo_get_phys_port_name = dsa_slave_get_phys_port_name,
.ndo_setup_tc = dsa_slave_setup_tc,
- .ndo_get_stats64 = dev_get_tstats64,
+ .ndo_get_stats64 = dsa_slave_get_stats64,
.ndo_get_port_parent_id = dsa_slave_get_port_parent_id,
.ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
@@ -1723,6 +1748,27 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
return ret;
}
+void dsa_slave_setup_tagger(struct net_device *slave)
+{
+ struct dsa_port *dp = dsa_slave_to_port(slave);
+ struct dsa_slave_priv *p = netdev_priv(slave);
+ const struct dsa_port *cpu_dp = dp->cpu_dp;
+ struct net_device *master = cpu_dp->master;
+
+ if (cpu_dp->tag_ops->tail_tag)
+ slave->needed_tailroom = cpu_dp->tag_ops->overhead;
+ else
+ slave->needed_headroom = cpu_dp->tag_ops->overhead;
+ /* Try to save one extra realloc later in the TX path (in the master)
+ * by also inheriting the master's needed headroom and tailroom.
+ * The 8021q driver also does this.
+ */
+ slave->needed_headroom += master->needed_headroom;
+ slave->needed_tailroom += master->needed_tailroom;
+
+ p->xmit = cpu_dp->tag_ops->xmit;
+}
+
static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -1764,20 +1810,6 @@ int dsa_slave_resume(struct net_device *slave_dev)
return 0;
}
-static void dsa_slave_notify(struct net_device *dev, unsigned long val)
-{
- struct net_device *master = dsa_slave_to_master(dev);
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_notifier_register_info rinfo = {
- .switch_number = dp->ds->index,
- .port_number = dp->index,
- .master = master,
- .info.dev = dev,
- };
-
- call_dsa_notifiers(val, dev, &rinfo.info);
-}
-
int dsa_slave_create(struct dsa_port *port)
{
const struct dsa_port *cpu_dp = port->cpu_dp;
@@ -1811,16 +1843,6 @@ int dsa_slave_create(struct dsa_port *port)
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
if (ds->ops->port_max_mtu)
slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index);
- if (cpu_dp->tag_ops->tail_tag)
- slave_dev->needed_tailroom = cpu_dp->tag_ops->overhead;
- else
- slave_dev->needed_headroom = cpu_dp->tag_ops->overhead;
- /* Try to save one extra realloc later in the TX path (in the master)
- * by also inheriting the master's needed headroom and tailroom.
- * The 8021q driver also does this.
- */
- slave_dev->needed_headroom += master->needed_headroom;
- slave_dev->needed_tailroom += master->needed_tailroom;
SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
@@ -1843,8 +1865,8 @@ int dsa_slave_create(struct dsa_port *port)
p->dp = port;
INIT_LIST_HEAD(&p->mall_tc_list);
- p->xmit = cpu_dp->tag_ops->xmit;
port->slave = slave_dev;
+ dsa_slave_setup_tagger(slave_dev);
rtnl_lock();
ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN);
@@ -1863,8 +1885,6 @@ int dsa_slave_create(struct dsa_port *port)
goto out_gcells;
}
- dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
-
rtnl_lock();
ret = register_netdevice(slave_dev);
@@ -1913,7 +1933,6 @@ void dsa_slave_destroy(struct net_device *slave_dev)
phylink_disconnect_phy(dp->pl);
rtnl_unlock();
- dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
phylink_destroy(dp->pl);
gro_cells_destroy(&p->gcells);
free_percpu(slave_dev->tstats);
@@ -1924,6 +1943,7 @@ bool dsa_slave_dev_check(const struct net_device *dev)
{
return dev->netdev_ops == &dsa_slave_netdev_ops;
}
+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
static int dsa_slave_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
@@ -1941,6 +1961,59 @@ static int dsa_slave_changeupper(struct net_device *dev,
dsa_port_bridge_leave(dp, info->upper_dev);
err = NOTIFY_OK;
}
+ } else if (netif_is_lag_master(info->upper_dev)) {
+ if (info->linking) {
+ err = dsa_port_lag_join(dp, info->upper_dev,
+ info->upper_info);
+ if (err == -EOPNOTSUPP) {
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Offloading not supported");
+ err = 0;
+ }
+ err = notifier_from_errno(err);
+ } else {
+ dsa_port_lag_leave(dp, info->upper_dev);
+ err = NOTIFY_OK;
+ }
+ } else if (is_hsr_master(info->upper_dev)) {
+ if (info->linking) {
+ err = dsa_port_hsr_join(dp, info->upper_dev);
+ if (err == -EOPNOTSUPP) {
+ NL_SET_ERR_MSG_MOD(info->info.extack,
+ "Offloading not supported");
+ err = 0;
+ }
+ err = notifier_from_errno(err);
+ } else {
+ dsa_port_hsr_leave(dp, info->upper_dev);
+ err = NOTIFY_OK;
+ }
+ }
+
+ return err;
+}
+
+static int
+dsa_slave_lag_changeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+ int err = NOTIFY_DONE;
+ struct dsa_port *dp;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ if (!dsa_slave_dev_check(lower))
+ continue;
+
+ dp = dsa_slave_to_port(lower);
+ if (!dp->lag_dev)
+ /* Software LAG */
+ continue;
+
+ err = dsa_slave_changeupper(lower, info);
+ if (notifier_to_errno(err))
+ break;
}
return err;
@@ -2038,128 +2111,224 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
break;
}
case NETDEV_CHANGEUPPER:
+ if (dsa_slave_dev_check(dev))
+ return dsa_slave_changeupper(dev, ptr);
+
+ if (netif_is_lag_master(dev))
+ return dsa_slave_lag_changeupper(dev, ptr);
+
+ break;
+ case NETDEV_CHANGELOWERSTATE: {
+ struct netdev_notifier_changelowerstate_info *info = ptr;
+ struct dsa_port *dp;
+ int err;
+
if (!dsa_slave_dev_check(dev))
+ break;
+
+ dp = dsa_slave_to_port(dev);
+
+ err = dsa_port_lag_change(dp, info->lower_state_info);
+ return notifier_from_errno(err);
+ }
+ case NETDEV_GOING_DOWN: {
+ struct dsa_port *dp, *cpu_dp;
+ struct dsa_switch_tree *dst;
+ LIST_HEAD(close_list);
+
+ if (!netdev_uses_dsa(dev))
return NOTIFY_DONE;
- return dsa_slave_changeupper(dev, ptr);
+ cpu_dp = dev->dsa_ptr;
+ dst = cpu_dp->ds->dst;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (!dsa_is_user_port(dp->ds, dp->index))
+ continue;
+
+ list_add(&dp->slave->close_list, &close_list);
+ }
+
+ dev_close_many(&close_list, true);
+
+ return NOTIFY_OK;
+ }
+ default:
+ break;
}
return NOTIFY_DONE;
}
-struct dsa_switchdev_event_work {
- struct work_struct work;
- struct switchdev_notifier_fdb_info fdb_info;
- struct net_device *dev;
- unsigned long event;
-};
+static void
+dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work)
+{
+ struct dsa_switch *ds = switchdev_work->ds;
+ struct switchdev_notifier_fdb_info info;
+ struct dsa_port *dp;
+
+ if (!dsa_is_user_port(ds, switchdev_work->port))
+ return;
+
+ info.addr = switchdev_work->addr;
+ info.vid = switchdev_work->vid;
+ info.offloaded = true;
+ dp = dsa_to_port(ds, switchdev_work->port);
+ call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
+ dp->slave, &info.info, NULL);
+}
static void dsa_slave_switchdev_event_work(struct work_struct *work)
{
struct dsa_switchdev_event_work *switchdev_work =
container_of(work, struct dsa_switchdev_event_work, work);
- struct net_device *dev = switchdev_work->dev;
- struct switchdev_notifier_fdb_info *fdb_info;
- struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = switchdev_work->ds;
+ struct dsa_port *dp;
int err;
+ dp = dsa_to_port(ds, switchdev_work->port);
+
rtnl_lock();
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
- fdb_info = &switchdev_work->fdb_info;
- if (!fdb_info->added_by_user)
- break;
-
- err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
+ err = dsa_port_fdb_add(dp, switchdev_work->addr,
+ switchdev_work->vid);
if (err) {
- netdev_dbg(dev, "fdb add failed err=%d\n", err);
+ dev_err(ds->dev,
+ "port %d failed to add %pM vid %d to fdb: %d\n",
+ dp->index, switchdev_work->addr,
+ switchdev_work->vid, err);
break;
}
- fdb_info->offloaded = true;
- call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
- &fdb_info->info, NULL);
+ dsa_fdb_offload_notify(switchdev_work);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- fdb_info = &switchdev_work->fdb_info;
- if (!fdb_info->added_by_user)
- break;
-
- err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
+ err = dsa_port_fdb_del(dp, switchdev_work->addr,
+ switchdev_work->vid);
if (err) {
- netdev_dbg(dev, "fdb del failed err=%d\n", err);
- dev_close(dev);
+ dev_err(ds->dev,
+ "port %d failed to delete %pM vid %d from fdb: %d\n",
+ dp->index, switchdev_work->addr,
+ switchdev_work->vid, err);
}
+
break;
}
rtnl_unlock();
- kfree(switchdev_work->fdb_info.addr);
kfree(switchdev_work);
- dev_put(dev);
+ if (dsa_is_user_port(ds, dp->index))
+ dev_put(dp->slave);
}
-static int
-dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work *
- switchdev_work,
- const struct switchdev_notifier_fdb_info *
- fdb_info)
-{
- memcpy(&switchdev_work->fdb_info, fdb_info,
- sizeof(switchdev_work->fdb_info));
- switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
- if (!switchdev_work->fdb_info.addr)
- return -ENOMEM;
- ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
- fdb_info->addr);
+static int dsa_lower_dev_walk(struct net_device *lower_dev,
+ struct netdev_nested_priv *priv)
+{
+ if (dsa_slave_dev_check(lower_dev)) {
+ priv->data = (void *)netdev_priv(lower_dev);
+ return 1;
+ }
+
return 0;
}
+static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev)
+{
+ struct netdev_nested_priv priv = {
+ .data = NULL,
+ };
+
+ netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv);
+
+ return (struct dsa_slave_priv *)priv.data;
+}
+
/* Called under rcu_read_lock() */
static int dsa_slave_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ const struct switchdev_notifier_fdb_info *fdb_info;
struct dsa_switchdev_event_work *switchdev_work;
+ struct dsa_port *dp;
int err;
- if (event == SWITCHDEV_PORT_ATTR_SET) {
+ switch (event) {
+ case SWITCHDEV_PORT_ATTR_SET:
err = switchdev_handle_port_attr_set(dev, ptr,
dsa_slave_dev_check,
dsa_slave_port_attr_set);
return notifier_from_errno(err);
- }
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ fdb_info = ptr;
- if (!dsa_slave_dev_check(dev))
- return NOTIFY_DONE;
+ if (dsa_slave_dev_check(dev)) {
+ if (!fdb_info->added_by_user)
+ return NOTIFY_OK;
- switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
- if (!switchdev_work)
- return NOTIFY_BAD;
+ dp = dsa_slave_to_port(dev);
+ } else {
+ /* Snoop addresses learnt on foreign interfaces
+ * bridged with us, for switches that don't
+ * automatically learn SA from CPU-injected traffic
+ */
+ struct net_device *br_dev;
+ struct dsa_slave_priv *p;
+
+ br_dev = netdev_master_upper_dev_get_rcu(dev);
+ if (!br_dev)
+ return NOTIFY_DONE;
+
+ if (!netif_is_bridge_master(br_dev))
+ return NOTIFY_DONE;
+
+ p = dsa_slave_dev_lower_find(br_dev);
+ if (!p)
+ return NOTIFY_DONE;
+
+ dp = p->dp->cpu_dp;
+
+ if (!dp->ds->assisted_learning_on_cpu_port)
+ return NOTIFY_DONE;
+
+ /* When the bridge learns an address on an offloaded
+ * LAG we don't want to send traffic to the CPU, the
+ * other ports bridged with the LAG should be able to
+ * autonomously forward towards it.
+ */
+ if (dsa_tree_offloads_netdev(dp->ds->dst, dev))
+ return NOTIFY_DONE;
+ }
- INIT_WORK(&switchdev_work->work,
- dsa_slave_switchdev_event_work);
- switchdev_work->dev = dev;
- switchdev_work->event = event;
+ if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del)
+ return NOTIFY_DONE;
- switch (event) {
- case SWITCHDEV_FDB_ADD_TO_DEVICE:
- case SWITCHDEV_FDB_DEL_TO_DEVICE:
- if (dsa_slave_switchdev_fdb_work_init(switchdev_work, ptr))
- goto err_fdb_work_init;
- dev_hold(dev);
+ switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+ if (!switchdev_work)
+ return NOTIFY_BAD;
+
+ INIT_WORK(&switchdev_work->work,
+ dsa_slave_switchdev_event_work);
+ switchdev_work->ds = dp->ds;
+ switchdev_work->port = dp->index;
+ switchdev_work->event = event;
+
+ ether_addr_copy(switchdev_work->addr,
+ fdb_info->addr);
+ switchdev_work->vid = fdb_info->vid;
+
+ /* Hold a reference on the slave for dsa_fdb_offload_notify */
+ if (dsa_is_user_port(dp->ds, dp->index))
+ dev_hold(dev);
+ dsa_schedule_work(&switchdev_work->work);
break;
default:
- kfree(switchdev_work);
return NOTIFY_DONE;
}
- dsa_schedule_work(&switchdev_work->work);
return NOTIFY_OK;
-
-err_fdb_work_init:
- kfree(switchdev_work);
- return NOTIFY_BAD;
}
static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 3fb362b6874e..4b5da89dc27a 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -33,15 +33,12 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds,
struct dsa_notifier_ageing_time_info *info)
{
unsigned int ageing_time = info->ageing_time;
- struct switchdev_trans *trans = info->trans;
-
- if (switchdev_trans_ph_prepare(trans)) {
- if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
- return -ERANGE;
- if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
- return -ERANGE;
- return 0;
- }
+
+ if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
+ return -ERANGE;
+
+ if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
+ return -ERANGE;
/* Program the fastest ageing time in case of multiple bridges */
ageing_time = dsa_switch_fastest_ageing_time(ds, ageing_time);
@@ -109,6 +106,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
{
bool unset_vlan_filtering = br_vlan_enabled(info->br);
struct dsa_switch_tree *dst = ds->dst;
+ struct netlink_ext_ack extack = {0};
int err, i;
if (dst->index == info->tree_index && ds->index == info->sw_index &&
@@ -139,17 +137,11 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
}
}
if (unset_vlan_filtering) {
- struct switchdev_trans trans;
-
- trans.ph_prepare = true;
err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
- false, &trans);
- if (err && err != EOPNOTSUPP)
- return err;
-
- trans.ph_prepare = false;
- err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
- false, &trans);
+ false, &extack);
+ if (extack._msg)
+ dev_err(ds->dev, "port %d: %s\n", info->port,
+ extack._msg);
if (err && err != EOPNOTSUPP)
return err;
}
@@ -178,6 +170,65 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
}
+static int dsa_switch_hsr_join(struct dsa_switch *ds,
+ struct dsa_notifier_hsr_info *info)
+{
+ if (ds->index == info->sw_index && ds->ops->port_hsr_join)
+ return ds->ops->port_hsr_join(ds, info->port, info->hsr);
+
+ return -EOPNOTSUPP;
+}
+
+static int dsa_switch_hsr_leave(struct dsa_switch *ds,
+ struct dsa_notifier_hsr_info *info)
+{
+ if (ds->index == info->sw_index && ds->ops->port_hsr_leave)
+ return ds->ops->port_hsr_leave(ds, info->port, info->hsr);
+
+ return -EOPNOTSUPP;
+}
+
+static int dsa_switch_lag_change(struct dsa_switch *ds,
+ struct dsa_notifier_lag_info *info)
+{
+ if (ds->index == info->sw_index && ds->ops->port_lag_change)
+ return ds->ops->port_lag_change(ds, info->port);
+
+ if (ds->index != info->sw_index && ds->ops->crosschip_lag_change)
+ return ds->ops->crosschip_lag_change(ds, info->sw_index,
+ info->port);
+
+ return 0;
+}
+
+static int dsa_switch_lag_join(struct dsa_switch *ds,
+ struct dsa_notifier_lag_info *info)
+{
+ if (ds->index == info->sw_index && ds->ops->port_lag_join)
+ return ds->ops->port_lag_join(ds, info->port, info->lag,
+ info->info);
+
+ if (ds->index != info->sw_index && ds->ops->crosschip_lag_join)
+ return ds->ops->crosschip_lag_join(ds, info->sw_index,
+ info->port, info->lag,
+ info->info);
+
+ return 0;
+}
+
+static int dsa_switch_lag_leave(struct dsa_switch *ds,
+ struct dsa_notifier_lag_info *info)
+{
+ if (ds->index == info->sw_index && ds->ops->port_lag_leave)
+ return ds->ops->port_lag_leave(ds, info->port, info->lag);
+
+ if (ds->index != info->sw_index && ds->ops->crosschip_lag_leave)
+ return ds->ops->crosschip_lag_leave(ds, info->sw_index,
+ info->port, info->lag);
+
+ return 0;
+}
+
static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
struct dsa_notifier_mdb_info *info)
{
@@ -190,41 +241,24 @@ static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
return false;
}
-static int dsa_switch_mdb_prepare(struct dsa_switch *ds,
- struct dsa_notifier_mdb_info *info)
+static int dsa_switch_mdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_mdb_info *info)
{
- int port, err;
+ int err = 0;
+ int port;
- if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
+ if (!ds->ops->port_mdb_add)
return -EOPNOTSUPP;
for (port = 0; port < ds->num_ports; port++) {
if (dsa_switch_mdb_match(ds, port, info)) {
- err = ds->ops->port_mdb_prepare(ds, port, info->mdb);
+ err = ds->ops->port_mdb_add(ds, port, info->mdb);
if (err)
- return err;
+ break;
}
}
- return 0;
-}
-
-static int dsa_switch_mdb_add(struct dsa_switch *ds,
- struct dsa_notifier_mdb_info *info)
-{
- int port;
-
- if (switchdev_trans_ph_prepare(info->trans))
- return dsa_switch_mdb_prepare(ds, info);
-
- if (!ds->ops->port_mdb_add)
- return 0;
-
- for (port = 0; port < ds->num_ports; port++)
- if (dsa_switch_mdb_match(ds, port, info))
- ds->ops->port_mdb_add(ds, port, info->mdb);
-
- return 0;
+ return err;
}
static int dsa_switch_mdb_del(struct dsa_switch *ds,
@@ -251,17 +285,18 @@ static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
return false;
}
-static int dsa_switch_vlan_prepare(struct dsa_switch *ds,
- struct dsa_notifier_vlan_info *info)
+static int dsa_switch_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_vlan_info *info)
{
int port, err;
- if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
+ if (!ds->ops->port_vlan_add)
return -EOPNOTSUPP;
for (port = 0; port < ds->num_ports; port++) {
if (dsa_switch_vlan_match(ds, port, info)) {
- err = ds->ops->port_vlan_prepare(ds, port, info->vlan);
+ err = ds->ops->port_vlan_add(ds, port, info->vlan,
+ info->extack);
if (err)
return err;
}
@@ -270,36 +305,163 @@ static int dsa_switch_vlan_prepare(struct dsa_switch *ds,
return 0;
}
-static int dsa_switch_vlan_add(struct dsa_switch *ds,
+static int dsa_switch_vlan_del(struct dsa_switch *ds,
struct dsa_notifier_vlan_info *info)
{
+ if (!ds->ops->port_vlan_del)
+ return -EOPNOTSUPP;
+
+ if (ds->index == info->sw_index)
+ return ds->ops->port_vlan_del(ds, info->port, info->vlan);
+
+ /* Do not deprogram the DSA links as they may be used as conduit
+ * for other VLAN members in the fabric.
+ */
+ return 0;
+}
+
+static bool dsa_switch_tag_proto_match(struct dsa_switch *ds, int port,
+ struct dsa_notifier_tag_proto_info *info)
+{
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+ return true;
+
+ return false;
+}
+
+static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
+ struct dsa_notifier_tag_proto_info *info)
+{
+ const struct dsa_device_ops *tag_ops = info->tag_ops;
+ int port, err;
+
+ if (!ds->ops->change_tag_protocol)
+ return -EOPNOTSUPP;
+
+ ASSERT_RTNL();
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_tag_proto_match(ds, port, info)) {
+ err = ds->ops->change_tag_protocol(ds, port,
+ tag_ops->proto);
+ if (err)
+ return err;
+
+ if (dsa_is_cpu_port(ds, port))
+ dsa_port_set_tag_protocol(dsa_to_port(ds, port),
+ tag_ops);
+ }
+ }
+
+ /* Now that changing the tag protocol can no longer fail, let's update
+ * the remaining bits which are "duplicated for faster access", and the
+ * bits that depend on the tagger, such as the MTU.
+ */
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_is_user_port(ds, port)) {
+ struct net_device *slave;
+
+ slave = dsa_to_port(ds, port)->slave;
+ dsa_slave_setup_tagger(slave);
+
+ /* rtnl_mutex is held in dsa_tree_change_tag_proto */
+ dsa_slave_change_mtu(slave, slave->mtu);
+ }
+ }
+
+ return 0;
+}
+
+static bool dsa_switch_mrp_match(struct dsa_switch *ds, int port,
+ struct dsa_notifier_mrp_info *info)
+{
+ if (ds->index == info->sw_index && port == info->port)
+ return true;
+
+ if (dsa_is_dsa_port(ds, port))
+ return true;
+
+ return false;
+}
+
+static int dsa_switch_mrp_add(struct dsa_switch *ds,
+ struct dsa_notifier_mrp_info *info)
+{
+ int err = 0;
int port;
- if (switchdev_trans_ph_prepare(info->trans))
- return dsa_switch_vlan_prepare(ds, info);
+ if (!ds->ops->port_mrp_add)
+ return -EOPNOTSUPP;
- if (!ds->ops->port_vlan_add)
- return 0;
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_mrp_match(ds, port, info)) {
+ err = ds->ops->port_mrp_add(ds, port, info->mrp);
+ if (err)
+ break;
+ }
+ }
+
+ return err;
+}
- for (port = 0; port < ds->num_ports; port++)
- if (dsa_switch_vlan_match(ds, port, info))
- ds->ops->port_vlan_add(ds, port, info->vlan);
+static int dsa_switch_mrp_del(struct dsa_switch *ds,
+ struct dsa_notifier_mrp_info *info)
+{
+ if (!ds->ops->port_mrp_del)
+ return -EOPNOTSUPP;
+
+ if (ds->index == info->sw_index)
+ return ds->ops->port_mrp_del(ds, info->port, info->mrp);
return 0;
}
-static int dsa_switch_vlan_del(struct dsa_switch *ds,
- struct dsa_notifier_vlan_info *info)
+static bool
+dsa_switch_mrp_ring_role_match(struct dsa_switch *ds, int port,
+ struct dsa_notifier_mrp_ring_role_info *info)
{
- if (!ds->ops->port_vlan_del)
+ if (ds->index == info->sw_index && port == info->port)
+ return true;
+
+ if (dsa_is_dsa_port(ds, port))
+ return true;
+
+ return false;
+}
+
+static int
+dsa_switch_mrp_add_ring_role(struct dsa_switch *ds,
+ struct dsa_notifier_mrp_ring_role_info *info)
+{
+ int err = 0;
+ int port;
+
+ if (!ds->ops->port_mrp_add)
+ return -EOPNOTSUPP;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_mrp_ring_role_match(ds, port, info)) {
+ err = ds->ops->port_mrp_add_ring_role(ds, port,
+ info->mrp);
+ if (err)
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int
+dsa_switch_mrp_del_ring_role(struct dsa_switch *ds,
+ struct dsa_notifier_mrp_ring_role_info *info)
+{
+ if (!ds->ops->port_mrp_del)
return -EOPNOTSUPP;
if (ds->index == info->sw_index)
- return ds->ops->port_vlan_del(ds, info->port, info->vlan);
+ return ds->ops->port_mrp_del_ring_role(ds, info->port,
+ info->mrp);
- /* Do not deprogram the DSA links as they may be used as conduit
- * for other VLAN members in the fabric.
- */
return 0;
}
@@ -325,6 +487,21 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_FDB_DEL:
err = dsa_switch_fdb_del(ds, info);
break;
+ case DSA_NOTIFIER_HSR_JOIN:
+ err = dsa_switch_hsr_join(ds, info);
+ break;
+ case DSA_NOTIFIER_HSR_LEAVE:
+ err = dsa_switch_hsr_leave(ds, info);
+ break;
+ case DSA_NOTIFIER_LAG_CHANGE:
+ err = dsa_switch_lag_change(ds, info);
+ break;
+ case DSA_NOTIFIER_LAG_JOIN:
+ err = dsa_switch_lag_join(ds, info);
+ break;
+ case DSA_NOTIFIER_LAG_LEAVE:
+ err = dsa_switch_lag_leave(ds, info);
+ break;
case DSA_NOTIFIER_MDB_ADD:
err = dsa_switch_mdb_add(ds, info);
break;
@@ -340,15 +517,26 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_MTU:
err = dsa_switch_mtu(ds, info);
break;
+ case DSA_NOTIFIER_TAG_PROTO:
+ err = dsa_switch_change_tag_proto(ds, info);
+ break;
+ case DSA_NOTIFIER_MRP_ADD:
+ err = dsa_switch_mrp_add(ds, info);
+ break;
+ case DSA_NOTIFIER_MRP_DEL:
+ err = dsa_switch_mrp_del(ds, info);
+ break;
+ case DSA_NOTIFIER_MRP_ADD_RING_ROLE:
+ err = dsa_switch_mrp_add_ring_role(ds, info);
+ break;
+ case DSA_NOTIFIER_MRP_DEL_RING_ROLE:
+ err = dsa_switch_mrp_del_ring_role(ds, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
}
- /* Non-switchdev operations cannot be rolled back. If a DSA driver
- * returns an error during the chained call, switch chips may be in an
- * inconsistent state.
- */
if (err)
dev_dbg(ds->dev, "breaking chain for DSA event %lu (%d)\n",
event, err);
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 8e3e8a5b8559..008c1ec6e20c 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -133,10 +133,21 @@ u16 dsa_8021q_rx_subvlan(u16 vid)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan);
+bool vid_is_dsa_8021q_rxvlan(u16 vid)
+{
+ return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX;
+}
+EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_rxvlan);
+
+bool vid_is_dsa_8021q_txvlan(u16 vid)
+{
+ return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_TX;
+}
+EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_txvlan);
+
bool vid_is_dsa_8021q(u16 vid)
{
- return ((vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX ||
- (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_TX);
+ return vid_is_dsa_8021q_rxvlan(vid) || vid_is_dsa_8021q_txvlan(vid);
}
EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index e934dace3922..e2577a7dcbca 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -5,6 +5,7 @@
* Copyright (C) 2014 Broadcom Corporation
*/
+#include <linux/dsa/brcm.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 112c7c6dd568..7e7b7decdf39 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -163,6 +163,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
u8 extra)
{
int source_device, source_port;
+ bool trunk = false;
enum dsa_code code;
enum dsa_cmd cmd;
u8 *dsa_header;
@@ -174,6 +175,8 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
switch (cmd) {
case DSA_CMD_FORWARD:
skb->offload_fwd_mark = 1;
+
+ trunk = !!(dsa_header[1] & 7);
break;
case DSA_CMD_TO_CPU:
@@ -216,7 +219,19 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
source_device = dsa_header[0] & 0x1f;
source_port = (dsa_header[1] >> 3) & 0x1f;
- skb->dev = dsa_master_find_slave(dev, source_device, source_port);
+ if (trunk) {
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+ /* The exact source port is not available in the tag,
+ * so we inject the frame directly on the upper
+ * team/bond.
+ */
+ skb->dev = dsa_lag_dev(cpu_dp->dst, source_port);
+ } else {
+ skb->dev = dsa_master_find_slave(dev, source_device,
+ source_port);
+ }
+
if (!skb->dev)
return NULL;
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 16a1afd5b8e1..743809b5806b 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -1,174 +1,74 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright 2019 NXP Semiconductors
*/
+#include <linux/dsa/ocelot.h>
#include <soc/mscc/ocelot.h>
-#include <linux/packing.h>
#include "dsa_priv.h"
-/* The CPU injection header and the CPU extraction header can have 3 types of
- * prefixes: long, short and no prefix. The format of the header itself is the
- * same in all 3 cases.
- *
- * Extraction with long prefix:
- *
- * +-------------------+-------------------+------+------+------------+-------+
- * | ff:ff:ff:ff:ff:ff | ff:ff:ff:ff:ff:ff | 8880 | 000a | extraction | frame |
- * | | | | | header | |
- * +-------------------+-------------------+------+------+------------+-------+
- * 48 bits 48 bits 16 bits 16 bits 128 bits
- *
- * Extraction with short prefix:
- *
- * +------+------+------------+-------+
- * | 8880 | 000a | extraction | frame |
- * | | | header | |
- * +------+------+------------+-------+
- * 16 bits 16 bits 128 bits
- *
- * Extraction with no prefix:
- *
- * +------------+-------+
- * | extraction | frame |
- * | header | |
- * +------------+-------+
- * 128 bits
- *
- *
- * Injection with long prefix:
- *
- * +-------------------+-------------------+------+------+------------+-------+
- * | any dmac | any smac | 8880 | 000a | injection | frame |
- * | | | | | header | |
- * +-------------------+-------------------+------+------+------------+-------+
- * 48 bits 48 bits 16 bits 16 bits 128 bits
- *
- * Injection with short prefix:
- *
- * +------+------+------------+-------+
- * | 8880 | 000a | injection | frame |
- * | | | header | |
- * +------+------+------------+-------+
- * 16 bits 16 bits 128 bits
- *
- * Injection with no prefix:
- *
- * +------------+-------+
- * | injection | frame |
- * | header | |
- * +------------+-------+
- * 128 bits
- *
- * The injection header looks like this (network byte order, bit 127
- * is part of lowest address byte in memory, bit 0 is part of highest
- * address byte):
- *
- * +------+------+------+------+------+------+------+------+
- * 127:120 |BYPASS| MASQ | MASQ_PORT |REW_OP|REW_OP|
- * +------+------+------+------+------+------+------+------+
- * 119:112 | REW_OP |
- * +------+------+------+------+------+------+------+------+
- * 111:104 | REW_VAL |
- * +------+------+------+------+------+------+------+------+
- * 103: 96 | REW_VAL |
- * +------+------+------+------+------+------+------+------+
- * 95: 88 | REW_VAL |
- * +------+------+------+------+------+------+------+------+
- * 87: 80 | REW_VAL |
- * +------+------+------+------+------+------+------+------+
- * 79: 72 | RSV |
- * +------+------+------+------+------+------+------+------+
- * 71: 64 | RSV | DEST |
- * +------+------+------+------+------+------+------+------+
- * 63: 56 | DEST |
- * +------+------+------+------+------+------+------+------+
- * 55: 48 | RSV |
- * +------+------+------+------+------+------+------+------+
- * 47: 40 | RSV | SRC_PORT | RSV |TFRM_TIMER|
- * +------+------+------+------+------+------+------+------+
- * 39: 32 | TFRM_TIMER | RSV |
- * +------+------+------+------+------+------+------+------+
- * 31: 24 | RSV | DP | POP_CNT | CPUQ |
- * +------+------+------+------+------+------+------+------+
- * 23: 16 | CPUQ | QOS_CLASS |TAG_TYPE|
- * +------+------+------+------+------+------+------+------+
- * 15: 8 | PCP | DEI | VID |
- * +------+------+------+------+------+------+------+------+
- * 7: 0 | VID |
- * +------+------+------+------+------+------+------+------+
- *
- * And the extraction header looks like this:
- *
- * +------+------+------+------+------+------+------+------+
- * 127:120 | RSV | REW_OP |
- * +------+------+------+------+------+------+------+------+
- * 119:112 | REW_OP | REW_VAL |
- * +------+------+------+------+------+------+------+------+
- * 111:104 | REW_VAL |
- * +------+------+------+------+------+------+------+------+
- * 103: 96 | REW_VAL |
- * +------+------+------+------+------+------+------+------+
- * 95: 88 | REW_VAL |
- * +------+------+------+------+------+------+------+------+
- * 87: 80 | REW_VAL | LLEN |
- * +------+------+------+------+------+------+------+------+
- * 79: 72 | LLEN | WLEN |
- * +------+------+------+------+------+------+------+------+
- * 71: 64 | WLEN | RSV |
- * +------+------+------+------+------+------+------+------+
- * 63: 56 | RSV |
- * +------+------+------+------+------+------+------+------+
- * 55: 48 | RSV |
- * +------+------+------+------+------+------+------+------+
- * 47: 40 | RSV | SRC_PORT | ACL_ID |
- * +------+------+------+------+------+------+------+------+
- * 39: 32 | ACL_ID | RSV | SFLOW_ID |
- * +------+------+------+------+------+------+------+------+
- * 31: 24 |ACL_HIT| DP | LRN_FLAGS | CPUQ |
- * +------+------+------+------+------+------+------+------+
- * 23: 16 | CPUQ | QOS_CLASS |TAG_TYPE|
- * +------+------+------+------+------+------+------+------+
- * 15: 8 | PCP | DEI | VID |
- * +------+------+------+------+------+------+------+------+
- * 7: 0 | VID |
- * +------+------+------+------+------+------+------+------+
- */
+static void ocelot_xmit_ptp(struct dsa_port *dp, void *injection,
+ struct sk_buff *clone)
+{
+ struct ocelot *ocelot = dp->ds->priv;
+ struct ocelot_port *ocelot_port;
+ u64 rew_op;
-static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
- struct net_device *netdev)
+ ocelot_port = ocelot->ports[dp->index];
+ rew_op = ocelot_port->ptp_cmd;
+
+ /* Retrieve timestamp ID populated inside skb->cb[0] of the
+ * clone by ocelot_port_add_txtstamp_skb
+ */
+ if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
+ rew_op |= clone->cb[0] << 3;
+
+ ocelot_ifh_set_rew_op(injection, rew_op);
+}
+
+static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
+ __be32 ifh_prefix, void **ifh)
{
struct dsa_port *dp = dsa_slave_to_port(netdev);
struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
struct dsa_switch *ds = dp->ds;
- struct ocelot *ocelot = ds->priv;
- struct ocelot_port *ocelot_port;
- u8 *prefix, *injection;
- u64 qos_class, rew_op;
-
- ocelot_port = ocelot->ports[dp->index];
+ void *injection;
+ __be32 *prefix;
injection = skb_push(skb, OCELOT_TAG_LEN);
-
prefix = skb_push(skb, OCELOT_SHORT_PREFIX_LEN);
- memcpy(prefix, ocelot_port->xmit_template, OCELOT_TOTAL_TAG_LEN);
-
- /* Fix up the fields which are not statically determined
- * in the template
- */
- qos_class = skb->priority;
- packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0);
+ *prefix = ifh_prefix;
+ memset(injection, 0, OCELOT_TAG_LEN);
+ ocelot_ifh_set_bypass(injection, 1);
+ ocelot_ifh_set_src(injection, ds->num_ports);
+ ocelot_ifh_set_qos_class(injection, skb->priority);
/* TX timestamping was requested */
- if (clone) {
- rew_op = ocelot_port->ptp_cmd;
- /* Retrieve timestamp ID populated inside skb->cb[0] of the
- * clone by ocelot_port_add_txtstamp_skb
- */
- if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
- rew_op |= clone->cb[0] << 3;
+ if (clone)
+ ocelot_xmit_ptp(dp, injection, clone);
- packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0);
- }
+ *ifh = injection;
+}
+
+static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(netdev);
+ void *injection;
+
+ ocelot_xmit_common(skb, netdev, cpu_to_be32(0x8880000a), &injection);
+ ocelot_ifh_set_dest(injection, BIT_ULL(dp->index));
+
+ return skb;
+}
+
+static struct sk_buff *seville_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(netdev);
+ void *injection;
+
+ ocelot_xmit_common(skb, netdev, cpu_to_be32(0x88800005), &injection);
+ seville_ifh_set_dest(injection, BIT_ULL(dp->index));
return skb;
}
@@ -177,14 +77,13 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
struct net_device *netdev,
struct packet_type *pt)
{
- struct dsa_port *cpu_dp = netdev->dsa_ptr;
- struct dsa_switch *ds = cpu_dp->ds;
- struct ocelot *ocelot = ds->priv;
u64 src_port, qos_class;
u64 vlan_tci, tag_type;
u8 *start = skb->data;
+ struct dsa_port *dp;
u8 *extraction;
u16 vlan_tpid;
+ u64 cpuq;
/* Revert skb->data by the amount consumed by the DSA master,
* so it points to the beginning of the frame.
@@ -210,10 +109,11 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
/* Remove from inet csum the extraction header */
skb_postpull_rcsum(skb, start, OCELOT_TOTAL_TAG_LEN);
- packing(extraction, &src_port, 46, 43, OCELOT_TAG_LEN, UNPACK, 0);
- packing(extraction, &qos_class, 19, 17, OCELOT_TAG_LEN, UNPACK, 0);
- packing(extraction, &tag_type, 16, 16, OCELOT_TAG_LEN, UNPACK, 0);
- packing(extraction, &vlan_tci, 15, 0, OCELOT_TAG_LEN, UNPACK, 0);
+ ocelot_xfh_get_src_port(extraction, &src_port);
+ ocelot_xfh_get_qos_class(extraction, &qos_class);
+ ocelot_xfh_get_tag_type(extraction, &tag_type);
+ ocelot_xfh_get_vlan_tci(extraction, &vlan_tci);
+ ocelot_xfh_get_cpuq(extraction, &cpuq);
skb->dev = dsa_master_find_slave(netdev, 0, src_port);
if (!skb->dev)
@@ -228,6 +128,12 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
skb->offload_fwd_mark = 1;
skb->priority = qos_class;
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+ if (eth_hdr(skb)->h_proto == cpu_to_be16(ETH_P_MRP) &&
+ cpuq & BIT(OCELOT_MRP_CPUQ))
+ skb->offload_fwd_mark = 0;
+#endif
+
/* Ocelot switches copy frames unmodified to the CPU. However, it is
* possible for the user to request a VLAN modification through
* VCAP_IS1_ACT_VID_REPLACE_ENA. In this case, what will happen is that
@@ -243,9 +149,10 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
* equal to the pvid of the ingress port and should not be used for
* processing.
*/
+ dp = dsa_slave_to_port(skb->dev);
vlan_tpid = tag_type ? ETH_P_8021AD : ETH_P_8021Q;
- if (ocelot->ports[src_port]->vlan_aware &&
+ if (dsa_port_is_vlan_filtering(dp) &&
eth_hdr(skb)->h_proto == htons(vlan_tpid)) {
u16 dummy_vlan_tci;
@@ -267,7 +174,26 @@ static const struct dsa_device_ops ocelot_netdev_ops = {
.promisc_on_master = true,
};
-MODULE_LICENSE("GPL v2");
+DSA_TAG_DRIVER(ocelot_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_OCELOT);
-module_dsa_tag_driver(ocelot_netdev_ops);
+static const struct dsa_device_ops seville_netdev_ops = {
+ .name = "seville",
+ .proto = DSA_TAG_PROTO_SEVILLE,
+ .xmit = seville_xmit,
+ .rcv = ocelot_rcv,
+ .overhead = OCELOT_TOTAL_TAG_LEN,
+ .promisc_on_master = true,
+};
+
+DSA_TAG_DRIVER(seville_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SEVILLE);
+
+static struct dsa_tag_driver *ocelot_tag_driver_array[] = {
+ &DSA_TAG_DRIVER_NAME(ocelot_netdev_ops),
+ &DSA_TAG_DRIVER_NAME(seville_netdev_ops),
+};
+
+module_dsa_tag_drivers(ocelot_tag_driver_array);
+
+MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
new file mode 100644
index 000000000000..5f3e8e124a82
--- /dev/null
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2020-2021 NXP Semiconductors
+ *
+ * An implementation of the software-defined tag_8021q.c tagger format, which
+ * also preserves full functionality under a vlan_filtering bridge. It does
+ * this by using the TCAM engines for:
+ * - pushing the RX VLAN as a second, outer tag, on egress towards the CPU port
+ * - redirecting towards the correct front port based on TX VLAN and popping
+ * that on egress
+ */
+#include <linux/dsa/8021q.h>
+#include <soc/mscc/ocelot.h>
+#include <soc/mscc/ocelot_ptp.h>
+#include "dsa_priv.h"
+
+static struct sk_buff *ocelot_xmit_ptp(struct dsa_port *dp,
+ struct sk_buff *skb,
+ struct sk_buff *clone)
+{
+ struct ocelot *ocelot = dp->ds->priv;
+ struct ocelot_port *ocelot_port;
+ int port = dp->index;
+ u32 rew_op;
+
+ if (!ocelot_can_inject(ocelot, 0))
+ return NULL;
+
+ ocelot_port = ocelot->ports[port];
+ rew_op = ocelot_port->ptp_cmd;
+
+ /* Retrieve timestamp ID populated inside skb->cb[0] of the
+ * clone by ocelot_port_add_txtstamp_skb
+ */
+ if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
+ rew_op |= clone->cb[0] << 3;
+
+ ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
+
+ return NULL;
+}
+
+static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(netdev);
+ u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+ struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
+
+ /* TX timestamping was requested, so inject through MMIO */
+ if (clone)
+ return ocelot_xmit_ptp(dp, skb, clone);
+
+ return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q,
+ ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
+}
+
+static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
+ struct net_device *netdev,
+ struct packet_type *pt)
+{
+ int src_port, switch_id, qos_class;
+ u16 vid, tci;
+
+ skb_push_rcsum(skb, ETH_HLEN);
+ if (skb_vlan_tag_present(skb)) {
+ tci = skb_vlan_tag_get(skb);
+ __vlan_hwaccel_clear_tag(skb);
+ } else {
+ __skb_vlan_pop(skb, &tci);
+ }
+ skb_pull_rcsum(skb, ETH_HLEN);
+
+ vid = tci & VLAN_VID_MASK;
+ src_port = dsa_8021q_rx_source_port(vid);
+ switch_id = dsa_8021q_rx_switch_id(vid);
+ qos_class = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+
+ skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
+ if (!skb->dev)
+ return NULL;
+
+ skb->offload_fwd_mark = 1;
+ skb->priority = qos_class;
+
+ return skb;
+}
+
+static const struct dsa_device_ops ocelot_8021q_netdev_ops = {
+ .name = "ocelot-8021q",
+ .proto = DSA_TAG_PROTO_OCELOT_8021Q,
+ .xmit = ocelot_xmit,
+ .rcv = ocelot_rcv,
+ .overhead = VLAN_HLEN,
+ .promisc_on_master = true,
+};
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_OCELOT_8021Q);
+
+module_dsa_tag_driver(ocelot_8021q_netdev_ops);
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index 2646abe5a69e..c17d39b4a1a0 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -12,9 +12,7 @@
*
* The 2 bytes tag form a 16 bit big endian word. The exact
* meaning has been guessed from packet dumps from ingress
- * frames, as no working egress traffic has been available
- * we do not know the format of the egress tags or if they
- * are even supported.
+ * frames.
*/
#include <linux/etherdevice.h>
@@ -36,17 +34,34 @@
static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- /*
- * Just let it pass thru, we don't know if it is possible
- * to tag a frame with the 0x8899 ethertype and direct it
- * to a specific port, all attempts at reverse-engineering have
- * ended up with the frames getting dropped.
- *
- * The VLAN set-up needs to restrict the frames to the right port.
- *
- * If you have documentation on the tagging format for RTL8366RB
- * (tag type A) then please contribute.
- */
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ u8 *tag;
+ u16 *p;
+ u16 out;
+
+ /* Pad out to at least 60 bytes */
+ if (unlikely(eth_skb_pad(skb)))
+ return NULL;
+ if (skb_cow_head(skb, RTL4_A_HDR_LEN) < 0)
+ return NULL;
+
+ netdev_dbg(dev, "add realtek tag to package to port %d\n",
+ dp->index);
+ skb_push(skb, RTL4_A_HDR_LEN);
+
+ memmove(skb->data, skb->data + RTL4_A_HDR_LEN, 2 * ETH_ALEN);
+ tag = skb->data + 2 * ETH_ALEN;
+
+ /* Set Ethertype */
+ p = (u16 *)tag;
+ *p = htons(RTL4_A_ETHERTYPE);
+
+ out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8);
+ /* The lower bits hold the port number */
+ out |= (u8)dp->index;
+ p = (u16 *)(tag + 2);
+ *p = htons(out);
+
return skb;
}
diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c
new file mode 100644
index 000000000000..858cdf9d2913
--- /dev/null
+++ b/net/dsa/tag_xrs700x.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * XRS700x tag format handling
+ * Copyright (c) 2008-2009 Marvell Semiconductor
+ * Copyright (c) 2020 NovaTech LLC
+ */
+
+#include <linux/bitops.h>
+
+#include "dsa_priv.h"
+
+static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct dsa_port *partner, *dp = dsa_slave_to_port(dev);
+ u8 *trailer;
+
+ trailer = skb_put(skb, 1);
+ trailer[0] = BIT(dp->index);
+
+ if (dp->hsr_dev)
+ dsa_hsr_foreach_port(partner, dp->ds, dp->hsr_dev)
+ if (partner != dp)
+ trailer[0] |= BIT(partner->index);
+
+ return skb;
+}
+
+static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt)
+{
+ int source_port;
+ u8 *trailer;
+
+ trailer = skb_tail_pointer(skb) - 1;
+
+ source_port = ffs((int)trailer[0]) - 1;
+
+ if (source_port < 0)
+ return NULL;
+
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ if (!skb->dev)
+ return NULL;
+
+ if (pskb_trim_rcsum(skb, skb->len - 1))
+ return NULL;
+
+ /* Frame is forwarded by hardware, don't forward in software. */
+ skb->offload_fwd_mark = 1;
+
+ return skb;
+}
+
+static const struct dsa_device_ops xrs700x_netdev_ops = {
+ .name = "xrs700x",
+ .proto = DSA_TAG_PROTO_XRS700X,
+ .xmit = xrs700x_xmit,
+ .rcv = xrs700x_rcv,
+ .overhead = 1,
+ .tail_tag = true,
+};
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_XRS700X);
+
+module_dsa_tag_driver(xrs700x_netdev_ops);
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 24036e3055a1..c6a383dfd6c2 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -68,6 +68,11 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
[NETIF_F_GRO_FRAGLIST_BIT] = "rx-gro-list",
[NETIF_F_HW_MACSEC_BIT] = "macsec-hw-offload",
+ [NETIF_F_GRO_UDP_FWD_BIT] = "rx-udp-gro-forwarding",
+ [NETIF_F_HW_HSR_TAG_INS_BIT] = "hsr-tag-ins-offload",
+ [NETIF_F_HW_HSR_TAG_RM_BIT] = "hsr-tag-rm-offload",
+ [NETIF_F_HW_HSR_FWD_BIT] = "hsr-fwd-offload",
+ [NETIF_F_HW_HSR_DUP_BIT] = "hsr-dup-offload",
};
const char
@@ -197,6 +202,153 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
+#define __LINK_MODE_LANES_CR 1
+#define __LINK_MODE_LANES_CR2 2
+#define __LINK_MODE_LANES_CR4 4
+#define __LINK_MODE_LANES_CR8 8
+#define __LINK_MODE_LANES_DR 1
+#define __LINK_MODE_LANES_DR2 2
+#define __LINK_MODE_LANES_DR4 4
+#define __LINK_MODE_LANES_DR8 8
+#define __LINK_MODE_LANES_KR 1
+#define __LINK_MODE_LANES_KR2 2
+#define __LINK_MODE_LANES_KR4 4
+#define __LINK_MODE_LANES_KR8 8
+#define __LINK_MODE_LANES_SR 1
+#define __LINK_MODE_LANES_SR2 2
+#define __LINK_MODE_LANES_SR4 4
+#define __LINK_MODE_LANES_SR8 8
+#define __LINK_MODE_LANES_ER 1
+#define __LINK_MODE_LANES_KX 1
+#define __LINK_MODE_LANES_KX4 4
+#define __LINK_MODE_LANES_LR 1
+#define __LINK_MODE_LANES_LR4 4
+#define __LINK_MODE_LANES_LR4_ER4 4
+#define __LINK_MODE_LANES_LR_ER_FR 1
+#define __LINK_MODE_LANES_LR2_ER2_FR2 2
+#define __LINK_MODE_LANES_LR4_ER4_FR4 4
+#define __LINK_MODE_LANES_LR8_ER8_FR8 8
+#define __LINK_MODE_LANES_LRM 1
+#define __LINK_MODE_LANES_MLD2 2
+#define __LINK_MODE_LANES_T 1
+#define __LINK_MODE_LANES_T1 1
+#define __LINK_MODE_LANES_X 1
+#define __LINK_MODE_LANES_FX 1
+
+#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \
+ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \
+ .speed = SPEED_ ## _speed, \
+ .lanes = __LINK_MODE_LANES_ ## _type, \
+ .duplex = __DUPLEX_ ## _duplex \
+ }
+#define __DUPLEX_Half DUPLEX_HALF
+#define __DUPLEX_Full DUPLEX_FULL
+#define __DEFINE_SPECIAL_MODE_PARAMS(_mode) \
+ [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = { \
+ .speed = SPEED_UNKNOWN, \
+ .lanes = 0, \
+ .duplex = DUPLEX_UNKNOWN, \
+ }
+
+const struct link_mode_info link_mode_params[] = {
+ __DEFINE_LINK_MODE_PARAMS(10, T, Half),
+ __DEFINE_LINK_MODE_PARAMS(10, T, Full),
+ __DEFINE_LINK_MODE_PARAMS(100, T, Half),
+ __DEFINE_LINK_MODE_PARAMS(100, T, Full),
+ __DEFINE_LINK_MODE_PARAMS(1000, T, Half),
+ __DEFINE_LINK_MODE_PARAMS(1000, T, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(Autoneg),
+ __DEFINE_SPECIAL_MODE_PARAMS(TP),
+ __DEFINE_SPECIAL_MODE_PARAMS(AUI),
+ __DEFINE_SPECIAL_MODE_PARAMS(MII),
+ __DEFINE_SPECIAL_MODE_PARAMS(FIBRE),
+ __DEFINE_SPECIAL_MODE_PARAMS(BNC),
+ __DEFINE_LINK_MODE_PARAMS(10000, T, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(Pause),
+ __DEFINE_SPECIAL_MODE_PARAMS(Asym_Pause),
+ __DEFINE_LINK_MODE_PARAMS(2500, X, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(Backplane),
+ __DEFINE_LINK_MODE_PARAMS(1000, KX, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, KX4, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, KR, Full),
+ [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = {
+ .speed = SPEED_10000,
+ .duplex = DUPLEX_FULL,
+ },
+ __DEFINE_LINK_MODE_PARAMS(20000, MLD2, Full),
+ __DEFINE_LINK_MODE_PARAMS(20000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, LR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, LR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(25000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(25000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(25000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, LR4_ER4, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(1000, X, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, LR, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, LRM, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, ER, Full),
+ __DEFINE_LINK_MODE_PARAMS(2500, T, Full),
+ __DEFINE_LINK_MODE_PARAMS(5000, T, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_NONE),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_RS),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_BASER),
+ __DEFINE_LINK_MODE_PARAMS(50000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, LR_ER_FR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, DR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, LR2_ER2_FR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, DR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, LR4_ER4_FR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, DR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100, T1, Full),
+ __DEFINE_LINK_MODE_PARAMS(1000, T1, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, KR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, SR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS),
+ __DEFINE_LINK_MODE_PARAMS(100000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, LR_ER_FR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, DR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, LR2_ER2_FR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, DR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, LR4_ER4_FR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, DR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100, FX, Half),
+ __DEFINE_LINK_MODE_PARAMS(100, FX, Full),
+};
+static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
+
const char netif_msg_class_names[][ETH_GSTRING_LEN] = {
[NETIF_MSG_DRV_BIT] = "drv",
[NETIF_MSG_PROBE_BIT] = "probe",
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index 3d9251c95a8b..a9d071248698 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -14,6 +14,12 @@
#define __SOF_TIMESTAMPING_CNT (const_ilog2(SOF_TIMESTAMPING_LAST) + 1)
+struct link_mode_info {
+ int speed;
+ u8 lanes;
+ u8 duplex;
+};
+
extern const char
netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN];
extern const char
@@ -23,6 +29,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN];
extern const char
phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN];
extern const char link_mode_names[][ETH_GSTRING_LEN];
+extern const struct link_mode_info link_mode_params[];
extern const char netif_msg_class_names[][ETH_GSTRING_LEN];
extern const char wol_mode_names[][ETH_GSTRING_LEN];
extern const char sof_timestamping_names[][ETH_GSTRING_LEN];
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 771688e1b0da..24783b71c584 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -426,13 +426,29 @@ struct ethtool_link_usettings {
int __ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *link_ksettings)
{
+ const struct link_mode_info *link_info;
+ int err;
+
ASSERT_RTNL();
if (!dev->ethtool_ops->get_link_ksettings)
return -EOPNOTSUPP;
memset(link_ksettings, 0, sizeof(*link_ksettings));
- return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings);
+
+ link_ksettings->link_mode = -1;
+ err = dev->ethtool_ops->get_link_ksettings(dev, link_ksettings);
+ if (err)
+ return err;
+
+ if (link_ksettings->link_mode != -1) {
+ link_info = &link_mode_params[link_ksettings->link_mode];
+ link_ksettings->base.speed = link_info->speed;
+ link_ksettings->lanes = link_info->lanes;
+ link_ksettings->base.duplex = link_info->duplex;
+ }
+
+ return 0;
}
EXPORT_SYMBOL(__ethtool_get_link_ksettings);
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index c5bcb9abc8b9..f9eda596f301 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -4,6 +4,8 @@
#include "common.h"
#include "bitset.h"
+/* LINKMODES_GET */
+
struct linkmodes_req_info {
struct ethnl_req_info base;
};
@@ -43,6 +45,9 @@ static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
goto out;
}
+ if (!dev->ethtool_ops->cap_link_lanes_supported)
+ data->ksettings.lanes = 0;
+
data->peer_empty =
bitmap_empty(data->ksettings.link_modes.lp_advertising,
__ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -63,6 +68,7 @@ static int linkmodes_reply_size(const struct ethnl_req_info *req_base,
len = nla_total_size(sizeof(u8)) /* LINKMODES_AUTONEG */
+ nla_total_size(sizeof(u32)) /* LINKMODES_SPEED */
+ + nla_total_size(sizeof(u32)) /* LINKMODES_LANES */
+ nla_total_size(sizeof(u8)) /* LINKMODES_DUPLEX */
+ 0;
ret = ethnl_bitset_size(ksettings->link_modes.advertising,
@@ -123,6 +129,10 @@ static int linkmodes_fill_reply(struct sk_buff *skb,
nla_put_u8(skb, ETHTOOL_A_LINKMODES_DUPLEX, lsettings->duplex))
return -EMSGSIZE;
+ if (ksettings->lanes &&
+ nla_put_u32(skb, ETHTOOL_A_LINKMODES_LANES, ksettings->lanes))
+ return -EMSGSIZE;
+
if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED &&
nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG,
lsettings->master_slave_cfg))
@@ -150,122 +160,6 @@ const struct ethnl_request_ops ethnl_linkmodes_request_ops = {
/* LINKMODES_SET */
-struct link_mode_info {
- int speed;
- u8 duplex;
-};
-
-#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \
- [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \
- .speed = SPEED_ ## _speed, \
- .duplex = __DUPLEX_ ## _duplex \
- }
-#define __DUPLEX_Half DUPLEX_HALF
-#define __DUPLEX_Full DUPLEX_FULL
-#define __DEFINE_SPECIAL_MODE_PARAMS(_mode) \
- [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = { \
- .speed = SPEED_UNKNOWN, \
- .duplex = DUPLEX_UNKNOWN, \
- }
-
-static const struct link_mode_info link_mode_params[] = {
- __DEFINE_LINK_MODE_PARAMS(10, T, Half),
- __DEFINE_LINK_MODE_PARAMS(10, T, Full),
- __DEFINE_LINK_MODE_PARAMS(100, T, Half),
- __DEFINE_LINK_MODE_PARAMS(100, T, Full),
- __DEFINE_LINK_MODE_PARAMS(1000, T, Half),
- __DEFINE_LINK_MODE_PARAMS(1000, T, Full),
- __DEFINE_SPECIAL_MODE_PARAMS(Autoneg),
- __DEFINE_SPECIAL_MODE_PARAMS(TP),
- __DEFINE_SPECIAL_MODE_PARAMS(AUI),
- __DEFINE_SPECIAL_MODE_PARAMS(MII),
- __DEFINE_SPECIAL_MODE_PARAMS(FIBRE),
- __DEFINE_SPECIAL_MODE_PARAMS(BNC),
- __DEFINE_LINK_MODE_PARAMS(10000, T, Full),
- __DEFINE_SPECIAL_MODE_PARAMS(Pause),
- __DEFINE_SPECIAL_MODE_PARAMS(Asym_Pause),
- __DEFINE_LINK_MODE_PARAMS(2500, X, Full),
- __DEFINE_SPECIAL_MODE_PARAMS(Backplane),
- __DEFINE_LINK_MODE_PARAMS(1000, KX, Full),
- __DEFINE_LINK_MODE_PARAMS(10000, KX4, Full),
- __DEFINE_LINK_MODE_PARAMS(10000, KR, Full),
- [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- },
- __DEFINE_LINK_MODE_PARAMS(20000, MLD2, Full),
- __DEFINE_LINK_MODE_PARAMS(20000, KR2, Full),
- __DEFINE_LINK_MODE_PARAMS(40000, KR4, Full),
- __DEFINE_LINK_MODE_PARAMS(40000, CR4, Full),
- __DEFINE_LINK_MODE_PARAMS(40000, SR4, Full),
- __DEFINE_LINK_MODE_PARAMS(40000, LR4, Full),
- __DEFINE_LINK_MODE_PARAMS(56000, KR4, Full),
- __DEFINE_LINK_MODE_PARAMS(56000, CR4, Full),
- __DEFINE_LINK_MODE_PARAMS(56000, SR4, Full),
- __DEFINE_LINK_MODE_PARAMS(56000, LR4, Full),
- __DEFINE_LINK_MODE_PARAMS(25000, CR, Full),
- __DEFINE_LINK_MODE_PARAMS(25000, KR, Full),
- __DEFINE_LINK_MODE_PARAMS(25000, SR, Full),
- __DEFINE_LINK_MODE_PARAMS(50000, CR2, Full),
- __DEFINE_LINK_MODE_PARAMS(50000, KR2, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, KR4, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, SR4, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, CR4, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, LR4_ER4, Full),
- __DEFINE_LINK_MODE_PARAMS(50000, SR2, Full),
- __DEFINE_LINK_MODE_PARAMS(1000, X, Full),
- __DEFINE_LINK_MODE_PARAMS(10000, CR, Full),
- __DEFINE_LINK_MODE_PARAMS(10000, SR, Full),
- __DEFINE_LINK_MODE_PARAMS(10000, LR, Full),
- __DEFINE_LINK_MODE_PARAMS(10000, LRM, Full),
- __DEFINE_LINK_MODE_PARAMS(10000, ER, Full),
- __DEFINE_LINK_MODE_PARAMS(2500, T, Full),
- __DEFINE_LINK_MODE_PARAMS(5000, T, Full),
- __DEFINE_SPECIAL_MODE_PARAMS(FEC_NONE),
- __DEFINE_SPECIAL_MODE_PARAMS(FEC_RS),
- __DEFINE_SPECIAL_MODE_PARAMS(FEC_BASER),
- __DEFINE_LINK_MODE_PARAMS(50000, KR, Full),
- __DEFINE_LINK_MODE_PARAMS(50000, SR, Full),
- __DEFINE_LINK_MODE_PARAMS(50000, CR, Full),
- __DEFINE_LINK_MODE_PARAMS(50000, LR_ER_FR, Full),
- __DEFINE_LINK_MODE_PARAMS(50000, DR, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, KR2, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, SR2, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, CR2, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, LR2_ER2_FR2, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, DR2, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, KR4, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, SR4, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, LR4_ER4_FR4, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, DR4, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, CR4, Full),
- __DEFINE_LINK_MODE_PARAMS(100, T1, Full),
- __DEFINE_LINK_MODE_PARAMS(1000, T1, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, KR8, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, SR8, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full),
- __DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS),
- __DEFINE_LINK_MODE_PARAMS(100000, KR, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, SR, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, LR_ER_FR, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, DR, Full),
- __DEFINE_LINK_MODE_PARAMS(100000, CR, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, KR2, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, SR2, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, LR2_ER2_FR2, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, DR2, Full),
- __DEFINE_LINK_MODE_PARAMS(200000, CR2, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, KR4, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, SR4, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, LR4_ER4_FR4, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, DR4, Full),
- __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full),
- __DEFINE_LINK_MODE_PARAMS(100, FX, Half),
- __DEFINE_LINK_MODE_PARAMS(100, FX, Full),
-};
-
const struct nla_policy ethnl_linkmodes_set_policy[] = {
[ETHTOOL_A_LINKMODES_HEADER] =
NLA_POLICY_NESTED(ethnl_header_policy),
@@ -274,25 +168,23 @@ const struct nla_policy ethnl_linkmodes_set_policy[] = {
[ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_U32 },
[ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_U8 },
[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG] = { .type = NLA_U8 },
+ [ETHTOOL_A_LINKMODES_LANES] = NLA_POLICY_RANGE(NLA_U32, 1, 8),
};
-/* Set advertised link modes to all supported modes matching requested speed
- * and duplex values. Called when autonegotiation is on, speed or duplex is
- * requested but no link mode change. This is done in userspace with ioctl()
- * interface, move it into kernel for netlink.
+/* Set advertised link modes to all supported modes matching requested speed,
+ * lanes and duplex values. Called when autonegotiation is on, speed, lanes or
+ * duplex is requested but no link mode change. This is done in userspace with
+ * ioctl() interface, move it into kernel for netlink.
* Returns true if advertised modes bitmap was modified.
*/
static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings,
- bool req_speed, bool req_duplex)
+ bool req_speed, bool req_lanes, bool req_duplex)
{
unsigned long *advertising = ksettings->link_modes.advertising;
unsigned long *supported = ksettings->link_modes.supported;
DECLARE_BITMAP(old_adv, __ETHTOOL_LINK_MODE_MASK_NBITS);
unsigned int i;
- BUILD_BUG_ON(ARRAY_SIZE(link_mode_params) !=
- __ETHTOOL_LINK_MODE_MASK_NBITS);
-
bitmap_copy(old_adv, advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
for (i = 0; i < __ETHTOOL_LINK_MODE_MASK_NBITS; i++) {
@@ -302,6 +194,7 @@ static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings,
continue;
if (test_bit(i, supported) &&
(!req_speed || info->speed == ksettings->base.speed) &&
+ (!req_lanes || info->lanes == ksettings->lanes) &&
(!req_duplex || info->duplex == ksettings->base.duplex))
set_bit(i, advertising);
else
@@ -325,38 +218,72 @@ static bool ethnl_validate_master_slave_cfg(u8 cfg)
return false;
}
+static int ethnl_check_linkmodes(struct genl_info *info, struct nlattr **tb)
+{
+ const struct nlattr *master_slave_cfg, *lanes_cfg;
+
+ master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG];
+ if (master_slave_cfg &&
+ !ethnl_validate_master_slave_cfg(nla_get_u8(master_slave_cfg))) {
+ NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg,
+ "master/slave value is invalid");
+ return -EOPNOTSUPP;
+ }
+
+ lanes_cfg = tb[ETHTOOL_A_LINKMODES_LANES];
+ if (lanes_cfg && !is_power_of_2(nla_get_u32(lanes_cfg))) {
+ NL_SET_ERR_MSG_ATTR(info->extack, lanes_cfg,
+ "lanes value is invalid");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
struct ethtool_link_ksettings *ksettings,
- bool *mod)
+ bool *mod, const struct net_device *dev)
{
struct ethtool_link_settings *lsettings = &ksettings->base;
- bool req_speed, req_duplex;
- const struct nlattr *master_slave_cfg;
+ bool req_speed, req_lanes, req_duplex;
+ const struct nlattr *master_slave_cfg, *lanes_cfg;
int ret;
master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG];
if (master_slave_cfg) {
- u8 cfg = nla_get_u8(master_slave_cfg);
-
if (lsettings->master_slave_cfg == MASTER_SLAVE_CFG_UNSUPPORTED) {
NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg,
"master/slave configuration not supported by device");
return -EOPNOTSUPP;
}
-
- if (!ethnl_validate_master_slave_cfg(cfg)) {
- NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg,
- "master/slave value is invalid");
- return -EOPNOTSUPP;
- }
}
*mod = false;
req_speed = tb[ETHTOOL_A_LINKMODES_SPEED];
+ req_lanes = tb[ETHTOOL_A_LINKMODES_LANES];
req_duplex = tb[ETHTOOL_A_LINKMODES_DUPLEX];
ethnl_update_u8(&lsettings->autoneg, tb[ETHTOOL_A_LINKMODES_AUTONEG],
mod);
+
+ lanes_cfg = tb[ETHTOOL_A_LINKMODES_LANES];
+ if (lanes_cfg) {
+ /* If autoneg is off and lanes parameter is not supported by the
+ * driver, return an error.
+ */
+ if (!lsettings->autoneg &&
+ !dev->ethtool_ops->cap_link_lanes_supported) {
+ NL_SET_ERR_MSG_ATTR(info->extack, lanes_cfg,
+ "lanes configuration not supported by device");
+ return -EOPNOTSUPP;
+ }
+ } else if (!lsettings->autoneg) {
+ /* If autoneg is off and lanes parameter is not passed from user,
+ * set the lanes parameter to 0.
+ */
+ ksettings->lanes = 0;
+ }
+
ret = ethnl_update_bitset(ksettings->link_modes.advertising,
__ETHTOOL_LINK_MODE_MASK_NBITS,
tb[ETHTOOL_A_LINKMODES_OURS], link_mode_names,
@@ -365,13 +292,14 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
return ret;
ethnl_update_u32(&lsettings->speed, tb[ETHTOOL_A_LINKMODES_SPEED],
mod);
+ ethnl_update_u32(&ksettings->lanes, lanes_cfg, mod);
ethnl_update_u8(&lsettings->duplex, tb[ETHTOOL_A_LINKMODES_DUPLEX],
mod);
ethnl_update_u8(&lsettings->master_slave_cfg, master_slave_cfg, mod);
if (!tb[ETHTOOL_A_LINKMODES_OURS] && lsettings->autoneg &&
- (req_speed || req_duplex) &&
- ethnl_auto_linkmodes(ksettings, req_speed, req_duplex))
+ (req_speed || req_lanes || req_duplex) &&
+ ethnl_auto_linkmodes(ksettings, req_speed, req_lanes, req_duplex))
*mod = true;
return 0;
@@ -386,6 +314,10 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
bool mod = false;
int ret;
+ ret = ethnl_check_linkmodes(info, tb);
+ if (ret < 0)
+ return ret;
+
ret = ethnl_parse_header_dev_get(&req_info,
tb[ETHTOOL_A_LINKMODES_HEADER],
genl_info_net(info), info->extack,
@@ -409,7 +341,7 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
goto out_ops;
}
- ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod);
+ ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod, dev);
if (ret < 0)
goto out_ops;
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index d8efec516d86..6eabd58d81bf 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -351,7 +351,7 @@ extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_O
extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1];
extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1];
extern const struct nla_policy ethnl_linkmodes_get_policy[ETHTOOL_A_LINKMODES_HEADER + 1];
-extern const struct nla_policy ethnl_linkmodes_set_policy[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG + 1];
+extern const struct nla_policy ethnl_linkmodes_set_policy[ETHTOOL_A_LINKMODES_LANES + 1];
extern const struct nla_policy ethnl_linkstate_get_policy[ETHTOOL_A_LINKSTATE_HEADER + 1];
extern const struct nla_policy ethnl_debug_get_policy[ETHTOOL_A_DEBUG_HEADER + 1];
extern const struct nla_policy ethnl_debug_set_policy[ETHTOOL_A_DEBUG_MSGMASK + 1];
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index ab953a1a0d6c..7444ec6e298e 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -230,7 +230,7 @@ static const struct header_ops hsr_header_ops = {
.parse = eth_header_parse,
};
-static struct sk_buff *hsr_init_skb(struct hsr_port *master, u16 proto)
+static struct sk_buff *hsr_init_skb(struct hsr_port *master)
{
struct hsr_priv *hsr = master->hsr;
struct sk_buff *skb;
@@ -242,8 +242,7 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master, u16 proto)
* being, for PRP it is a trailer and for HSR it is a
* header
*/
- skb = dev_alloc_skb(sizeof(struct hsr_tag) +
- sizeof(struct hsr_sup_tag) +
+ skb = dev_alloc_skb(sizeof(struct hsr_sup_tag) +
sizeof(struct hsr_sup_payload) + hlen + tlen);
if (!skb)
@@ -251,10 +250,9 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master, u16 proto)
skb_reserve(skb, hlen);
skb->dev = master->dev;
- skb->protocol = htons(proto);
skb->priority = TC_PRIO_CONTROL;
- if (dev_hard_header(skb, skb->dev, proto,
+ if (dev_hard_header(skb, skb->dev, ETH_P_PRP,
hsr->sup_multicast_addr,
skb->dev->dev_addr, skb->len) <= 0)
goto out;
@@ -275,12 +273,10 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
{
struct hsr_priv *hsr = master->hsr;
__u8 type = HSR_TLV_LIFE_CHECK;
- struct hsr_tag *hsr_tag = NULL;
struct hsr_sup_payload *hsr_sp;
struct hsr_sup_tag *hsr_stag;
unsigned long irqflags;
struct sk_buff *skb;
- u16 proto;
*interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
if (hsr->announce_count < 3 && hsr->prot_version == 0) {
@@ -289,23 +285,12 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr->announce_count++;
}
- if (!hsr->prot_version)
- proto = ETH_P_PRP;
- else
- proto = ETH_P_HSR;
-
- skb = hsr_init_skb(master, proto);
+ skb = hsr_init_skb(master);
if (!skb) {
WARN_ONCE(1, "HSR: Could not send supervision frame\n");
return;
}
- if (hsr->prot_version > 0) {
- hsr_tag = skb_put(skb, sizeof(struct hsr_tag));
- hsr_tag->encap_proto = htons(ETH_P_PRP);
- set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE);
- }
-
hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag));
set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf));
set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version);
@@ -315,8 +300,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
if (hsr->prot_version > 0) {
hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
hsr->sup_sequence_nr++;
- hsr_tag->sequence_nr = htons(hsr->sequence_nr);
- hsr->sequence_nr++;
} else {
hsr_stag->sequence_nr = htons(hsr->sequence_nr);
hsr->sequence_nr++;
@@ -332,7 +315,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
- if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN))
+ if (skb_put_padto(skb, ETH_ZLEN))
return;
hsr_forward_skb(skb, master);
@@ -348,10 +331,8 @@ static void send_prp_supervision_frame(struct hsr_port *master,
struct hsr_sup_tag *hsr_stag;
unsigned long irqflags;
struct sk_buff *skb;
- struct prp_rct *rct;
- u8 *tail;
- skb = hsr_init_skb(master, ETH_P_PRP);
+ skb = hsr_init_skb(master);
if (!skb) {
WARN_ONCE(1, "PRP: Could not send supervision frame\n");
return;
@@ -373,17 +354,11 @@ static void send_prp_supervision_frame(struct hsr_port *master,
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
- if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) {
+ if (skb_put_padto(skb, ETH_ZLEN)) {
spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
return;
}
- tail = skb_tail_pointer(skb) - HSR_HLEN;
- rct = (struct prp_rct *)tail;
- rct->PRP_suffix = htons(ETH_P_PRP);
- set_prp_LSDU_size(rct, HSR_V1_SUP_LSDUSIZE);
- rct->sequence_nr = htons(hsr->sequence_nr);
- hsr->sequence_nr++;
spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags);
hsr_forward_skb(skb, master);
@@ -442,6 +417,7 @@ static struct hsr_proto_ops hsr_ops = {
.send_sv_frame = send_hsr_supervision_frame,
.create_tagged_frame = hsr_create_tagged_frame,
.get_untagged_frame = hsr_get_untagged_frame,
+ .drop_frame = hsr_drop_frame,
.fill_frame_info = hsr_fill_frame_info,
.invalid_dan_ingress_frame = hsr_invalid_dan_ingress_frame,
};
@@ -489,10 +465,11 @@ void hsr_dev_setup(struct net_device *dev)
/* Return true if dev is a HSR master; return false otherwise.
*/
-inline bool is_hsr_master(struct net_device *dev)
+bool is_hsr_master(struct net_device *dev)
{
return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit);
}
+EXPORT_SYMBOL(is_hsr_master);
/* Default multicast address for HSR Supervision frames */
static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = {
@@ -545,16 +522,6 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr->prot_version = protocol_version;
- /* FIXME: should I modify the value of these?
- *
- * - hsr_dev->flags - i.e.
- * IFF_MASTER/SLAVE?
- * - hsr_dev->priv_flags - i.e.
- * IFF_EBRIDGE?
- * IFF_TX_SKB_SHARING?
- * IFF_HSR_MASTER/SLAVE?
- */
-
/* Make sure the 1st call to netif_carrier_on() gets through */
netif_carrier_off(hsr_dev);
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
index 868373822ee4..9060c92168f9 100644
--- a/net/hsr/hsr_device.h
+++ b/net/hsr/hsr_device.h
@@ -19,6 +19,5 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
unsigned char multicast_spec, u8 protocol_version,
struct netlink_ext_ack *extack);
void hsr_check_carrier_and_operstate(struct hsr_priv *hsr);
-bool is_hsr_master(struct net_device *dev);
int hsr_get_max_mtu(struct hsr_priv *hsr);
#endif /* __HSR_DEVICE_H */
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index cadfccd7876e..ed82a470b6e1 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -186,6 +186,7 @@ static struct sk_buff *prp_fill_rct(struct sk_buff *skb,
set_prp_LSDU_size(trailer, lsdu_size);
trailer->sequence_nr = htons(frame->sequence_nr);
trailer->PRP_suffix = htons(ETH_P_PRP);
+ skb->protocol = eth_hdr(skb)->h_proto;
return skb;
}
@@ -226,6 +227,7 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb,
hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto;
hsr_ethhdr->ethhdr.h_proto = htons(proto_version ?
ETH_P_HSR : ETH_P_PRP);
+ skb->protocol = hsr_ethhdr->ethhdr.h_proto;
return skb;
}
@@ -247,6 +249,8 @@ struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame,
/* set the lane id properly */
hsr_set_path_id(hsr_ethhdr, port);
return skb_clone(frame->skb_hsr, GFP_ATOMIC);
+ } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) {
+ return skb_clone(frame->skb_std, GFP_ATOMIC);
}
/* Create the new skb with enough headroom to fit the HSR tag */
@@ -289,6 +293,8 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame,
return NULL;
}
return skb_clone(frame->skb_prp, GFP_ATOMIC);
+ } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) {
+ return skb_clone(frame->skb_std, GFP_ATOMIC);
}
skb = skb_copy_expand(frame->skb_std, 0,
@@ -341,6 +347,14 @@ bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
port->type == HSR_PT_SLAVE_A));
}
+bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port)
+{
+ if (port->dev->features & NETIF_F_HW_HSR_FWD)
+ return prp_drop_frame(frame, port);
+
+ return false;
+}
+
/* Forward the frame through all devices except:
* - Back through the receiving device
* - If it's a HSR frame: through a device where it has passed before
@@ -357,6 +371,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
{
struct hsr_port *port;
struct sk_buff *skb;
+ bool sent = false;
hsr_for_each_port(frame->port_rcv->hsr, port) {
struct hsr_priv *hsr = port->hsr;
@@ -372,6 +387,12 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
if (port->type != HSR_PT_MASTER && frame->is_local_exclusive)
continue;
+ /* If hardware duplicate generation is enabled, only send out
+ * one port.
+ */
+ if ((port->dev->features & NETIF_F_HW_HSR_DUP) && sent)
+ continue;
+
/* Don't send frame over port where it has been sent before.
* Also fro SAN, this shouldn't be done.
*/
@@ -403,10 +424,12 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
}
skb->dev = port->dev;
- if (port->type == HSR_PT_MASTER)
+ if (port->type == HSR_PT_MASTER) {
hsr_deliver_master(skb, port->dev, frame->node_src);
- else
- hsr_xmit(skb, port, frame);
+ } else {
+ if (!hsr_xmit(skb, port, frame))
+ sent = true;
+ }
}
}
@@ -454,7 +477,11 @@ static void handle_std_frame(struct sk_buff *skb,
void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
struct hsr_frame_info *frame)
{
- if (proto == htons(ETH_P_PRP) ||
+ struct hsr_port *port = frame->port_rcv;
+ struct hsr_priv *hsr = port->hsr;
+
+ /* HSRv0 supervisory frames double as a tag so treat them as tagged. */
+ if ((!hsr->prot_version && proto == htons(ETH_P_PRP)) ||
proto == htons(ETH_P_HSR)) {
/* HSR tagged frame :- Data or Supervision */
frame->skb_std = NULL;
diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h
index 618140d484ad..b6acaafa83fc 100644
--- a/net/hsr/hsr_forward.h
+++ b/net/hsr/hsr_forward.h
@@ -23,6 +23,7 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame,
struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame,
struct hsr_port *port);
bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port);
+bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port);
void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
struct hsr_frame_info *frame);
void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 5c97de459905..bb1351c38397 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -164,8 +164,10 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
* as initialization. (0 could trigger an spurious ring error warning).
*/
now = jiffies;
- for (i = 0; i < HSR_PT_PORTS; i++)
+ for (i = 0; i < HSR_PT_PORTS; i++) {
new_node->time_in[i] = now;
+ new_node->time_out[i] = now;
+ }
for (i = 0; i < HSR_PT_PORTS; i++)
new_node->seq_out[i] = seq_out;
@@ -277,6 +279,8 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
skb = frame->skb_hsr;
else if (frame->skb_prp)
skb = frame->skb_prp;
+ else if (frame->skb_std)
+ skb = frame->skb_std;
if (!skb)
return;
@@ -411,9 +415,12 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
u16 sequence_nr)
{
- if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]))
+ if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) &&
+ time_is_after_jiffies(node->time_out[port->type] +
+ msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)))
return 1;
+ node->time_out[port->type] = jiffies;
node->seq_out[port->type] = sequence_nr;
return 0;
}
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 86b43f539f2c..d9628e7a5f05 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -75,6 +75,7 @@ struct hsr_node {
enum hsr_port_type addr_B_port;
unsigned long time_in[HSR_PT_PORTS];
bool time_in_stale[HSR_PT_PORTS];
+ unsigned long time_out[HSR_PT_PORTS];
/* if the node is a SAN */
bool san_a;
bool san_b;
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 2fd1976e5b1c..f7e284f23b1f 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -131,6 +131,17 @@ struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt)
return NULL;
}
+int hsr_get_version(struct net_device *dev, enum hsr_version *ver)
+{
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+ *ver = hsr->prot_version;
+
+ return 0;
+}
+EXPORT_SYMBOL(hsr_get_version);
+
static struct notifier_block hsr_nb = {
.notifier_call = hsr_netdev_notify, /* Slave event notifications */
};
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 7dc92ce5a134..8f264672b70b 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -13,6 +13,7 @@
#include <linux/netdevice.h>
#include <linux/list.h>
#include <linux/if_vlan.h>
+#include <linux/if_hsr.h>
/* Time constants as specified in the HSR specification (IEC-62439-3 2010)
* Table 8.
@@ -21,6 +22,7 @@
#define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */
#define HSR_NODE_FORGET_TIME 60000 /* ms */
#define HSR_ANNOUNCE_INTERVAL 100 /* ms */
+#define HSR_ENTRY_FORGET_TIME 400 /* ms */
/* By how much may slave1 and slave2 timestamps of latest received frame from
* each node differ before we notify of communication problem?
@@ -171,13 +173,6 @@ struct hsr_port {
enum hsr_port_type type;
};
-/* used by driver internally to differentiate various protocols */
-enum hsr_version {
- HSR_V0 = 0,
- HSR_V1,
- PRP_V1,
-};
-
struct hsr_frame_info;
struct hsr_node;
@@ -217,7 +212,10 @@ struct hsr_priv {
u8 net_id; /* for PRP, it occupies most significant 3 bits
* of lan_id
*/
- unsigned char sup_multicast_addr[ETH_ALEN];
+ unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16));
+ /* Align to u16 boundary to avoid unaligned access
+ * in ether_addr_equal
+ */
#ifdef CONFIG_DEBUG_FS
struct dentry *node_tbl_root;
#endif
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index 36d5fcf09c61..c5227d42faf5 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -48,12 +48,14 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
goto finish_consume;
}
- /* For HSR, only tagged frames are expected, but for PRP
- * there could be non tagged frames as well from Single
- * attached nodes (SANs).
+ /* For HSR, only tagged frames are expected (unless the device offloads
+ * HSR tag removal), but for PRP there could be non tagged frames as
+ * well from Single attached nodes (SANs).
*/
protocol = eth_hdr(skb)->h_proto;
- if (hsr->proto_ops->invalid_dan_ingress_frame &&
+
+ if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) &&
+ hsr->proto_ops->invalid_dan_ingress_frame &&
hsr->proto_ops->invalid_dan_ingress_frame(protocol))
goto finish_pass;
diff --git a/net/ife/Kconfig b/net/ife/Kconfig
index bcf650564db4..de36a5b91e50 100644
--- a/net/ife/Kconfig
+++ b/net/ife/Kconfig
@@ -4,7 +4,6 @@
#
menuconfig NET_IFE
- depends on NET
tristate "Inter-FE based on IETF ForCES InterFE LFB"
default n
help
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b94fa8eb831b..a02ce89b56b5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -438,6 +438,7 @@ EXPORT_SYMBOL(inet_release);
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
+ u32 flags = BIND_WITH_LOCK;
int err;
/* If the socket has its own bind function then use it. (RAW) */
@@ -450,11 +451,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
- err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);
+ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+ BPF_CGROUP_INET4_BIND, &flags);
if (err)
return err;
- return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+ return __inet_bind(sk, uaddr, addr_len, flags);
}
EXPORT_SYMBOL(inet_bind);
@@ -499,7 +501,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
snum = ntohs(addr->sin_port);
err = -EACCES;
- if (snum && inet_port_requires_bind_service(net, snum) &&
+ if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
+ snum && inet_port_requires_bind_service(net, snum) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
goto out;
@@ -777,18 +780,19 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
return -ENOTCONN;
sin->sin_port = inet->inet_dport;
sin->sin_addr.s_addr = inet->inet_daddr;
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ NULL);
} else {
__be32 addr = inet->inet_rcv_saddr;
if (!addr)
addr = inet->inet_saddr;
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
- }
- if (cgroup_bpf_enabled)
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- peer ? BPF_CGROUP_INET4_GETPEERNAME :
- BPF_CGROUP_INET4_GETSOCKNAME,
+ BPF_CGROUP_INET4_GETSOCKNAME,
NULL);
+ }
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
return sizeof(*sin);
}
@@ -1419,7 +1423,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
out:
return segs;
}
-EXPORT_SYMBOL(inet_gso_segment);
static struct sk_buff *ipip_gso_segment(struct sk_buff *skb,
netdev_features_t features)
@@ -1550,7 +1553,6 @@ out:
return pp;
}
-EXPORT_SYMBOL(inet_gro_receive);
static struct sk_buff *ipip_gro_receive(struct list_head *head,
struct sk_buff *skb)
@@ -1636,7 +1638,6 @@ out_unlock:
return err;
}
-EXPORT_SYMBOL(inet_gro_complete);
static int ipip_gro_complete(struct sk_buff *skb, int nhoff)
{
@@ -1871,6 +1872,8 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_igmp_llm_reports = 1;
net->ipv4.sysctl_igmp_qrv = 2;
+ net->ipv4.sysctl_fib_notify_on_flag_change = 0;
+
return 0;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 8b07f3a4f2db..a3271ec3e162 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -443,7 +443,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb,
int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
u8 *tail;
- u8 *vaddr;
int nfrags;
int esph_offset;
struct page *page;
@@ -485,14 +484,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
page = pfrag->page;
get_page(page);
- vaddr = kmap_atomic(page);
-
- tail = vaddr + pfrag->offset;
+ tail = page_address(page) + pfrag->offset;
esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
- kunmap_atomic(vaddr);
-
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 5bda5aeda579..601f5fbfc63f 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -285,7 +285,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
esp.esph = ip_esp_hdr(skb);
- if (!hw_offload || (hw_offload && !skb_is_gso(skb))) {
+ if (!hw_offload || !skb_is_gso(skb)) {
esp.nfrags = esp_output_head(x, skb, &esp);
if (esp.nfrags < 0)
return esp.nfrags;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cdf6ec5aa45d..84bb707bd88d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -292,7 +292,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
- .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+ .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
.flowi4_scope = scope,
.flowi4_mark = vmark ? skb->mark : 0,
};
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 818916b2a04d..b58db1ca4bfb 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -18,7 +18,8 @@ struct fib_alias {
s16 fa_default;
u8 offload:1,
trap:1,
- unused:6;
+ offload_failed:1,
+ unused:5;
struct rcu_head rcu;
};
@@ -39,9 +40,10 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
struct netlink_ext_ack *extack);
bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi);
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- struct fib_rt_info *fri, unsigned int flags);
+ const struct fib_rt_info *fri, unsigned int flags);
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len,
u32 tb_id, const struct nl_info *info, unsigned int nlm_flags);
+size_t fib_nlmsg_size(struct fib_info *fi);
static inline void fib_result_assign(struct fib_result *res,
struct fib_info *fi)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b5400cec4f69..a632b66bc13a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -452,7 +452,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
return -1;
}
-static inline size_t fib_nlmsg_size(struct fib_info *fi)
+size_t fib_nlmsg_size(struct fib_info *fi)
{
size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(4) /* RTA_TABLE */
@@ -521,6 +521,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
+ fri.offload_failed = fa->offload_failed;
err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -1733,7 +1734,7 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
#endif
int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
- struct fib_rt_info *fri, unsigned int flags)
+ const struct fib_rt_info *fri, unsigned int flags)
{
unsigned int nhs = fib_info_num_path(fri->fi);
struct fib_info *fi = fri->fi;
@@ -1811,6 +1812,8 @@ offload:
rtm->rtm_flags |= RTM_F_OFFLOAD;
if (fri->trap)
rtm->rtm_flags |= RTM_F_TRAP;
+ if (fri->offload_failed)
+ rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 28117c05dc35..25cf387cca5b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1038,6 +1038,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
{
struct fib_alias *fa_match;
+ struct sk_buff *skb;
+ int err;
rcu_read_lock();
@@ -1045,9 +1047,42 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
if (!fa_match)
goto out;
+ if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
+ fa_match->offload_failed == fri->offload_failed)
+ goto out;
+
fa_match->offload = fri->offload;
fa_match->trap = fri->trap;
+ /* 2 means send notifications only if offload_failed was changed. */
+ if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
+ fa_match->offload_failed == fri->offload_failed)
+ goto out;
+
+ fa_match->offload_failed = fri->offload_failed;
+
+ if (!net->ipv4.sysctl_fib_notify_on_flag_change)
+ goto out;
+
+ skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
+ if (!skb) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ err = fib_dump_info(skb, 0, 0, RTM_NEWROUTE, fri, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV4_ROUTE, NULL, GFP_ATOMIC);
+ goto out;
+
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_ROUTE, err);
out:
rcu_read_unlock();
}
@@ -1263,6 +1298,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_default = -1;
new_fa->offload = 0;
new_fa->trap = 0;
+ new_fa->offload_failed = 0;
hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
@@ -1323,6 +1359,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_default = -1;
new_fa->offload = 0;
new_fa->trap = 0;
+ new_fa->offload_failed = 0;
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
@@ -2262,6 +2299,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.type = fa->fa_type;
fri.offload = fa->offload;
fri.trap = fa->trap;
+ fri.offload_failed = fa->offload_failed;
err = fib_dump_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 66fdbfe5447c..5d1e6fe9d838 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -128,7 +128,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
* to 0 and sets the configured key in the
* inner erspan header field
*/
- if (greh->protocol == htons(ETH_P_ERSPAN) ||
+ if ((greh->protocol == htons(ETH_P_ERSPAN) && hdr_len != 4) ||
greh->protocol == htons(ETH_P_ERSPAN2)) {
struct erspan_base_hdr *ershdr;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index e0a246575887..1121a9d5fed9 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -15,7 +15,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
- bool need_csum, need_recompute_csum, gso_partial;
+ bool need_csum, offload_csum, gso_partial, need_ipsec;
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
__be16 protocol = skb->protocol;
@@ -41,10 +41,16 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
skb->protocol = skb->inner_protocol;
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
- need_recompute_csum = skb->csum_not_inet;
skb->encap_hdr_csum = need_csum;
features &= skb->dev->hw_enc_features;
+ if (need_csum)
+ features &= ~NETIF_F_SCTP_CRC;
+
+ need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
+ /* Try to offload checksum if possible */
+ offload_csum = !!(need_csum && !need_ipsec &&
+ (skb->dev->features & NETIF_F_HW_CSUM));
/* segment inner packet. */
segs = skb_mac_gso_segment(skb, features);
@@ -99,14 +105,12 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
}
*(pcsum + 1) = 0;
- if (need_recompute_csum && !skb_is_gso(skb)) {
- __wsum csum;
-
- csum = skb_checksum(skb, gre_offset,
- skb->len - gre_offset, 0);
- *pcsum = csum_fold(csum);
- } else {
+ if (skb->encapsulation || !offload_csum) {
*pcsum = gso_make_checksum(skb, 0);
+ } else {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = sizeof(*greh);
}
} while ((skb = skb->next));
out:
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 396b492c804f..616e2dc1c8fa 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -775,13 +775,14 @@ EXPORT_SYMBOL(__icmp_send);
void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
struct sk_buff *cloned_skb = NULL;
+ struct ip_options opts = { 0 };
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
__be32 orig_ip;
ct = nf_ct_get(skb_in, &ctinfo);
if (!ct || !(ct->status & IPS_SRC_NAT)) {
- icmp_send(skb_in, type, code, info);
+ __icmp_send(skb_in, type, code, info, &opts);
return;
}
@@ -796,7 +797,7 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
orig_ip = ip_hdr(skb_in)->saddr;
ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
- icmp_send(skb_in, type, code, info);
+ __icmp_send(skb_in, type, code, info, &opts);
ip_hdr(skb_in)->saddr = orig_ip;
out:
consume_skb(cloned_skb);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fd8b8800a2c3..6bd7ca09af03 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -851,6 +851,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
newicsk->icsk_retransmits = 0;
newicsk->icsk_backoff = 0;
newicsk->icsk_probes_out = 0;
+ newicsk->icsk_probes_tstamp = 0;
/* Deinitialize accept_queue to trap illegal accesses. */
memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 45fb450b4522..c96866a53a66 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -709,6 +709,17 @@ unlock:
}
EXPORT_SYMBOL_GPL(inet_unhash);
+/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
+ * Note that we use 32bit integers (vs RFC 'short integers')
+ * because 2^16 is not a multiple of num_ephemeral and this
+ * property might be used by clever attacker.
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement,
+ * we use 256 instead to really give more isolation and
+ * privacy, this only consumes 1 KB of kernel memory.
+ */
+#define INET_TABLE_PERTURB_SHIFT 8
+static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
+
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u32 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
@@ -722,8 +733,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_bucket *tb;
u32 remaining, offset;
int ret, i, low, high;
- static u32 hint;
int l3mdev;
+ u32 index;
if (port) {
head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -750,7 +761,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- offset = (hint + port_offset) % remaining;
+ net_get_random_once(table_perturb, sizeof(table_perturb));
+ index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
+
+ offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -804,7 +818,12 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- hint += i + 2;
+ /* If our first attempt found a candidate, skip next candidate
+ * in 1/16 of cases to add some noise.
+ */
+ if (!i && !(prandom_u32() % 16))
+ i = 2;
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index b0c244af1e4d..3a025c011971 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -253,6 +253,7 @@ int ip_local_deliver(struct sk_buff *skb)
net, NULL, skb, skb->dev, NULL,
ip_local_deliver_finish);
}
+EXPORT_SYMBOL(ip_local_deliver);
static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
{
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 89fff5f59eea..3aab53beb4ea 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -302,7 +302,7 @@ static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *
if (skb_is_gso(skb))
return ip_finish_output_gso(net, sk, skb, mtu);
- if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
+ if (skb->len > mtu || IPCB(skb)->frag_max_size)
return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
return ip_finish_output2(net, sk, skb);
@@ -434,6 +434,7 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
ip_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
+EXPORT_SYMBOL(ip_output);
/*
* copy saddr and daddr, possibly using 64bit load/stores
@@ -1018,7 +1019,7 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
- uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
if (!uarg)
return -ENOBUFS;
extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
@@ -1230,8 +1231,7 @@ alloc_new_skb:
error_efault:
err = -EFAULT;
error:
- if (uarg)
- sock_zerocopy_put_abort(uarg, extra_uref);
+ net_zcopy_put_abort(uarg, extra_uref);
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ee65c9225178..76a420c76f16 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -317,7 +317,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
}
dev->needed_headroom = t_hlen + hlen;
- mtu -= (dev->hard_header_len + t_hlen);
+ mtu -= t_hlen;
if (mtu < IPV4_MIN_MTU)
mtu = IPV4_MIN_MTU;
@@ -347,7 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
nt = netdev_priv(dev);
t_hlen = nt->hlen + sizeof(struct iphdr);
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
+ dev->max_mtu = IP_MAX_MTU - t_hlen;
ip_tunnel_add(itn, nt);
return nt;
@@ -488,11 +488,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
int mtu;
tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
- pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;
+ pkt_size = skb->len - tunnel_hlen;
if (df)
- mtu = dst_mtu(&rt->dst) - dev->hard_header_len
- - sizeof(struct iphdr) - tunnel_hlen;
+ mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
else
mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
@@ -759,8 +758,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
- 0, 0, false)) {
+ df = tnl_params->frag_off;
+ if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
+ df |= (inner_iph->frag_off & htons(IP_DF));
+
+ if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
ip_rt_put(rt);
goto tx_error;
}
@@ -788,10 +790,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ttl = ip4_dst_hoplimit(&rt->dst);
}
- df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
- df |= (inner_iph->frag_off&htons(IP_DF));
-
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
if (max_headroom > dev->needed_headroom)
@@ -973,7 +971,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
+ int max_mtu = IP_MAX_MTU - t_hlen;
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
@@ -1150,10 +1148,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
mtu = ip_tunnel_bind_dev(dev);
if (tb[IFLA_MTU]) {
- unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
+ unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
- mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
- (unsigned int)(max - sizeof(struct iphdr)));
+ mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
}
err = dev_set_mtu(dev, mtu);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 7ca338fbe8ba..6b2dc7b2b612 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -222,7 +222,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
.code = ICMP_FRAG_NEEDED,
.checksum = 0,
.un.frag.__unused = 0,
- .un.frag.mtu = ntohs(mtu),
+ .un.frag.mtu = htons(mtu),
};
icmph->checksum = ip_compute_csum(icmph, len);
skb_reset_transport_header(skb);
@@ -245,7 +245,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
skb->ip_summed = CHECKSUM_NONE;
- eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0);
+ eth_header(skb, skb->dev, ntohs(eh.h_proto), eh.h_source, eh.h_dest, 0);
skb_reset_mac_header(skb);
return skb->len;
@@ -338,7 +338,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
skb->ip_summed = CHECKSUM_NONE;
- eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0);
+ eth_header(skb, skb->dev, ntohs(eh.h_proto), eh.h_source, eh.h_dest, 0);
skb_reset_mac_header(skb);
return skb->len;
@@ -583,8 +583,9 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr,
static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
struct netlink_ext_ack *extack)
{
- int err, rem, opt_len, opts_len = 0, type = 0;
+ int err, rem, opt_len, opts_len = 0;
struct nlattr *nla;
+ __be16 type = 0;
if (!attr)
return 0;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 3cd13e1bc6a7..47db1bfdaaa0 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -61,7 +61,6 @@
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/arp.h>
-#include <net/dsa.h>
#include <net/ip.h>
#include <net/ipconfig.h>
#include <net/route.h>
@@ -218,9 +217,9 @@ static int __init ic_open_devs(void)
last = &ic_first_dev;
rtnl_lock();
- /* bring loopback and DSA master network devices up first */
+ /* bring loopback device up first */
for_each_netdev(&init_net, dev) {
- if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev))
+ if (!(dev->flags & IFF_LOOPBACK))
continue;
if (dev_change_flags(dev, dev->flags | IFF_UP, NULL) < 0)
pr_err("IP-Config: Failed to open %s\n", dev->name);
@@ -305,17 +304,32 @@ have_carrier:
return 0;
}
+/* Close all network interfaces except the one we've autoconfigured, and its
+ * lowers, in case it's a stacked virtual interface.
+ */
static void __init ic_close_devs(void)
{
+ struct net_device *selected_dev = ic_dev->dev;
struct ic_device *d, *next;
struct net_device *dev;
rtnl_lock();
next = ic_first_dev;
while ((d = next)) {
+ bool bring_down = (d != ic_dev);
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
next = d->next;
dev = d->dev;
- if (d != ic_dev && !netdev_uses_dsa(dev)) {
+
+ netdev_for_each_lower_dev(selected_dev, lower_dev, iter) {
+ if (dev == lower_dev) {
+ bring_down = false;
+ break;
+ }
+ }
+ if (bring_down) {
pr_debug("IP-Config: Downing %s\n", dev->name);
dev_change_flags(dev, d->flags, NULL);
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 563b62b76a5f..c576a63d09db 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NFPROTO_ARP);
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
if (!IS_ERR(t)) {
- const struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = xt_table_get_private_protected(t);
struct xt_table_info info;
ret = compat_table_info(private, &info);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6e2851f8d3a3..e8f6f9d86237 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
if (!IS_ERR(t)) {
- const struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = xt_table_get_private_protected(t);
struct xt_table_info info;
ret = compat_table_info(private, &info);
if (!ret && get.size == info.size)
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index cc23f1ce239c..8cd3224d913e 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- flow.flowi4_tos = RT_TOS(iph->tos);
+ flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index bcdb37f86a94..aeb631760eb9 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -13,8 +13,8 @@
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
struct nft_dup_ipv4 {
- enum nft_registers sreg_addr:8;
- enum nft_registers sreg_dev:8;
+ u8 sreg_addr;
+ u8 sreg_dev;
};
static void nft_dup_ipv4_eval(const struct nft_expr *expr,
@@ -40,16 +40,16 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
- err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr));
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ sizeof(struct in_addr));
if (err < 0)
return err;
- if (tb[NFTA_DUP_SREG_DEV] != NULL) {
- priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
- }
- return 0;
+ if (tb[NFTA_DUP_SREG_DEV])
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ &priv->sreg_dev, sizeof(int));
+
+ return err;
}
static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5e1b22d4f939..f1c6cbdb9e43 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -22,7 +22,7 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
#define NH_DEV_HASHBITS 8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
-static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
+static const struct nla_policy rtm_nh_policy_new[] = {
[NHA_ID] = { .type = NLA_U32 },
[NHA_GROUP] = { .type = NLA_BINARY },
[NHA_GROUP_TYPE] = { .type = NLA_U16 },
@@ -31,6 +31,15 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
[NHA_GATEWAY] = { .type = NLA_BINARY },
[NHA_ENCAP_TYPE] = { .type = NLA_U16 },
[NHA_ENCAP] = { .type = NLA_NESTED },
+ [NHA_FDB] = { .type = NLA_FLAG },
+};
+
+static const struct nla_policy rtm_nh_policy_get[] = {
+ [NHA_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy rtm_nh_policy_dump[] = {
+ [NHA_OIF] = { .type = NLA_U32 },
[NHA_GROUPS] = { .type = NLA_FLAG },
[NHA_MASTER] = { .type = NLA_U32 },
[NHA_FDB] = { .type = NLA_FLAG },
@@ -62,6 +71,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
static int nh_notifier_single_info_init(struct nh_notifier_info *info,
const struct nexthop *nh)
{
+ info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
if (!info->nh)
return -ENOMEM;
@@ -76,13 +86,13 @@ static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
kfree(info->nh);
}
-static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
- const struct nexthop *nh)
+static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
+ struct nh_group *nhg)
{
- struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
u16 num_nh = nhg->num_nh;
int i;
+ info->type = NH_NOTIFIER_INFO_TYPE_GRP;
info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh),
GFP_KERNEL);
if (!info->nh_grp)
@@ -103,27 +113,41 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
return 0;
}
-static void nh_notifier_grp_info_fini(struct nh_notifier_info *info)
+static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
+ const struct nexthop *nh)
{
- kfree(info->nh_grp);
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+
+ if (nhg->mpath)
+ return nh_notifier_mp_info_init(info, nhg);
+ return -EINVAL;
+}
+
+static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
+ const struct nexthop *nh)
+{
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+
+ if (nhg->mpath)
+ kfree(info->nh_grp);
}
static int nh_notifier_info_init(struct nh_notifier_info *info,
const struct nexthop *nh)
{
info->id = nh->id;
- info->is_grp = nh->is_group;
- if (info->is_grp)
+ if (nh->is_group)
return nh_notifier_grp_info_init(info, nh);
else
return nh_notifier_single_info_init(info, nh);
}
-static void nh_notifier_info_fini(struct nh_notifier_info *info)
+static void nh_notifier_info_fini(struct nh_notifier_info *info,
+ const struct nexthop *nh)
{
- if (info->is_grp)
- nh_notifier_grp_info_fini(info);
+ if (nh->is_group)
+ nh_notifier_grp_info_fini(info, nh);
else
nh_notifier_single_info_fini(info);
}
@@ -152,7 +176,7 @@ static int call_nexthop_notifiers(struct net *net,
err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
event_type, &info);
- nh_notifier_info_fini(&info);
+ nh_notifier_info_fini(&info, nh);
return notifier_to_errno(err);
}
@@ -173,7 +197,7 @@ static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
return err;
err = nb->notifier_call(nb, event_type, &info);
- nh_notifier_info_fini(&info);
+ nh_notifier_info_fini(&info, nh);
return notifier_to_errno(err);
}
@@ -200,7 +224,7 @@ static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
hlist_add_head(&nhi->dev_hash, head);
}
-static void nexthop_free_mpath(struct nexthop *nh)
+static void nexthop_free_group(struct nexthop *nh)
{
struct nh_group *nhg;
int i;
@@ -240,7 +264,7 @@ void nexthop_free_rcu(struct rcu_head *head)
struct nexthop *nh = container_of(head, struct nexthop, rcu);
if (nh->is_group)
- nexthop_free_mpath(nh);
+ nexthop_free_group(nh);
else
nexthop_free_single(nh);
@@ -565,7 +589,8 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
return 0;
}
-static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
+static int nh_check_attr_group(struct net *net,
+ struct nlattr *tb[], size_t tb_size,
struct netlink_ext_ack *extack)
{
unsigned int len = nla_len(tb[NHA_GROUP]);
@@ -624,10 +649,10 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
return -EINVAL;
}
}
- for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) {
+ for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
if (!tb[i])
continue;
- if (tb[NHA_FDB])
+ if (i == NHA_FDB)
continue;
NL_SET_ERR_MSG(extack,
"No other attributes can be set in nexthop groups");
@@ -670,21 +695,16 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
return !!(state & NUD_VALID);
}
-struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
+static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
{
struct nexthop *rc = NULL;
- struct nh_group *nhg;
int i;
- if (!nh->is_group)
- return nh;
-
- nhg = rcu_dereference(nh->nh_grp);
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
struct nh_info *nhi;
- if (hash > atomic_read(&nhge->upper_bound))
+ if (hash > atomic_read(&nhge->mpath.upper_bound))
continue;
nhi = rcu_dereference(nhge->nh->nh_info);
@@ -711,6 +731,21 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
return rc;
}
+
+struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
+{
+ struct nh_group *nhg;
+
+ if (!nh->is_group)
+ return nh;
+
+ nhg = rcu_dereference(nh->nh_grp);
+ if (nhg->mpath)
+ return nexthop_select_path_mp(nhg, hash);
+
+ /* Unreachable. */
+ return NULL;
+}
EXPORT_SYMBOL_GPL(nexthop_select_path);
int nexthop_for_each_fib6_nh(struct nexthop *nh,
@@ -904,7 +939,7 @@ static void nh_group_rebalance(struct nh_group *nhg)
w += nhge->weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
- atomic_set(&nhge->upper_bound, upper_bound);
+ atomic_set(&nhge->mpath.upper_bound, upper_bound);
}
}
@@ -1446,10 +1481,13 @@ static struct nexthop *nexthop_create_group(struct net *net,
nhg->nh_entries[i].nh_parent = nh;
}
- if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
+ if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH)
nhg->mpath = 1;
+
+ WARN_ON_ONCE(nhg->mpath != 1);
+
+ if (nhg->mpath)
nh_group_rebalance(nhg);
- }
if (cfg->nh_fdb)
nhg->fdb_nh = 1;
@@ -1459,8 +1497,10 @@ static struct nexthop *nexthop_create_group(struct net *net,
return nh;
out_no_nh:
- for (; i >= 0; --i)
+ for (i--; i >= 0; --i) {
+ list_del(&nhg->nh_entries[i].nh_list);
nexthop_put(nhg->nh_entries[i].nh);
+ }
kfree(nhg->spare);
kfree(nhg);
@@ -1641,11 +1681,12 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
struct nhmsg *nhm = nlmsg_data(nlh);
- struct nlattr *tb[NHA_MAX + 1];
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
int err;
- err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
- extack);
+ err = nlmsg_parse(nlh, sizeof(*nhm), tb,
+ ARRAY_SIZE(rtm_nh_policy_new) - 1,
+ rtm_nh_policy_new, extack);
if (err < 0)
return err;
@@ -1672,11 +1713,6 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
goto out;
}
- if (tb[NHA_GROUPS] || tb[NHA_MASTER]) {
- NL_SET_ERR_MSG(extack, "Invalid attributes in request");
- goto out;
- }
-
memset(cfg, 0, sizeof(*cfg));
cfg->nlflags = nlh->nlmsg_flags;
cfg->nlinfo.portid = NETLINK_CB(skb).portid;
@@ -1718,7 +1754,7 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
NL_SET_ERR_MSG(extack, "Invalid group type");
goto out;
}
- err = nh_check_attr_group(net, tb, extack);
+ err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack);
/* no other attributes should be set */
goto out;
@@ -1836,49 +1872,44 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
return err;
}
-static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id,
- struct netlink_ext_ack *extack)
+static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
+ struct nlattr **tb, u32 *id,
+ struct netlink_ext_ack *extack)
{
struct nhmsg *nhm = nlmsg_data(nlh);
- struct nlattr *tb[NHA_MAX + 1];
- int err, i;
-
- err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
- extack);
- if (err < 0)
- return err;
- err = -EINVAL;
- for (i = 0; i < __NHA_MAX; ++i) {
- if (!tb[i])
- continue;
-
- switch (i) {
- case NHA_ID:
- break;
- default:
- NL_SET_ERR_MSG_ATTR(extack, tb[i],
- "Unexpected attribute in request");
- goto out;
- }
- }
if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
NL_SET_ERR_MSG(extack, "Invalid values in header");
- goto out;
+ return -EINVAL;
}
if (!tb[NHA_ID]) {
NL_SET_ERR_MSG(extack, "Nexthop id is missing");
- goto out;
+ return -EINVAL;
}
*id = nla_get_u32(tb[NHA_ID]);
- if (!(*id))
+ if (!(*id)) {
NL_SET_ERR_MSG(extack, "Invalid nexthop id");
- else
- err = 0;
-out:
- return err;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_get) - 1,
+ rtm_nh_policy_get, extack);
+ if (err < 0)
+ return err;
+
+ return __nh_valid_get_del_req(nlh, tb, id, extack);
}
/* rtnl */
@@ -1947,16 +1978,23 @@ errout_free:
goto out;
}
-static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
- bool group_filter, u8 family)
+struct nh_dump_filter {
+ int dev_idx;
+ int master_idx;
+ bool group_filter;
+ bool fdb_filter;
+};
+
+static bool nh_dump_filtered(struct nexthop *nh,
+ struct nh_dump_filter *filter, u8 family)
{
const struct net_device *dev;
const struct nh_info *nhi;
- if (group_filter && !nh->is_group)
+ if (filter->group_filter && !nh->is_group)
return true;
- if (!dev_idx && !master_idx && !family)
+ if (!filter->dev_idx && !filter->master_idx && !family)
return false;
if (nh->is_group)
@@ -1967,70 +2005,48 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
return true;
dev = nhi->fib_nhc.nhc_dev;
- if (dev_idx && (!dev || dev->ifindex != dev_idx))
+ if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx))
return true;
- if (master_idx) {
+ if (filter->master_idx) {
struct net_device *master;
if (!dev)
return true;
master = netdev_master_upper_dev_get((struct net_device *)dev);
- if (!master || master->ifindex != master_idx)
+ if (!master || master->ifindex != filter->master_idx)
return true;
}
return false;
}
-static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
- int *master_idx, bool *group_filter,
- bool *fdb_filter, struct netlink_callback *cb)
+static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb,
+ struct nh_dump_filter *filter,
+ struct netlink_ext_ack *extack)
{
- struct netlink_ext_ack *extack = cb->extack;
- struct nlattr *tb[NHA_MAX + 1];
struct nhmsg *nhm;
- int err, i;
u32 idx;
- err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
- NULL);
- if (err < 0)
- return err;
-
- for (i = 0; i <= NHA_MAX; ++i) {
- if (!tb[i])
- continue;
-
- switch (i) {
- case NHA_OIF:
- idx = nla_get_u32(tb[i]);
- if (idx > INT_MAX) {
- NL_SET_ERR_MSG(extack, "Invalid device index");
- return -EINVAL;
- }
- *dev_idx = idx;
- break;
- case NHA_MASTER:
- idx = nla_get_u32(tb[i]);
- if (idx > INT_MAX) {
- NL_SET_ERR_MSG(extack, "Invalid master device index");
- return -EINVAL;
- }
- *master_idx = idx;
- break;
- case NHA_GROUPS:
- *group_filter = true;
- break;
- case NHA_FDB:
- *fdb_filter = true;
- break;
- default:
- NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
+ if (tb[NHA_OIF]) {
+ idx = nla_get_u32(tb[NHA_OIF]);
+ if (idx > INT_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid device index");
+ return -EINVAL;
+ }
+ filter->dev_idx = idx;
+ }
+ if (tb[NHA_MASTER]) {
+ idx = nla_get_u32(tb[NHA_MASTER]);
+ if (idx > INT_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid master device index");
return -EINVAL;
}
+ filter->master_idx = idx;
}
+ filter->group_filter = nla_get_flag(tb[NHA_GROUPS]);
+ filter->fdb_filter = nla_get_flag(tb[NHA_FDB]);
nhm = nlmsg_data(nlh);
if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
@@ -2041,24 +2057,49 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
return 0;
}
-/* rtnl */
-static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
+static int nh_valid_dump_req(const struct nlmsghdr *nlh,
+ struct nh_dump_filter *filter,
+ struct netlink_callback *cb)
{
- bool group_filter = false, fdb_filter = false;
- struct nhmsg *nhm = nlmsg_data(cb->nlh);
- int dev_filter_idx = 0, master_idx = 0;
- struct net *net = sock_net(skb->sk);
- struct rb_root *root = &net->nexthop.rb_root;
- struct rb_node *node;
- int idx = 0, s_idx;
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)];
int err;
- err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
- &group_filter, &fdb_filter, cb);
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_dump) - 1,
+ rtm_nh_policy_dump, cb->extack);
if (err < 0)
return err;
- s_idx = cb->args[0];
+ return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
+}
+
+struct rtm_dump_nh_ctx {
+ u32 idx;
+};
+
+static struct rtm_dump_nh_ctx *
+rtm_dump_nh_ctx(struct netlink_callback *cb)
+{
+ struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx;
+
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+ return ctx;
+}
+
+static int rtm_dump_walk_nexthops(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct rb_root *root,
+ struct rtm_dump_nh_ctx *ctx,
+ int (*nh_cb)(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct nexthop *nh, void *data),
+ void *data)
+{
+ struct rb_node *node;
+ int idx = 0, s_idx;
+ int err;
+
+ s_idx = ctx->idx;
for (node = rb_first(root); node; node = rb_next(node)) {
struct nexthop *nh;
@@ -2066,30 +2107,58 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
goto cont;
nh = rb_entry(node, struct nexthop, rb_node);
- if (nh_dump_filtered(nh, dev_filter_idx, master_idx,
- group_filter, nhm->nh_family))
- goto cont;
-
- err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
- if (err < 0) {
- if (likely(skb->len))
- goto out;
-
- goto out_err;
- }
+ ctx->idx = idx;
+ err = nh_cb(skb, cb, nh, data);
+ if (err)
+ return err;
cont:
idx++;
}
+ ctx->idx = idx;
+ return 0;
+}
+
+static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
+ struct nexthop *nh, void *data)
+{
+ struct nhmsg *nhm = nlmsg_data(cb->nlh);
+ struct nh_dump_filter *filter = data;
+
+ if (nh_dump_filtered(nh, filter, nhm->nh_family))
+ return 0;
+
+ return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI);
+}
+
+/* rtnl */
+static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb);
+ struct net *net = sock_net(skb->sk);
+ struct rb_root *root = &net->nexthop.rb_root;
+ struct nh_dump_filter filter = {};
+ int err;
+
+ err = nh_valid_dump_req(cb->nlh, &filter, cb);
+ if (err < 0)
+ return err;
+
+ err = rtm_dump_walk_nexthops(skb, cb, root, ctx,
+ &rtm_dump_nexthop_cb, &filter);
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+ goto out_err;
+ }
+
out:
err = skb->len;
out_err:
- cb->args[0] = idx;
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-
return err;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 63cd370ea29d..6d46297a99f8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -464,30 +464,52 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
*/
static int netstat_seq_show(struct seq_file *seq, void *v)
{
- int i;
+ const int ip_cnt = ARRAY_SIZE(snmp4_ipextstats_list) - 1;
+ const int tcp_cnt = ARRAY_SIZE(snmp4_net_list) - 1;
struct net *net = seq->private;
+ unsigned long *buff;
+ int i;
seq_puts(seq, "TcpExt:");
- for (i = 0; snmp4_net_list[i].name; i++)
+ for (i = 0; i < tcp_cnt; i++)
seq_printf(seq, " %s", snmp4_net_list[i].name);
seq_puts(seq, "\nTcpExt:");
- for (i = 0; snmp4_net_list[i].name; i++)
- seq_printf(seq, " %lu",
- snmp_fold_field(net->mib.net_statistics,
- snmp4_net_list[i].entry));
-
+ buff = kzalloc(max(tcp_cnt * sizeof(long), ip_cnt * sizeof(u64)),
+ GFP_KERNEL);
+ if (buff) {
+ snmp_get_cpu_field_batch(buff, snmp4_net_list,
+ net->mib.net_statistics);
+ for (i = 0; i < tcp_cnt; i++)
+ seq_printf(seq, " %lu", buff[i]);
+ } else {
+ for (i = 0; i < tcp_cnt; i++)
+ seq_printf(seq, " %lu",
+ snmp_fold_field(net->mib.net_statistics,
+ snmp4_net_list[i].entry));
+ }
seq_puts(seq, "\nIpExt:");
- for (i = 0; snmp4_ipextstats_list[i].name; i++)
+ for (i = 0; i < ip_cnt; i++)
seq_printf(seq, " %s", snmp4_ipextstats_list[i].name);
seq_puts(seq, "\nIpExt:");
- for (i = 0; snmp4_ipextstats_list[i].name; i++)
- seq_printf(seq, " %llu",
- snmp_fold_field64(net->mib.ip_statistics,
- snmp4_ipextstats_list[i].entry,
- offsetof(struct ipstats_mib, syncp)));
-
+ if (buff) {
+ u64 *buff64 = (u64 *)buff;
+
+ memset(buff64, 0, ip_cnt * sizeof(u64));
+ snmp_get_cpu_field64_batch(buff64, snmp4_ipextstats_list,
+ net->mib.ip_statistics,
+ offsetof(struct ipstats_mib, syncp));
+ for (i = 0; i < ip_cnt; i++)
+ seq_printf(seq, " %llu", buff64[i]);
+ } else {
+ for (i = 0; i < ip_cnt; i++)
+ seq_printf(seq, " %llu",
+ snmp_fold_field64(net->mib.ip_statistics,
+ snmp4_ipextstats_list[i].entry,
+ offsetof(struct ipstats_mib, syncp)));
+ }
+ kfree(buff);
seq_putc(seq, '\n');
mptcp_seq_show(seq);
return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e26652ff7059..02d81d79deeb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -133,9 +133,11 @@ static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
* Interface to generic destination cache.
*/
-static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
+INDIRECT_CALLABLE_SCOPE
+struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
-static unsigned int ipv4_mtu(const struct dst_entry *dst);
+INDIRECT_CALLABLE_SCOPE
+unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
@@ -1187,7 +1189,8 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
-static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
+INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
+ u32 cookie)
{
struct rtable *rt = (struct rtable *) dst;
@@ -1203,6 +1206,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
return NULL;
return dst;
}
+EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
@@ -1311,7 +1315,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
return min(advmss, IPV4_MAX_PMTU - header_size);
}
-static unsigned int ipv4_mtu(const struct dst_entry *dst)
+INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
{
const struct rtable *rt = (const struct rtable *)dst;
unsigned int mtu = rt->rt_pmtu;
@@ -1333,6 +1337,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
+EXPORT_INDIRECT_CALLABLE(ipv4_mtu);
static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
{
@@ -3299,6 +3304,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
+ fri.offload_failed = 0;
if (res.fa_head) {
struct fib_alias *fa;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3e5f4f2e705e..f55095d3ed16 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1354,6 +1354,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE
},
+ {
+ .procname = "fib_notify_on_flag_change",
+ .data = &init_net.ipv4.sysctl_fib_notify_on_flag_change,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ed42d2193c5c..a3422e42784e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -280,6 +280,12 @@
#include <asm/ioctls.h>
#include <net/busy_poll.h>
+/* Track pending CMSGs. */
+enum {
+ TCP_CMSG_INQ = 1,
+ TCP_CMSG_TS = 2
+};
+
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -475,19 +481,11 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
}
}
-static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
- int target, struct sock *sk)
+static bool tcp_stream_is_readable(struct sock *sk, int target)
{
- int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
-
- if (avail > 0) {
- if (avail >= target)
- return true;
- if (tcp_rmem_pressure(sk))
- return true;
- if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
- return true;
- }
+ if (tcp_epollin_ready(sk, target))
+ return true;
+
if (sk->sk_prot->stream_memory_read)
return sk->sk_prot->stream_memory_read(sk);
return false;
@@ -562,7 +560,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
tp->urg_data)
target++;
- if (tcp_stream_is_readable(tp, target, sk))
+ if (tcp_stream_is_readable(sk, target))
mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
@@ -1010,7 +1008,7 @@ new_segment:
}
if (!(flags & MSG_NO_SHARED_FRAGS))
- skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+ skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
skb->len += copy;
skb->data_len += copy;
@@ -1217,7 +1215,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
skb = tcp_write_queue_tail(sk);
- uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
+ uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
if (!uarg) {
err = -ENOBUFS;
goto out_err;
@@ -1429,7 +1427,7 @@ out:
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
out_nopush:
- sock_zerocopy_put(uarg);
+ net_zcopy_put(uarg);
return copied + copied_syn;
do_error:
@@ -1440,7 +1438,7 @@ do_fault:
if (copied + copied_syn)
goto out;
out_err:
- sock_zerocopy_put_abort(uarg, true);
+ net_zcopy_put_abort(uarg, true);
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
@@ -1739,6 +1737,20 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
}
EXPORT_SYMBOL(tcp_set_rcvlowat);
+static void tcp_update_recv_tstamps(struct sk_buff *skb,
+ struct scm_timestamping_internal *tss)
+{
+ if (skb->tstamp)
+ tss->ts[0] = ktime_to_timespec64(skb->tstamp);
+ else
+ tss->ts[0] = (struct timespec64) {0};
+
+ if (skb_hwtstamps(skb)->hwtstamp)
+ tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
+ else
+ tss->ts[2] = (struct timespec64) {0};
+}
+
#ifdef CONFIG_MMU
static const struct vm_operations_struct tcp_vm_ops = {
};
@@ -1842,13 +1854,13 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
struct scm_timestamping_internal *tss,
int *cmsg_flags);
static int receive_fallback_to_copy(struct sock *sk,
- struct tcp_zerocopy_receive *zc, int inq)
+ struct tcp_zerocopy_receive *zc, int inq,
+ struct scm_timestamping_internal *tss)
{
unsigned long copy_address = (unsigned long)zc->copybuf_address;
- struct scm_timestamping_internal tss_unused;
- int err, cmsg_flags_unused;
struct msghdr msg = {};
struct iovec iov;
+ int err;
zc->length = 0;
zc->recv_skip_hint = 0;
@@ -1862,7 +1874,7 @@ static int receive_fallback_to_copy(struct sock *sk,
return err;
err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0,
- &tss_unused, &cmsg_flags_unused);
+ tss, &zc->msg_flags);
if (err < 0)
return err;
@@ -1903,21 +1915,27 @@ static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
return (__s32)copylen;
}
-static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc,
- struct sock *sk,
- struct sk_buff *skb,
- u32 *seq,
- s32 copybuf_len)
+static int tcp_zc_handle_leftover(struct tcp_zerocopy_receive *zc,
+ struct sock *sk,
+ struct sk_buff *skb,
+ u32 *seq,
+ s32 copybuf_len,
+ struct scm_timestamping_internal *tss)
{
u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint);
if (!copylen)
return 0;
/* skb is null if inq < PAGE_SIZE. */
- if (skb)
+ if (skb) {
offset = *seq - TCP_SKB_CB(skb)->seq;
- else
+ } else {
skb = tcp_recv_skb(sk, *seq, &offset);
+ if (TCP_SKB_CB(skb)->has_rxtstamp) {
+ tcp_update_recv_tstamps(skb, tss);
+ zc->msg_flags |= TCP_CMSG_TS;
+ }
+ }
zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset,
seq);
@@ -2004,9 +2022,38 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
err);
}
+#define TCP_VALID_ZC_MSG_FLAGS (TCP_CMSG_TS)
+static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+ struct scm_timestamping_internal *tss);
+static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
+ struct tcp_zerocopy_receive *zc,
+ struct scm_timestamping_internal *tss)
+{
+ unsigned long msg_control_addr;
+ struct msghdr cmsg_dummy;
+
+ msg_control_addr = (unsigned long)zc->msg_control;
+ cmsg_dummy.msg_control = (void *)msg_control_addr;
+ cmsg_dummy.msg_controllen =
+ (__kernel_size_t)zc->msg_controllen;
+ cmsg_dummy.msg_flags = in_compat_syscall()
+ ? MSG_CMSG_COMPAT : 0;
+ zc->msg_flags = 0;
+ if (zc->msg_control == msg_control_addr &&
+ zc->msg_controllen == cmsg_dummy.msg_controllen) {
+ tcp_recv_timestamp(&cmsg_dummy, sk, tss);
+ zc->msg_control = (__u64)
+ ((uintptr_t)cmsg_dummy.msg_control);
+ zc->msg_controllen =
+ (__u64)cmsg_dummy.msg_controllen;
+ zc->msg_flags = (__u32)cmsg_dummy.msg_flags;
+ }
+}
+
#define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32
static int tcp_zerocopy_receive(struct sock *sk,
- struct tcp_zerocopy_receive *zc)
+ struct tcp_zerocopy_receive *zc,
+ struct scm_timestamping_internal *tss)
{
u32 length = 0, offset, vma_len, avail_len, copylen = 0;
unsigned long address = (unsigned long)zc->address;
@@ -2023,6 +2070,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
int ret;
zc->copybuf_len = 0;
+ zc->msg_flags = 0;
if (address & (PAGE_SIZE - 1) || address != zc->address)
return -EINVAL;
@@ -2033,7 +2081,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
sock_rps_record_flow(sk);
if (inq && inq <= copybuf_len)
- return receive_fallback_to_copy(sk, zc, inq);
+ return receive_fallback_to_copy(sk, zc, inq, tss);
if (inq < PAGE_SIZE) {
zc->length = 0;
@@ -2078,6 +2126,11 @@ static int tcp_zerocopy_receive(struct sock *sk,
} else {
skb = tcp_recv_skb(sk, seq, &offset);
}
+
+ if (TCP_SKB_CB(skb)->has_rxtstamp) {
+ tcp_update_recv_tstamps(skb, tss);
+ zc->msg_flags |= TCP_CMSG_TS;
+ }
zc->recv_skip_hint = skb->len - offset;
frags = skb_advance_to_frag(skb, offset, &offset_frag);
if (!frags || offset_frag)
@@ -2120,8 +2173,7 @@ out:
mmap_read_unlock(current->mm);
/* Try to copy straggler data. */
if (!ret)
- copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq,
- copybuf_len);
+ copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss);
if (length + copylen) {
WRITE_ONCE(tp->copied_seq, seq);
@@ -2142,20 +2194,6 @@ out:
}
#endif
-static void tcp_update_recv_tstamps(struct sk_buff *skb,
- struct scm_timestamping_internal *tss)
-{
- if (skb->tstamp)
- tss->ts[0] = ktime_to_timespec64(skb->tstamp);
- else
- tss->ts[0] = (struct timespec64) {0};
-
- if (skb_hwtstamps(skb)->hwtstamp)
- tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
- else
- tss->ts[2] = (struct timespec64) {0};
-}
-
/* Similar to __sock_recv_timestamp, but does not require an skb */
static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
struct scm_timestamping_internal *tss)
@@ -2272,7 +2310,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
goto out;
if (tp->recvmsg_inq)
- *cmsg_flags = 1;
+ *cmsg_flags = TCP_CMSG_INQ;
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
@@ -2453,7 +2491,7 @@ skip_copy:
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, tss);
- *cmsg_flags |= 2;
+ *cmsg_flags |= TCP_CMSG_TS;
}
if (used + offset < skb->len)
@@ -2513,9 +2551,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
release_sock(sk);
if (cmsg_flags && ret >= 0) {
- if (cmsg_flags & 2)
+ if (cmsg_flags & TCP_CMSG_TS)
tcp_recv_timestamp(msg, sk, &tss);
- if (cmsg_flags & 1) {
+ if (cmsg_flags & TCP_CMSG_INQ) {
inq = tcp_inq_hint(sk);
put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
}
@@ -2937,6 +2975,7 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_backoff = 0;
icsk->icsk_probes_out = 0;
+ icsk->icsk_probes_tstamp = 0;
icsk->icsk_rto = TCP_TIMEOUT_INIT;
icsk->icsk_rto_min = TCP_RTO_MIN;
icsk->icsk_delack_max = TCP_DELACK_MAX;
@@ -3766,11 +3805,24 @@ static size_t tcp_opt_stats_get_size(void)
nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */
nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */
+ nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */
0;
}
+/* Returns TTL or hop limit of an incoming packet from skb. */
+static u8 tcp_skb_ttl_or_hop_limit(const struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_IP))
+ return ip_hdr(skb)->ttl;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return ipv6_hdr(skb)->hop_limit;
+ else
+ return 0;
+}
+
struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
- const struct sk_buff *orig_skb)
+ const struct sk_buff *orig_skb,
+ const struct sk_buff *ack_skb)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *stats;
@@ -3826,6 +3878,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
max_t(int, 0, tp->write_seq - tp->snd_nxt));
nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns,
TCP_NLA_PAD);
+ if (ack_skb)
+ nla_put_u8(stats, TCP_NLA_TTL,
+ tcp_skb_ttl_or_hop_limit(ack_skb));
return stats;
}
@@ -4082,6 +4137,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
}
#ifdef CONFIG_MMU
case TCP_ZEROCOPY_RECEIVE: {
+ struct scm_timestamping_internal tss;
struct tcp_zerocopy_receive zc = {};
int err;
@@ -4089,19 +4145,36 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
if (len < offsetofend(struct tcp_zerocopy_receive, length))
return -EINVAL;
- if (len > sizeof(zc)) {
+ if (unlikely(len > sizeof(zc))) {
+ err = check_zeroed_user(optval + sizeof(zc),
+ len - sizeof(zc));
+ if (err < 1)
+ return err == 0 ? -EINVAL : err;
len = sizeof(zc);
if (put_user(len, optlen))
return -EFAULT;
}
if (copy_from_user(&zc, optval, len))
return -EFAULT;
+ if (zc.reserved)
+ return -EINVAL;
+ if (zc.msg_flags & ~(TCP_VALID_ZC_MSG_FLAGS))
+ return -EINVAL;
lock_sock(sk);
- err = tcp_zerocopy_receive(sk, &zc);
+ err = tcp_zerocopy_receive(sk, &zc, &tss);
+ err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
+ &zc, &len, err);
release_sock(sk);
- if (len >= offsetofend(struct tcp_zerocopy_receive, err))
- goto zerocopy_rcv_sk_err;
+ if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags))
+ goto zerocopy_rcv_cmsg;
switch (len) {
+ case offsetofend(struct tcp_zerocopy_receive, msg_flags):
+ goto zerocopy_rcv_cmsg;
+ case offsetofend(struct tcp_zerocopy_receive, msg_controllen):
+ case offsetofend(struct tcp_zerocopy_receive, msg_control):
+ case offsetofend(struct tcp_zerocopy_receive, flags):
+ case offsetofend(struct tcp_zerocopy_receive, copybuf_len):
+ case offsetofend(struct tcp_zerocopy_receive, copybuf_address):
case offsetofend(struct tcp_zerocopy_receive, err):
goto zerocopy_rcv_sk_err;
case offsetofend(struct tcp_zerocopy_receive, inq):
@@ -4110,6 +4183,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
default:
goto zerocopy_rcv_out;
}
+zerocopy_rcv_cmsg:
+ if (zc.msg_flags & TCP_CMSG_TS)
+ tcp_zc_finalize_rx_tstamp(sk, &zc, &tss);
+ else
+ zc.msg_flags = 0;
zerocopy_rcv_sk_err:
if (!err)
zc.err = sock_error(sk);
@@ -4132,6 +4210,18 @@ zerocopy_rcv_out:
return 0;
}
+bool tcp_bpf_bypass_getsockopt(int level, int optname)
+{
+ /* TCP's do_tcp_getsockopt() has an optimized getsockopt implementation
+ * for TCP_ZEROCOPY_RECEIVE that avoids taking an extra socket lock.
+ */
+ if (level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt);
+
int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int __user *optlen)
{
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index c7bf5b26bf0c..ffcbe46dacdb 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -104,16 +104,7 @@ struct bictcp {
static inline void bictcp_reset(struct bictcp *ca)
{
- ca->cnt = 0;
- ca->last_max_cwnd = 0;
- ca->last_cwnd = 0;
- ca->last_time = 0;
- ca->bic_origin_point = 0;
- ca->bic_K = 0;
- ca->delay_min = 0;
- ca->epoch_start = 0;
- ca->ack_cnt = 0;
- ca->tcp_cwnd = 0;
+ memset(ca, 0, offsetof(struct bictcp, unused));
ca->found = 0;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c7e16b0ed791..69a545db80d2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2859,7 +2859,8 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
} else if (tcp_is_rack(sk)) {
u32 prior_retrans = tp->retrans_out;
- tcp_rack_mark_lost(sk);
+ if (tcp_rack_mark_lost(sk))
+ *ack_flag &= ~FLAG_SET_XMIT_TIMER;
if (prior_retrans > tp->retrans_out)
*ack_flag |= FLAG_LOST_RETRANS;
}
@@ -3145,7 +3146,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
}
static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
- u32 prior_snd_una)
+ const struct sk_buff *ack_skb, u32 prior_snd_una)
{
const struct skb_shared_info *shinfo;
@@ -3157,7 +3158,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
if (!before(shinfo->tskey, prior_snd_una) &&
before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
tcp_skb_tsorted_save(skb) {
- __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK);
} tcp_skb_tsorted_restore(skb);
}
}
@@ -3166,8 +3167,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
* is before the ack sequence we can discard it as it's confirmed to have
* arrived at the other end.
*/
-static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
- u32 prior_snd_una,
+static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
+ u32 prior_fack, u32 prior_snd_una,
struct tcp_sacktag_state *sack, bool ece_ack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3256,7 +3257,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (!fully_acked)
break;
- tcp_ack_tstamp(sk, skb, prior_snd_una);
+ tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
@@ -3274,7 +3275,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
tp->snd_up = tp->snd_una;
if (skb) {
- tcp_ack_tstamp(sk, skb, prior_snd_una);
+ tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una);
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
flag |= FLAG_SACK_RENEGING;
}
@@ -3384,6 +3385,7 @@ static void tcp_ack_probe(struct sock *sk)
return;
if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
icsk->icsk_backoff = 0;
+ icsk->icsk_probes_tstamp = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* Socket must be waked up by subsequent tcp_data_snd_check().
* This function is not for random using!
@@ -3391,8 +3393,8 @@ static void tcp_ack_probe(struct sock *sk)
} else {
unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
- tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- when, TCP_RTO_MAX);
+ when = tcp_clamp_probe0_to_user_timeout(sk, when);
+ tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
}
}
@@ -3808,16 +3810,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state,
- flag & FLAG_ECE);
+ flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una,
+ &sack_state, flag & FLAG_ECE);
tcp_rack_update_reo_wnd(sk, &rs);
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
- /* If needed, reset TLP/RTO timer; RACK may later override this. */
- if (flag & FLAG_SET_XMIT_TIMER)
- tcp_set_xmit_timer(sk);
if (tcp_ack_is_dubious(sk, flag)) {
if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
@@ -3830,6 +3829,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
&rexmit);
}
+ /* If needed, reset TLP/RTO timer, unless RACK armed its reordering timer. */
+ if (flag & FLAG_SET_XMIT_TIMER)
+ tcp_set_xmit_timer(sk);
+
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
sk_dst_confirm(sk);
@@ -4396,10 +4399,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
* The receiver remembers and reflects via DSACKs. Leverage the
* DSACK state and change the txhash to re-route speculatively.
*/
- if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) {
- sk_rethink_txhash(sk);
+ if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
+ sk_rethink_txhash(sk))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
- }
}
static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
@@ -4922,15 +4924,8 @@ err:
void tcp_data_ready(struct sock *sk)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- int avail = tp->rcv_nxt - tp->copied_seq;
-
- if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
- !sock_flag(sk, SOCK_DONE) &&
- tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
- return;
-
- sk->sk_data_ready(sk);
+ if (tcp_epollin_ready(sk, sk->sk_rcvlowat) || sock_flag(sk, SOCK_DONE))
+ sk->sk_data_ready(sk);
}
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 58207c7769d0..daad4f99db32 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1595,6 +1595,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_move_syn(newtp, req);
ireq->ireq_opt = NULL;
} else {
+ newinet->inet_opt = NULL;
+
if (!req_unhash && found_dup_sk) {
/* This code path should only be executed in the
* syncookie case only
@@ -1602,8 +1604,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
bh_unlock_sock(newsk);
sock_put(newsk);
newsk = NULL;
- } else {
- newinet->inet_opt = NULL;
}
}
return newsk;
@@ -1649,6 +1649,8 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
return mss;
}
+INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
+ u32));
/* The socket must have it's spinlock held when we get
* here, unless it is a TCP_LISTEN socket.
*
@@ -1668,7 +1670,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id(sk, skb);
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
- !dst->ops->check(dst, 0)) {
+ !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check,
+ dst, 0)) {
dst_release(dst);
sk->sk_rx_dst = NULL;
}
@@ -1760,6 +1763,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
+ u32 tail_gso_size, tail_gso_segs;
struct skb_shared_info *shinfo;
const struct tcphdr *th;
struct tcphdr *thtail;
@@ -1767,6 +1771,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
unsigned int hdrlen;
bool fragstolen;
u32 gso_segs;
+ u32 gso_size;
int delta;
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
@@ -1792,13 +1797,6 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
*/
th = (const struct tcphdr *)skb->data;
hdrlen = th->doff * 4;
- shinfo = skb_shinfo(skb);
-
- if (!shinfo->gso_size)
- shinfo->gso_size = skb->len - hdrlen;
-
- if (!shinfo->gso_segs)
- shinfo->gso_segs = 1;
tail = sk->sk_backlog.tail;
if (!tail)
@@ -1821,6 +1819,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
goto no_coalesce;
__skb_pull(skb, hdrlen);
+
+ shinfo = skb_shinfo(skb);
+ gso_size = shinfo->gso_size ?: skb->len;
+ gso_segs = shinfo->gso_segs ?: 1;
+
+ shinfo = skb_shinfo(tail);
+ tail_gso_size = shinfo->gso_size ?: (tail->len - hdrlen);
+ tail_gso_segs = shinfo->gso_segs ?: 1;
+
if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -1847,11 +1854,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
}
/* Not as strict as GRO. We only need to carry mss max value */
- skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
- skb_shinfo(tail)->gso_size);
-
- gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
- skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+ shinfo->gso_size = max(gso_size, tail_gso_size);
+ shinfo->gso_segs = min_t(u32, gso_segs + tail_gso_segs, 0xFFFF);
sk->sk_backlog.len += delta;
__NET_INC_STATS(sock_net(sk),
@@ -2792,6 +2796,7 @@ struct proto tcp_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
+ .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f322e798a351..fbf140a770d8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1319,7 +1319,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
skb_orphan(skb);
skb->sk = sk;
skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
- skb_set_hash_from_sk(skb, sk);
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
@@ -1390,6 +1389,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tcp_skb_pcount(skb));
tp->segs_out += tcp_skb_pcount(skb);
+ skb_set_hash_from_sk(skb, sk);
/* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */
skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
@@ -4084,6 +4084,7 @@ void tcp_send_probe0(struct sock *sk)
/* Cancel probe timer, if it is not required. */
icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0;
+ icsk->icsk_probes_tstamp = 0;
return;
}
@@ -4098,6 +4099,8 @@ void tcp_send_probe0(struct sock *sk)
*/
timeout = TCP_RESOURCE_PROBE_INTERVAL;
}
+
+ timeout = tcp_clamp_probe0_to_user_timeout(sk, timeout);
tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX);
}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 177307a3081f..6f1b4ac7fe99 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -96,13 +96,13 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
}
}
-void tcp_rack_mark_lost(struct sock *sk)
+bool tcp_rack_mark_lost(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout;
if (!tp->rack.advanced)
- return;
+ return false;
/* Reset the advanced flag to avoid unnecessary queue scanning */
tp->rack.advanced = 0;
@@ -112,6 +112,7 @@ void tcp_rack_mark_lost(struct sock *sk)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
timeout, inet_csk(sk)->icsk_rto);
}
+ return !!timeout;
}
/* Record the most recently (re)sent time among the (s)acked packets
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6c62b9ea1320..4ef08079ccfa 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -40,6 +40,24 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}
+u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ u32 remaining;
+ s32 elapsed;
+
+ if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
+ return when;
+
+ elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
+ if (unlikely(elapsed < 0))
+ elapsed = 0;
+ remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
+ remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
+
+ return min_t(u32, remaining, when);
+}
+
/**
* tcp_write_err() - close socket and save error info
* @sk: The socket the error has appeared on.
@@ -219,14 +237,8 @@ static int tcp_write_timeout(struct sock *sk)
int retry_until;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
- if (icsk->icsk_retransmits) {
- dst_negative_advice(sk);
- } else {
- sk_rethink_txhash(sk);
- tp->timeout_rehash++;
- __NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPTIMEOUTREHASH);
- }
+ if (icsk->icsk_retransmits)
+ __dst_negative_advice(sk);
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
expired = icsk->icsk_retransmits >= retry_until;
} else {
@@ -234,12 +246,7 @@ static int tcp_write_timeout(struct sock *sk)
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
- dst_negative_advice(sk);
- } else {
- sk_rethink_txhash(sk);
- tp->timeout_rehash++;
- __NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPTIMEOUTREHASH);
+ __dst_negative_advice(sk);
}
retry_until = net->ipv4.sysctl_tcp_retries2;
@@ -270,6 +277,11 @@ static int tcp_write_timeout(struct sock *sk)
return 1;
}
+ if (sk_rethink_txhash(sk)) {
+ tp->timeout_rehash++;
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH);
+ }
+
return 0;
}
@@ -349,6 +361,7 @@ static void tcp_probe_timer(struct sock *sk)
if (tp->packets_out || !skb) {
icsk->icsk_probes_out = 0;
+ icsk->icsk_probes_tstamp = 0;
return;
}
@@ -360,13 +373,12 @@ static void tcp_probe_timer(struct sock *sk)
* corresponding system limit. We also implement similar policy when
* we use RTO to probe window in tcp_retransmit_timer().
*/
- if (icsk->icsk_user_timeout) {
- u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out,
- tcp_probe0_base(sk));
-
- if (elapsed >= icsk->icsk_user_timeout)
- goto abort;
- }
+ if (!icsk->icsk_probes_tstamp)
+ icsk->icsk_probes_tstamp = tcp_jiffies32;
+ else if (icsk->icsk_user_timeout &&
+ (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+ msecs_to_jiffies(icsk->icsk_user_timeout))
+ goto abort;
max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7103b0a89756..4a0478b17243 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -596,6 +596,12 @@ void udp_encap_enable(void)
}
EXPORT_SYMBOL(udp_encap_enable);
+void udp_encap_disable(void)
+{
+ static_branch_dec(&udp_encap_needed_key);
+}
+EXPORT_SYMBOL(udp_encap_disable);
+
/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
* through error handlers in encapsulations looking for a match.
*/
@@ -1124,7 +1130,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rcu_read_unlock();
}
- if (cgroup_bpf_enabled && !connected) {
+ if (cgroup_bpf_enabled(BPF_CGROUP_UDP4_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
(struct sockaddr *)usin, &ipc.addr);
if (err)
@@ -1858,9 +1864,8 @@ try_again:
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
- if (cgroup_bpf_enabled)
- BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
- (struct sockaddr *)sin);
+ BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk,
+ (struct sockaddr *)sin);
}
if (udp_sk(sk)->gro_enabled)
@@ -2555,7 +2560,8 @@ int udp_v4_early_demux(struct sk_buff *skb)
*/
if (!inet_sk(sk)->inet_daddr && in_dev)
return ip_mc_validate_source(skb, iph->daddr,
- iph->saddr, iph->tos,
+ iph->saddr,
+ iph->tos & IPTOS_RT_MASK,
skb->dev, in_dev, &itag);
}
return 0;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index ff39e94781bf..b76c48efd37e 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -68,8 +68,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
(NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));
features &= skb->dev->hw_enc_features;
- /* CRC checksum can't be handled by HW when it's a UDP tunneling packet. */
- features &= ~NETIF_F_SCTP_CRC;
+ if (need_csum)
+ features &= ~NETIF_F_SCTP_CRC;
/* The only checksum offload we care about from here on out is the
* outer one so strip the existing checksum feature flags and
@@ -187,8 +187,67 @@ out_unlock:
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
+static void __udpv4_gso_segment_csum(struct sk_buff *seg,
+ __be32 *oldip, __be32 *newip,
+ __be16 *oldport, __be16 *newport)
+{
+ struct udphdr *uh;
+ struct iphdr *iph;
+
+ if (*oldip == *newip && *oldport == *newport)
+ return;
+
+ uh = udp_hdr(seg);
+ iph = ip_hdr(seg);
+
+ if (uh->check) {
+ inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip,
+ true);
+ inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport,
+ false);
+ if (!uh->check)
+ uh->check = CSUM_MANGLED_0;
+ }
+ *oldport = *newport;
+
+ csum_replace4(&iph->check, *oldip, *newip);
+ *oldip = *newip;
+}
+
+static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+ struct sk_buff *seg;
+ struct udphdr *uh, *uh2;
+ struct iphdr *iph, *iph2;
+
+ seg = segs;
+ uh = udp_hdr(seg);
+ iph = ip_hdr(seg);
+
+ if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) &&
+ (udp_hdr(seg)->source == udp_hdr(seg->next)->source) &&
+ (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) &&
+ (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr))
+ return segs;
+
+ while ((seg = seg->next)) {
+ uh2 = udp_hdr(seg);
+ iph2 = ip_hdr(seg);
+
+ __udpv4_gso_segment_csum(seg,
+ &iph2->saddr, &iph->saddr,
+ &uh2->source, &uh->source);
+ __udpv4_gso_segment_csum(seg,
+ &iph2->daddr, &iph->daddr,
+ &uh2->dest, &uh->dest);
+ }
+
+ return segs;
+}
+
static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
- netdev_features_t features)
+ netdev_features_t features,
+ bool is_ipv6)
{
unsigned int mss = skb_shinfo(skb)->gso_size;
@@ -198,11 +257,11 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
- return skb;
+ return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb);
}
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
- netdev_features_t features)
+ netdev_features_t features, bool is_ipv6)
{
struct sock *sk = gso_skb->sk;
unsigned int sum_truesize = 0;
@@ -214,7 +273,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
__be16 newlen;
if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
- return __udp_gso_segment_list(gso_skb, features);
+ return __udp_gso_segment_list(gso_skb, features, is_ipv6);
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
@@ -328,7 +387,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
- return __udp_gso_segment(skb, features);
+ return __udp_gso_segment(skb, features, false);
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
@@ -460,7 +519,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
- if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
+ if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
+ (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
pp = call_gro_receive(udp_gro_receive_segment, head, skb);
return pp;
}
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 3eecba0874aa..b97e3635acf5 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -90,15 +90,11 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
struct sock *sk = sock->sk;
struct udp_tunnel_info ti;
- if (!dev->netdev_ops->ndo_udp_tunnel_add ||
- !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
- return;
-
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
- dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
+ udp_tunnel_nic_add_port(dev, &ti);
}
EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
@@ -108,15 +104,11 @@ void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
struct sock *sk = sock->sk;
struct udp_tunnel_info ti;
- if (!dev->netdev_ops->ndo_udp_tunnel_del ||
- !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
- return;
-
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
- dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
+ udp_tunnel_nic_del_port(dev, &ti);
}
EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port);
@@ -134,11 +126,7 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
- if (!dev->netdev_ops->ndo_udp_tunnel_add)
- continue;
- if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
- continue;
- dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
+ udp_tunnel_nic_add_port(dev, &ti);
}
rcu_read_unlock();
}
@@ -158,11 +146,7 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
- if (!dev->netdev_ops->ndo_udp_tunnel_del)
- continue;
- if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
- continue;
- dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
+ udp_tunnel_nic_del_port(dev, &ti);
}
rcu_read_unlock();
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index eff2cacd5209..f2337fb756ac 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -205,6 +205,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
+ .ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
.accept_ra_pinfo = 1,
@@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
+ .ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
.accept_ra_pinfo = 1,
@@ -2467,8 +2469,9 @@ static void addrconf_add_mroute(struct net_device *dev)
.fc_ifindex = dev->ifindex,
.fc_dst_len = 8,
.fc_flags = RTF_UP,
- .fc_type = RTN_UNICAST,
+ .fc_type = RTN_MULTICAST,
.fc_nlinfo.nl_net = dev_net(dev),
+ .fc_protocol = RTPROT_KERNEL,
};
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
@@ -5475,6 +5478,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
+ array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric;
array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -6668,6 +6672,14 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "ra_defrtr_metric",
+ .data = &ipv6_devconf.ra_defrtr_metric,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = (void *)SYSCTL_ONE,
+ },
+ {
.procname = "accept_ra_min_hop_limit",
.data = &ipv6_devconf.accept_ra_min_hop_limit,
.maxlen = sizeof(int),
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8e9c3e9ea36e..1fb75f01756c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -295,7 +295,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
return -EINVAL;
snum = ntohs(addr->sin6_port);
- if (snum && inet_port_requires_bind_service(net, snum) &&
+ if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
+ snum && inet_port_requires_bind_service(net, snum) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
@@ -439,6 +440,7 @@ out_unlock:
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
+ u32 flags = BIND_WITH_LOCK;
int err = 0;
/* If the socket has its own bind function then use it. */
@@ -451,11 +453,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
- err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
+ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+ BPF_CGROUP_INET6_BIND, &flags);
if (err)
return err;
- return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+ return __inet6_bind(sk, uaddr, addr_len, flags);
}
EXPORT_SYMBOL(inet6_bind);
@@ -527,18 +530,19 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_addr = sk->sk_v6_daddr;
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ NULL);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
sin->sin6_addr = np->saddr;
else
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
- }
- if (cgroup_bpf_enabled)
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- peer ? BPF_CGROUP_INET6_GETPEERNAME :
- BPF_CGROUP_INET6_GETSOCKNAME,
+ BPF_CGROUP_INET6_GETSOCKNAME,
NULL);
+ }
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
return sizeof(*sin);
@@ -954,6 +958,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
+ net->ipv6.sysctl.fib_notify_on_flag_change = 0;
atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 52c2f063529f..153ad103ba74 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -478,7 +478,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
u8 *tail;
- u8 *vaddr;
int nfrags;
int esph_offset;
struct page *page;
@@ -519,14 +518,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
page = pfrag->page;
get_page(page);
- vaddr = kmap_atomic(page);
-
- tail = vaddr + pfrag->offset;
+ tail = page_address(page) + pfrag->offset;
esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
- kunmap_atomic(vaddr);
-
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -793,7 +788,7 @@ int esp6_input_done2(struct sk_buff *skb, int err)
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
int hdr_len = skb_network_header_len(skb);
- if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+ if (!xo || !(xo->flags & CRYPTO_DONE))
kfree(ESP_SKB_CB(skb)->tmp);
if (unlikely(err))
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f3d05866692e..fd1f896115c1 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -331,10 +331,9 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
}
#if IS_ENABLED(CONFIG_IPV6_MIP6)
-static void mip6_addr_swap(struct sk_buff *skb)
+static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
- struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6_destopt_hao *hao;
struct in6_addr tmp;
int off;
@@ -351,7 +350,7 @@ static void mip6_addr_swap(struct sk_buff *skb)
}
}
#else
-static inline void mip6_addr_swap(struct sk_buff *skb) {}
+static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif
static struct dst_entry *icmpv6_route_lookup(struct net *net,
@@ -446,7 +445,8 @@ static int icmp6_iif(const struct sk_buff *skb)
* Send an ICMP message in response to a packet in error
*/
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
- const struct in6_addr *force_saddr)
+ const struct in6_addr *force_saddr,
+ const struct inet6_skb_parm *parm)
{
struct inet6_dev *idev = NULL;
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -542,7 +542,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
goto out_bh_enable;
- mip6_addr_swap(skb);
+ mip6_addr_swap(skb, parm);
sk = icmpv6_xmit_lock(net);
if (!sk)
@@ -559,7 +559,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
/* select a more meaningful saddr from input if */
struct net_device *in_netdev;
- in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
+ in_netdev = dev_get_by_index(net, parm->iif);
if (in_netdev) {
ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
inet6_sk(sk)->srcprefs,
@@ -640,7 +640,7 @@ EXPORT_SYMBOL(icmp6_send);
*/
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
- icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
+ icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
kfree_skb(skb);
}
@@ -697,10 +697,10 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
}
if (type == ICMP_TIME_EXCEEDED)
icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
- info, &temp_saddr);
+ info, &temp_saddr, IP6CB(skb2));
else
icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
- info, &temp_saddr);
+ info, &temp_saddr, IP6CB(skb2));
if (rt)
ip6_rt_put(rt);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 605cdd38a919..ef9d022e693f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -499,7 +499,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb,
hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
err = fib6_table_dump(net, tb, w);
- if (err < 0)
+ if (err)
goto out;
}
}
@@ -507,7 +507,8 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb,
out:
kfree(w);
- return err;
+ /* The tree traversal function should never return a positive value. */
+ return err > 0 ? -EINVAL : err;
}
static int fib6_dump_node(struct fib6_walker *w)
@@ -1025,6 +1026,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
{
struct fib6_table *table = rt->fib6_table;
+ /* Flush all cached dst in exception table */
+ rt6_flush_exceptions(rt);
fib6_drop_pcpu_from(rt, table);
if (rt->nh && !list_empty(&rt->nh_list))
@@ -1927,9 +1930,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
- /* Flush all cached dst in exception table */
- rt6_flush_exceptions(rt);
-
/* Reset round-robin state, if necessary */
if (rcu_access_pointer(fn->rr_ptr) == rt)
fn->rr_ptr = NULL;
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 70c8c2f36c98..9e3574880cb0 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -33,23 +33,25 @@ int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
}
EXPORT_SYMBOL(inet6_unregister_icmp_sender);
-void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+ const struct inet6_skb_parm *parm)
{
ip6_icmp_send_t *send;
rcu_read_lock();
send = rcu_dereference(ip6_icmp_send);
if (send)
- send(skb, type, code, info, NULL);
+ send(skb, type, code, info, NULL, parm);
rcu_read_unlock();
}
-EXPORT_SYMBOL(icmpv6_send);
+EXPORT_SYMBOL(__icmpv6_send);
#endif
#if IS_ENABLED(CONFIG_NF_NAT)
#include <net/netfilter/nf_conntrack.h>
void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
{
+ struct inet6_skb_parm parm = { 0 };
struct sk_buff *cloned_skb = NULL;
enum ip_conntrack_info ctinfo;
struct in6_addr orig_ip;
@@ -57,7 +59,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
ct = nf_ct_get(skb_in, &ctinfo);
if (!ct || !(ct->status & IPS_SRC_NAT)) {
- icmpv6_send(skb_in, type, code, info);
+ __icmpv6_send(skb_in, type, code, info, &parm);
return;
}
@@ -72,7 +74,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
orig_ip = ipv6_hdr(skb_in)->saddr;
ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
- icmpv6_send(skb_in, type, code, info);
+ __icmpv6_send(skb_in, type, code, info, &parm);
ipv6_hdr(skb_in)->saddr = orig_ip;
out:
consume_skb(cloned_skb);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index e96304d8a4a7..e9d2a4a409aa 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -32,6 +32,7 @@
#include <net/sock.h>
#include <net/snmp.h>
+#include <net/udp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -44,7 +45,6 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
@@ -352,7 +352,6 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
ip6_sublist_rcv(&sublist, curr_dev, curr_net);
}
-INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *));
/*
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index a80f90bf3ae7..1b9827ff8ccf 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -15,6 +15,7 @@
#include <net/inet_common.h>
#include <net/tcp.h>
#include <net/udp.h>
+#include <net/gro.h>
#include "ip6_offload.h"
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 749ad72386b2..ff4f9ebcf7f6 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -125,8 +125,43 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
return -EINVAL;
}
+static int
+ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
+ struct sk_buff *skb, unsigned int mtu)
+{
+ struct sk_buff *segs, *nskb;
+ netdev_features_t features;
+ int ret = 0;
+
+ /* Please see corresponding comment in ip_finish_output_gso
+ * describing the cases where GSO segment length exceeds the
+ * egress MTU.
+ */
+ features = netif_skb_features(skb);
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR_OR_NULL(segs)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ consume_skb(skb);
+
+ skb_list_walk_safe(segs, segs, nskb) {
+ int err;
+
+ skb_mark_not_on_list(segs);
+ err = ip6_fragment(net, sk, segs, ip6_finish_output2);
+ if (err && ret == 0)
+ ret = err;
+ }
+
+ return ret;
+}
+
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ unsigned int mtu;
+
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm) {
@@ -135,7 +170,11 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
}
#endif
- if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+ mtu = ip6_skb_dst_mtu(skb);
+ if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
+ return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
+
+ if ((skb->len > mtu && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
return ip6_fragment(net, sk, skb, ip6_finish_output2);
@@ -178,6 +217,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
+EXPORT_SYMBOL(ip6_output);
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
@@ -1471,7 +1511,7 @@ emsgsize:
csummode = CHECKSUM_PARTIAL;
if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
- uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
if (!uarg)
return -ENOBUFS;
extra_uref = !skb_zcopy(skb); /* only ref on new uarg */
@@ -1715,8 +1755,7 @@ alloc_new_skb:
error_efault:
err = -EFAULT;
error:
- if (uarg)
- sock_zerocopy_put_abort(uarg, extra_uref);
+ net_zcopy_put_abort(uarg, extra_uref);
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 76717478f173..c467c6419893 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1173,6 +1173,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
struct neighbour *neigh = NULL;
struct inet6_dev *in6_dev;
struct fib6_info *rt = NULL;
+ u32 defrtr_usr_metric;
struct net *net;
int lifetime;
struct ndisc_options ndopts;
@@ -1303,18 +1304,21 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
}
- if (rt && lifetime == 0) {
+ /* Set default route metric as specified by user */
+ defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric;
+ /* delete the route if lifetime is 0 or if metric needs change */
+ if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) {
ip6_del_rt(net, rt, false);
rt = NULL;
}
- ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n",
- rt, lifetime, skb->dev->name);
+ ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n",
+ rt, lifetime, defrtr_usr_metric, skb->dev->name);
if (!rt && lifetime) {
ND_PRINTK(3, info, "RA: adding default router\n");
rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
- skb->dev, pref);
+ skb->dev, pref, defrtr_usr_metric);
if (!rt) {
ND_PRINTK(0, err,
"RA: %s failed to add default route\n",
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c4f532f4d311..0d453fa9e327 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
if (!IS_ERR(t)) {
- const struct xt_table_info *private = t->private;
+ const struct xt_table_info *private = xt_table_get_private_protected(t);
struct xt_table_info info;
ret = compat_table_info(private, &info);
if (!ret && get.size == info.size)
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 8b5193efb1f1..3a00d95e964e 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -13,8 +13,8 @@
#include <net/netfilter/ipv6/nf_dup_ipv6.h>
struct nft_dup_ipv6 {
- enum nft_registers sreg_addr:8;
- enum nft_registers sreg_dev:8;
+ u8 sreg_addr;
+ u8 sreg_dev;
};
static void nft_dup_ipv6_eval(const struct nft_expr *expr,
@@ -38,16 +38,16 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
- err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr));
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ sizeof(struct in6_addr));
if (err < 0)
return err;
- if (tb[NFTA_DUP_SREG_DEV] != NULL) {
- priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
- }
- return 0;
+ if (tb[NFTA_DUP_SREG_DEV])
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ &priv->sreg_dev, sizeof(int));
+
+ return err;
}
static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 188e114b29b4..1536f4948e86 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -81,9 +81,11 @@ enum rt6_nud_state {
RT6_NUD_SUCCEED = 1
};
-static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
+INDIRECT_CALLABLE_SCOPE
+struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
-static unsigned int ip6_mtu(const struct dst_entry *dst);
+INDIRECT_CALLABLE_SCOPE
+unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
@@ -2611,7 +2613,8 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
return NULL;
}
-static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
+INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
+ u32 cookie)
{
struct dst_entry *dst_ret;
struct fib6_info *from;
@@ -2641,6 +2644,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
return dst_ret;
}
+EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
@@ -3089,7 +3093,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
return mtu;
}
-static unsigned int ip6_mtu(const struct dst_entry *dst)
+INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
{
struct inet6_dev *idev;
unsigned int mtu;
@@ -3111,6 +3115,7 @@ out:
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
+EXPORT_INDIRECT_CALLABLE(ip6_mtu);
/* MTU selection:
* 1. mtu on route is locked - use it
@@ -4252,11 +4257,12 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev,
- unsigned int pref)
+ unsigned int pref,
+ u32 defrtr_usr_metric)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
- .fc_metric = IP6_RT_PRIO_USER,
+ .fc_metric = defrtr_usr_metric,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
@@ -5613,6 +5619,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
rtm->rtm_flags |= RTM_F_OFFLOAD;
if (rt->trap)
rtm->rtm_flags |= RTM_F_TRAP;
+ if (rt->offload_failed)
+ rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;
}
if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
@@ -6063,6 +6071,58 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
+void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
+ bool offload, bool trap, bool offload_failed)
+{
+ struct sk_buff *skb;
+ int err;
+
+ if (f6i->offload == offload && f6i->trap == trap &&
+ f6i->offload_failed == offload_failed)
+ return;
+
+ f6i->offload = offload;
+ f6i->trap = trap;
+
+ /* 2 means send notifications only if offload_failed was changed. */
+ if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 &&
+ f6i->offload_failed == offload_failed)
+ return;
+
+ f6i->offload_failed = offload_failed;
+
+ if (!rcu_access_pointer(f6i->fib6_node))
+ /* The route was removed from the tree, do not send
+ * notification.
+ */
+ return;
+
+ if (!net->ipv6.sysctl.fib_notify_on_flag_change)
+ return;
+
+ skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL);
+ if (!skb) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+
+ err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0,
+ 0, 0);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL);
+ return;
+
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+EXPORT_SYMBOL(fib6_info_hw_flags_set);
+
static int ip6_route_dev_notify(struct notifier_block *this,
unsigned long event, void *ptr)
{
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index b07f7c1c82a4..c2a0c78e84d4 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -31,6 +31,8 @@
#include <linux/etherdevice.h>
#include <linux/bpf.h>
+#define SEG6_F_ATTR(i) BIT(i)
+
struct seg6_local_lwt;
/* callbacks used for customizing the creation and destruction of a behavior */
@@ -660,8 +662,8 @@ seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
unsigned long parsed_optattrs = slwt->parsed_optattrs;
bool legacy, vrfmode;
- legacy = !!(parsed_optattrs & (1 << SEG6_LOCAL_TABLE));
- vrfmode = !!(parsed_optattrs & (1 << SEG6_LOCAL_VRFTABLE));
+ legacy = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE));
+ vrfmode = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE));
if (!(legacy ^ vrfmode))
/* both are absent or present: invalid DT6 mode */
@@ -883,32 +885,32 @@ static struct seg6_action_desc seg6_action_table[] = {
},
{
.action = SEG6_LOCAL_ACTION_END_X,
- .attrs = (1 << SEG6_LOCAL_NH6),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
.input = input_action_end_x,
},
{
.action = SEG6_LOCAL_ACTION_END_T,
- .attrs = (1 << SEG6_LOCAL_TABLE),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
.input = input_action_end_t,
},
{
.action = SEG6_LOCAL_ACTION_END_DX2,
- .attrs = (1 << SEG6_LOCAL_OIF),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_OIF),
.input = input_action_end_dx2,
},
{
.action = SEG6_LOCAL_ACTION_END_DX6,
- .attrs = (1 << SEG6_LOCAL_NH6),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
.input = input_action_end_dx6,
},
{
.action = SEG6_LOCAL_ACTION_END_DX4,
- .attrs = (1 << SEG6_LOCAL_NH4),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH4),
.input = input_action_end_dx4,
},
{
.action = SEG6_LOCAL_ACTION_END_DT4,
- .attrs = (1 << SEG6_LOCAL_VRFTABLE),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
#ifdef CONFIG_NET_L3_MASTER_DEV
.input = input_action_end_dt4,
.slwt_ops = {
@@ -920,30 +922,30 @@ static struct seg6_action_desc seg6_action_table[] = {
.action = SEG6_LOCAL_ACTION_END_DT6,
#ifdef CONFIG_NET_L3_MASTER_DEV
.attrs = 0,
- .optattrs = (1 << SEG6_LOCAL_TABLE) |
- (1 << SEG6_LOCAL_VRFTABLE),
+ .optattrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
+ SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
.slwt_ops = {
.build_state = seg6_end_dt6_build,
},
#else
- .attrs = (1 << SEG6_LOCAL_TABLE),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
#endif
.input = input_action_end_dt6,
},
{
.action = SEG6_LOCAL_ACTION_END_B6,
- .attrs = (1 << SEG6_LOCAL_SRH),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
.input = input_action_end_b6,
},
{
.action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
- .attrs = (1 << SEG6_LOCAL_SRH),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
.input = input_action_end_b6_encap,
.static_headroom = sizeof(struct ipv6hdr),
},
{
.action = SEG6_LOCAL_ACTION_END_BPF,
- .attrs = (1 << SEG6_LOCAL_BPF),
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_BPF),
.input = input_action_end_bpf,
},
@@ -1366,7 +1368,7 @@ static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
* attribute; otherwise, we call the destroy() callback.
*/
for (i = 0; i < max_parsed; ++i) {
- if (!(parsed_attrs & (1 << i)))
+ if (!(parsed_attrs & SEG6_F_ATTR(i)))
continue;
param = &seg6_action_params[i];
@@ -1395,7 +1397,7 @@ static int parse_nla_optional_attrs(struct nlattr **attrs,
int err, i;
for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) {
- if (!(desc->optattrs & (1 << i)) || !attrs[i])
+ if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i])
continue;
/* once here, the i-th attribute is provided by the
@@ -1408,7 +1410,7 @@ static int parse_nla_optional_attrs(struct nlattr **attrs,
goto parse_optattrs_err;
/* current attribute has been correctly parsed */
- parsed_optattrs |= (1 << i);
+ parsed_optattrs |= SEG6_F_ATTR(i);
}
/* store in the tunnel state all the optional attributed successfully
@@ -1494,7 +1496,7 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
/* parse the required attributes */
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
- if (desc->attrs & (1 << i)) {
+ if (desc->attrs & SEG6_F_ATTR(i)) {
if (!attrs[i])
return -EINVAL;
@@ -1599,7 +1601,7 @@ static int seg6_local_fill_encap(struct sk_buff *skb,
attrs = slwt->desc->attrs | slwt->parsed_optattrs;
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
- if (attrs & (1 << i)) {
+ if (attrs & SEG6_F_ATTR(i)) {
param = &seg6_action_params[i];
err = param->put(skb, slwt);
if (err < 0)
@@ -1620,30 +1622,30 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
attrs = slwt->desc->attrs | slwt->parsed_optattrs;
- if (attrs & (1 << SEG6_LOCAL_SRH))
+ if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH))
nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
- if (attrs & (1 << SEG6_LOCAL_TABLE))
+ if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE))
nlsize += nla_total_size(4);
- if (attrs & (1 << SEG6_LOCAL_NH4))
+ if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4))
nlsize += nla_total_size(4);
- if (attrs & (1 << SEG6_LOCAL_NH6))
+ if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6))
nlsize += nla_total_size(16);
- if (attrs & (1 << SEG6_LOCAL_IIF))
+ if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF))
nlsize += nla_total_size(4);
- if (attrs & (1 << SEG6_LOCAL_OIF))
+ if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF))
nlsize += nla_total_size(4);
- if (attrs & (1 << SEG6_LOCAL_BPF))
+ if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF))
nlsize += nla_total_size(sizeof(struct nlattr)) +
nla_total_size(MAX_PROG_NAME) +
nla_total_size(4);
- if (attrs & (1 << SEG6_LOCAL_VRFTABLE))
+ if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
nlsize += nla_total_size(4);
return nlsize;
@@ -1670,7 +1672,7 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a,
return 1;
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
- if (attrs_a & (1 << i)) {
+ if (attrs_a & SEG6_F_ATTR(i)) {
param = &seg6_action_params[i];
if (param->cmp(slwt_a, slwt_b))
return 1;
@@ -1692,6 +1694,15 @@ static const struct lwtunnel_encap_ops seg6_local_ops = {
int __init seg6_local_init(void)
{
+ /* If the max total number of defined attributes is reached, then your
+ * kernel build stops here.
+ *
+ * This check is required to avoid arithmetic overflows when processing
+ * behavior attributes and the maximum number of defined attributes
+ * exceeds the allowed value.
+ */
+ BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
+
return lwtunnel_encap_add_ops(&seg6_local_ops,
LWTUNNEL_ENCAP_SEG6_LOCAL);
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 2da0ee703779..93636867aee2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1645,8 +1645,11 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
}
#ifdef CONFIG_IPV6_SIT_6RD
- if (ipip6_netlink_6rd_parms(data, &ip6rd))
+ if (ipip6_netlink_6rd_parms(data, &ip6rd)) {
err = ipip6_tunnel_update_6rd(nt, &ip6rd);
+ if (err < 0)
+ unregister_netdevice_queue(dev, NULL);
+ }
#endif
return err;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 5b60a4bdd36a..263ab43ed06b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -160,6 +160,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fib_notify_on_flag_change",
+ .data = &init_net.ipv6.sysctl.fib_notify_on_flag_change,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
+ },
{ }
};
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0e1509b02cb3..bd44ded7e50c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1420,6 +1420,8 @@ out:
return NULL;
}
+INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
+ u32));
/* The socket must have it's spinlock held when we get
* here, unless it is a TCP_LISTEN socket.
*
@@ -1473,7 +1475,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id(sk, skb);
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
- dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+ INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
+ dst, np->rx_dst_cookie) == NULL) {
dst_release(dst);
sk->sk_rx_dst = NULL;
}
@@ -2121,6 +2124,7 @@ struct proto tcpv6_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
+ .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b9f3dfdd2383..d25e5a9252fd 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -409,9 +409,8 @@ try_again:
}
*addr_len = sizeof(*sin6);
- if (cgroup_bpf_enabled)
- BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
- (struct sockaddr *)sin6);
+ BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
+ (struct sockaddr *)sin6);
}
if (udp_sk(sk)->gro_enabled)
@@ -1462,7 +1461,7 @@ do_udp_sendmsg:
fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
- if (cgroup_bpf_enabled && !connected) {
+ if (cgroup_bpf_enabled(BPF_CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
(struct sockaddr *)sin6, &fl6.saddr);
if (err)
@@ -1608,8 +1607,10 @@ void udpv6_destroy_sock(struct sock *sk)
if (encap_destroy)
encap_destroy(sk);
}
- if (up->encap_enabled)
+ if (up->encap_enabled) {
static_branch_dec(&udpv6_encap_needed_key);
+ udp_encap_disable();
+ }
}
inet6_destroy_sock(sk);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index c7bd7b1a04c1..faa823c24292 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,7 +42,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
goto out;
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
- return __udp_gso_segment(skb, features);
+ return __udp_gso_segment(skb, features, true);
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 882f028992c3..6092d5cb7168 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -89,7 +89,7 @@ static struct sock *iucv_accept_dequeue(struct sock *parent,
static void iucv_sock_kill(struct sock *sk);
static void iucv_sock_close(struct sock *sk);
-static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify);
+static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify);
/* Call Back functions */
static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
@@ -182,7 +182,7 @@ static inline int iucv_below_msglim(struct sock *sk)
if (sk->sk_state != IUCV_CONNECTED)
return 1;
if (iucv->transport == AF_IUCV_TRANS_IUCV)
- return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
+ return (atomic_read(&iucv->skbs_in_xmit) < iucv->path->msglim);
else
return ((atomic_read(&iucv->msg_sent) < iucv->msglimit_peer) &&
(atomic_read(&iucv->pendings) <= 0));
@@ -211,7 +211,6 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
{
struct iucv_sock *iucv = iucv_sk(sock);
struct af_iucv_trans_hdr *phs_hdr;
- struct sk_buff *nskb;
int err, confirm_recv = 0;
phs_hdr = skb_push(skb, sizeof(*phs_hdr));
@@ -257,22 +256,16 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
err = -EMSGSIZE;
goto err_free;
}
- skb_trim(skb, skb->dev->mtu);
+ err = pskb_trim(skb, skb->dev->mtu);
+ if (err)
+ goto err_free;
}
skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
- __skb_header_release(skb);
- nskb = skb_clone(skb, GFP_ATOMIC);
- if (!nskb) {
- err = -ENOMEM;
- goto err_free;
- }
-
- skb_queue_tail(&iucv->send_skb_q, nskb);
+ atomic_inc(&iucv->skbs_in_xmit);
err = dev_queue_xmit(skb);
if (net_xmit_eval(err)) {
- skb_unlink(nskb, &iucv->send_skb_q);
- kfree_skb(nskb);
+ atomic_dec(&iucv->skbs_in_xmit);
} else {
atomic_sub(confirm_recv, &iucv->msg_recv);
WARN_ON(atomic_read(&iucv->msg_recv) < 0);
@@ -424,7 +417,7 @@ static void iucv_sock_close(struct sock *sk)
sk->sk_state = IUCV_CLOSING;
sk->sk_state_change(sk);
- if (!err && !skb_queue_empty(&iucv->send_skb_q)) {
+ if (!err && atomic_read(&iucv->skbs_in_xmit) > 0) {
if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime)
timeo = sk->sk_lingertime;
else
@@ -491,6 +484,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio,
atomic_set(&iucv->pendings, 0);
iucv->flags = 0;
iucv->msglimit = 0;
+ atomic_set(&iucv->skbs_in_xmit, 0);
atomic_set(&iucv->msg_sent, 0);
atomic_set(&iucv->msg_recv, 0);
iucv->path = NULL;
@@ -1004,7 +998,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (iucv->transport == AF_IUCV_TRANS_HIPER) {
headroom = sizeof(struct af_iucv_trans_hdr) +
LL_RESERVED_SPACE(iucv->hs_dev);
- linear = len;
+ linear = min(len, PAGE_SIZE - headroom);
} else {
if (len < PAGE_SIZE) {
linear = len;
@@ -1055,6 +1049,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
}
} else { /* Classic VM IUCV transport */
skb_queue_tail(&iucv->send_skb_q, skb);
+ atomic_inc(&iucv->skbs_in_xmit);
if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) &&
skb->len <= 7) {
@@ -1063,6 +1058,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
/* on success: there is no message_complete callback */
/* for an IPRMDATA msg; remove skb from send queue */
if (err == 0) {
+ atomic_dec(&iucv->skbs_in_xmit);
skb_unlink(skb, &iucv->send_skb_q);
kfree_skb(skb);
}
@@ -1071,6 +1067,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
/* IUCV_IPRMDATA path flag is set... sever path */
if (err == 0x15) {
pr_iucv->path_sever(iucv->path, NULL);
+ atomic_dec(&iucv->skbs_in_xmit);
skb_unlink(skb, &iucv->send_skb_q);
err = -EPIPE;
goto fail;
@@ -1109,6 +1106,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
} else {
err = -EPIPE;
}
+
+ atomic_dec(&iucv->skbs_in_xmit);
skb_unlink(skb, &iucv->send_skb_q);
goto fail;
}
@@ -1748,10 +1747,14 @@ static void iucv_callback_txdone(struct iucv_path *path,
{
struct sock *sk = path->private;
struct sk_buff *this = NULL;
- struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q;
+ struct sk_buff_head *list;
struct sk_buff *list_skb;
+ struct iucv_sock *iucv;
unsigned long flags;
+ iucv = iucv_sk(sk);
+ list = &iucv->send_skb_q;
+
bh_lock_sock(sk);
spin_lock_irqsave(&list->lock, flags);
@@ -1761,8 +1764,11 @@ static void iucv_callback_txdone(struct iucv_path *path,
break;
}
}
- if (this)
+ if (this) {
+ atomic_dec(&iucv->skbs_in_xmit);
__skb_unlink(this, list);
+ }
+
spin_unlock_irqrestore(&list->lock, flags);
if (this) {
@@ -1772,7 +1778,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
}
if (sk->sk_state == IUCV_CLOSING) {
- if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) {
+ if (atomic_read(&iucv->skbs_in_xmit) == 0) {
sk->sk_state = IUCV_CLOSED;
sk->sk_state_change(sk);
}
@@ -2036,7 +2042,6 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
char nullstring[8];
if (!pskb_may_pull(skb, sizeof(*trans_hdr))) {
- WARN_ONCE(1, "AF_IUCV failed to receive skb, len=%u", skb->len);
kfree_skb(skb);
return NET_RX_SUCCESS;
}
@@ -2132,73 +2137,40 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
* afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets
* transport
**/
-static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
- enum iucv_tx_notify n)
+static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify n)
{
- struct sock *isk = skb->sk;
- struct sock *sk = NULL;
- struct iucv_sock *iucv = NULL;
- struct sk_buff_head *list;
- struct sk_buff *list_skb;
- struct sk_buff *nskb;
- unsigned long flags;
-
- read_lock_irqsave(&iucv_sk_list.lock, flags);
- sk_for_each(sk, &iucv_sk_list.head)
- if (sk == isk) {
- iucv = iucv_sk(sk);
- break;
- }
- read_unlock_irqrestore(&iucv_sk_list.lock, flags);
+ struct iucv_sock *iucv = iucv_sk(sk);
- if (!iucv || sock_flag(sk, SOCK_ZAPPED))
+ if (sock_flag(sk, SOCK_ZAPPED))
return;
- list = &iucv->send_skb_q;
- spin_lock_irqsave(&list->lock, flags);
- skb_queue_walk_safe(list, list_skb, nskb) {
- if (skb_shinfo(list_skb) == skb_shinfo(skb)) {
- switch (n) {
- case TX_NOTIFY_OK:
- __skb_unlink(list_skb, list);
- kfree_skb(list_skb);
- iucv_sock_wake_msglim(sk);
- break;
- case TX_NOTIFY_PENDING:
- atomic_inc(&iucv->pendings);
- break;
- case TX_NOTIFY_DELAYED_OK:
- __skb_unlink(list_skb, list);
- atomic_dec(&iucv->pendings);
- if (atomic_read(&iucv->pendings) <= 0)
- iucv_sock_wake_msglim(sk);
- kfree_skb(list_skb);
- break;
- case TX_NOTIFY_UNREACHABLE:
- case TX_NOTIFY_DELAYED_UNREACHABLE:
- case TX_NOTIFY_TPQFULL: /* not yet used */
- case TX_NOTIFY_GENERALERROR:
- case TX_NOTIFY_DELAYED_GENERALERROR:
- __skb_unlink(list_skb, list);
- kfree_skb(list_skb);
- if (sk->sk_state == IUCV_CONNECTED) {
- sk->sk_state = IUCV_DISCONN;
- sk->sk_state_change(sk);
- }
- break;
- }
- break;
+ switch (n) {
+ case TX_NOTIFY_OK:
+ atomic_dec(&iucv->skbs_in_xmit);
+ iucv_sock_wake_msglim(sk);
+ break;
+ case TX_NOTIFY_PENDING:
+ atomic_inc(&iucv->pendings);
+ break;
+ case TX_NOTIFY_DELAYED_OK:
+ atomic_dec(&iucv->skbs_in_xmit);
+ if (atomic_dec_return(&iucv->pendings) <= 0)
+ iucv_sock_wake_msglim(sk);
+ break;
+ default:
+ atomic_dec(&iucv->skbs_in_xmit);
+ if (sk->sk_state == IUCV_CONNECTED) {
+ sk->sk_state = IUCV_DISCONN;
+ sk->sk_state_change(sk);
}
}
- spin_unlock_irqrestore(&list->lock, flags);
if (sk->sk_state == IUCV_CLOSING) {
- if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) {
+ if (atomic_read(&iucv->skbs_in_xmit) == 0) {
sk->sk_state = IUCV_CLOSED;
sk->sk_state_change(sk);
}
}
-
}
/*
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 56dad9565bc9..d0b56ffbb057 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -786,7 +786,7 @@ static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
- skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+ skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
goto coalesced;
}
@@ -834,7 +834,7 @@ static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
get_page(page);
skb_fill_page_desc(skb, i, page, offset, size);
- skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+ skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
coalesced:
skb->len += size;
@@ -1496,7 +1496,7 @@ static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
return 0;
out:
- fput(csock->file);
+ sockfd_put(csock);
return err;
}
@@ -1644,7 +1644,7 @@ static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
spin_unlock_bh(&mux->lock);
out:
- fput(csock->file);
+ sockfd_put(csock);
return err;
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index c12dbc51ef5f..ef9b4ac03e7b 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2902,7 +2902,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
break;
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
return sz + sizeof(struct sadb_prop);
@@ -2920,7 +2920,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!ealg->pfkey_supported)
continue;
- if (!(ealg_tmpl_set(t, ealg) && ealg->available))
+ if (!(ealg_tmpl_set(t, ealg)))
continue;
for (k = 1; ; k++) {
@@ -2931,7 +2931,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
}
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
index 59755a9e2f9b..9e7da0acc58c 100644
--- a/net/l3mdev/Makefile
+++ b/net/l3mdev/Makefile
@@ -3,4 +3,4 @@
# Makefile for the L3 device API
#
-obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
+obj-y += l3mdev.o
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 213ea7abc9ab..0511bbe4af7b 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -122,6 +122,8 @@ static struct lapb_cb *lapb_create_cb(void)
timer_setup(&lapb->t1timer, NULL, 0);
timer_setup(&lapb->t2timer, NULL, 0);
+ lapb->t1timer_stop = true;
+ lapb->t2timer_stop = true;
lapb->t1 = LAPB_DEFAULT_T1;
lapb->t2 = LAPB_DEFAULT_T2;
@@ -129,6 +131,8 @@ static struct lapb_cb *lapb_create_cb(void)
lapb->mode = LAPB_DEFAULT_MODE;
lapb->window = LAPB_DEFAULT_WINDOW;
lapb->state = LAPB_STATE_0;
+
+ spin_lock_init(&lapb->lock);
refcount_set(&lapb->refcnt, 1);
out:
return lapb;
@@ -178,11 +182,23 @@ int lapb_unregister(struct net_device *dev)
goto out;
lapb_put(lapb);
+ /* Wait for other refs to "lapb" to drop */
+ while (refcount_read(&lapb->refcnt) > 2)
+ usleep_range(1, 10);
+
+ spin_lock_bh(&lapb->lock);
+
lapb_stop_t1timer(lapb);
lapb_stop_t2timer(lapb);
lapb_clear_queues(lapb);
+ spin_unlock_bh(&lapb->lock);
+
+ /* Wait for running timers to stop */
+ del_timer_sync(&lapb->t1timer);
+ del_timer_sync(&lapb->t2timer);
+
__lapb_remove_cb(lapb);
lapb_put(lapb);
@@ -201,6 +217,8 @@ int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms)
if (!lapb)
goto out;
+ spin_lock_bh(&lapb->lock);
+
parms->t1 = lapb->t1 / HZ;
parms->t2 = lapb->t2 / HZ;
parms->n2 = lapb->n2;
@@ -219,6 +237,7 @@ int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms)
else
parms->t2timer = (lapb->t2timer.expires - jiffies) / HZ;
+ spin_unlock_bh(&lapb->lock);
lapb_put(lapb);
rc = LAPB_OK;
out:
@@ -234,6 +253,8 @@ int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms)
if (!lapb)
goto out;
+ spin_lock_bh(&lapb->lock);
+
rc = LAPB_INVALUE;
if (parms->t1 < 1 || parms->t2 < 1 || parms->n2 < 1)
goto out_put;
@@ -256,6 +277,7 @@ int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms)
rc = LAPB_OK;
out_put:
+ spin_unlock_bh(&lapb->lock);
lapb_put(lapb);
out:
return rc;
@@ -270,6 +292,8 @@ int lapb_connect_request(struct net_device *dev)
if (!lapb)
goto out;
+ spin_lock_bh(&lapb->lock);
+
rc = LAPB_OK;
if (lapb->state == LAPB_STATE_1)
goto out_put;
@@ -285,24 +309,18 @@ int lapb_connect_request(struct net_device *dev)
rc = LAPB_OK;
out_put:
+ spin_unlock_bh(&lapb->lock);
lapb_put(lapb);
out:
return rc;
}
EXPORT_SYMBOL(lapb_connect_request);
-int lapb_disconnect_request(struct net_device *dev)
+static int __lapb_disconnect_request(struct lapb_cb *lapb)
{
- struct lapb_cb *lapb = lapb_devtostruct(dev);
- int rc = LAPB_BADTOKEN;
-
- if (!lapb)
- goto out;
-
switch (lapb->state) {
case LAPB_STATE_0:
- rc = LAPB_NOTCONNECTED;
- goto out_put;
+ return LAPB_NOTCONNECTED;
case LAPB_STATE_1:
lapb_dbg(1, "(%p) S1 TX DISC(1)\n", lapb->dev);
@@ -310,12 +328,10 @@ int lapb_disconnect_request(struct net_device *dev)
lapb_send_control(lapb, LAPB_DISC, LAPB_POLLON, LAPB_COMMAND);
lapb->state = LAPB_STATE_0;
lapb_start_t1timer(lapb);
- rc = LAPB_NOTCONNECTED;
- goto out_put;
+ return LAPB_NOTCONNECTED;
case LAPB_STATE_2:
- rc = LAPB_OK;
- goto out_put;
+ return LAPB_OK;
}
lapb_clear_queues(lapb);
@@ -328,8 +344,22 @@ int lapb_disconnect_request(struct net_device *dev)
lapb_dbg(1, "(%p) S3 DISC(1)\n", lapb->dev);
lapb_dbg(0, "(%p) S3 -> S2\n", lapb->dev);
- rc = LAPB_OK;
-out_put:
+ return LAPB_OK;
+}
+
+int lapb_disconnect_request(struct net_device *dev)
+{
+ struct lapb_cb *lapb = lapb_devtostruct(dev);
+ int rc = LAPB_BADTOKEN;
+
+ if (!lapb)
+ goto out;
+
+ spin_lock_bh(&lapb->lock);
+
+ rc = __lapb_disconnect_request(lapb);
+
+ spin_unlock_bh(&lapb->lock);
lapb_put(lapb);
out:
return rc;
@@ -344,6 +374,8 @@ int lapb_data_request(struct net_device *dev, struct sk_buff *skb)
if (!lapb)
goto out;
+ spin_lock_bh(&lapb->lock);
+
rc = LAPB_NOTCONNECTED;
if (lapb->state != LAPB_STATE_3 && lapb->state != LAPB_STATE_4)
goto out_put;
@@ -352,6 +384,7 @@ int lapb_data_request(struct net_device *dev, struct sk_buff *skb)
lapb_kick(lapb);
rc = LAPB_OK;
out_put:
+ spin_unlock_bh(&lapb->lock);
lapb_put(lapb);
out:
return rc;
@@ -364,7 +397,9 @@ int lapb_data_received(struct net_device *dev, struct sk_buff *skb)
int rc = LAPB_BADTOKEN;
if (lapb) {
+ spin_lock_bh(&lapb->lock);
lapb_data_input(lapb, skb);
+ spin_unlock_bh(&lapb->lock);
lapb_put(lapb);
rc = LAPB_OK;
}
@@ -435,6 +470,8 @@ static int lapb_device_event(struct notifier_block *this, unsigned long event,
if (!lapb)
return NOTIFY_DONE;
+ spin_lock_bh(&lapb->lock);
+
switch (event) {
case NETDEV_UP:
lapb_dbg(0, "(%p) Interface up: %s\n", dev, dev->name);
@@ -454,7 +491,7 @@ static int lapb_device_event(struct notifier_block *this, unsigned long event,
break;
case NETDEV_GOING_DOWN:
if (netif_carrier_ok(dev))
- lapb_disconnect_request(dev);
+ __lapb_disconnect_request(lapb);
break;
case NETDEV_DOWN:
lapb_dbg(0, "(%p) Interface down: %s\n", dev, dev->name);
@@ -489,6 +526,8 @@ static int lapb_device_event(struct notifier_block *this, unsigned long event,
break;
}
+ spin_unlock_bh(&lapb->lock);
+ lapb_put(lapb);
return NOTIFY_DONE;
}
diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c
index 7a4d0715d1c3..a966d29c772d 100644
--- a/net/lapb/lapb_out.c
+++ b/net/lapb/lapb_out.c
@@ -82,7 +82,8 @@ void lapb_kick(struct lapb_cb *lapb)
skb = skb_dequeue(&lapb->write_queue);
do {
- if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+ skbn = skb_copy(skb, GFP_ATOMIC);
+ if (!skbn) {
skb_queue_head(&lapb->write_queue, skb);
break;
}
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index baa247fe4ed0..0230b272b7d1 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -40,6 +40,7 @@ void lapb_start_t1timer(struct lapb_cb *lapb)
lapb->t1timer.function = lapb_t1timer_expiry;
lapb->t1timer.expires = jiffies + lapb->t1;
+ lapb->t1timer_stop = false;
add_timer(&lapb->t1timer);
}
@@ -50,16 +51,19 @@ void lapb_start_t2timer(struct lapb_cb *lapb)
lapb->t2timer.function = lapb_t2timer_expiry;
lapb->t2timer.expires = jiffies + lapb->t2;
+ lapb->t2timer_stop = false;
add_timer(&lapb->t2timer);
}
void lapb_stop_t1timer(struct lapb_cb *lapb)
{
+ lapb->t1timer_stop = true;
del_timer(&lapb->t1timer);
}
void lapb_stop_t2timer(struct lapb_cb *lapb)
{
+ lapb->t2timer_stop = true;
del_timer(&lapb->t2timer);
}
@@ -72,16 +76,31 @@ static void lapb_t2timer_expiry(struct timer_list *t)
{
struct lapb_cb *lapb = from_timer(lapb, t, t2timer);
+ spin_lock_bh(&lapb->lock);
+ if (timer_pending(&lapb->t2timer)) /* A new timer has been set up */
+ goto out;
+ if (lapb->t2timer_stop) /* The timer has been stopped */
+ goto out;
+
if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
lapb_timeout_response(lapb);
}
+
+out:
+ spin_unlock_bh(&lapb->lock);
}
static void lapb_t1timer_expiry(struct timer_list *t)
{
struct lapb_cb *lapb = from_timer(lapb, t, t1timer);
+ spin_lock_bh(&lapb->lock);
+ if (timer_pending(&lapb->t1timer)) /* A new timer has been set up */
+ goto out;
+ if (lapb->t1timer_stop) /* The timer has been stopped */
+ goto out;
+
switch (lapb->state) {
/*
@@ -108,7 +127,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
lapb->state = LAPB_STATE_0;
lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
lapb_dbg(0, "(%p) S1 -> S0\n", lapb->dev);
- return;
+ goto out;
} else {
lapb->n2count++;
if (lapb->mode & LAPB_EXTENDED) {
@@ -132,7 +151,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
lapb->state = LAPB_STATE_0;
lapb_disconnect_confirmation(lapb, LAPB_TIMEDOUT);
lapb_dbg(0, "(%p) S2 -> S0\n", lapb->dev);
- return;
+ goto out;
} else {
lapb->n2count++;
lapb_dbg(1, "(%p) S2 TX DISC(1)\n", lapb->dev);
@@ -150,7 +169,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
lapb_stop_t2timer(lapb);
lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
lapb_dbg(0, "(%p) S3 -> S0\n", lapb->dev);
- return;
+ goto out;
} else {
lapb->n2count++;
lapb_requeue_frames(lapb);
@@ -167,7 +186,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
lapb->state = LAPB_STATE_0;
lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
lapb_dbg(0, "(%p) S4 -> S0\n", lapb->dev);
- return;
+ goto out;
} else {
lapb->n2count++;
lapb_transmit_frmr(lapb);
@@ -176,4 +195,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
}
lapb_start_t1timer(lapb);
+
+out:
+ spin_unlock_bh(&lapb->lock);
}
diff --git a/net/llc/Kconfig b/net/llc/Kconfig
index b0e646ac47eb..7f79f5e134f9 100644
--- a/net/llc/Kconfig
+++ b/net/llc/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config LLC
tristate
- depends on NET
config LLC2
tristate "ANSI/IEEE 802.2 LLC type 2 Support"
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index cd9a9bd242ba..51ec8256b7fa 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -69,7 +69,7 @@ config MAC80211_MESH
config MAC80211_LEDS
bool "Enable LED triggers"
depends on MAC80211
- depends on LEDS_CLASS
+ depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211
select LEDS_TRIGGERS
help
This option enables a few LED triggers for different
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index ad04c361cba5..23d25e8b2358 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -56,11 +56,9 @@ mac80211-$(CONFIG_PM) += pm.o
CFLAGS_trace.o := -I$(src)
rc80211_minstrel-y := \
- rc80211_minstrel.o \
rc80211_minstrel_ht.o
rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \
- rc80211_minstrel_debugfs.o \
rc80211_minstrel_ht_debugfs.o
mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 48f144f107d5..5296898875ff 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -120,18 +120,17 @@ static ssize_t aqm_write(struct file *file,
{
struct ieee80211_local *local = file->private_data;
char buf[100];
- size_t len;
- if (count > sizeof(buf))
+ if (count >= sizeof(buf))
return -EINVAL;
if (copy_from_user(buf, user_buf, count))
return -EFAULT;
- buf[sizeof(buf) - 1] = '\0';
- len = strlen(buf);
- if (len > 0 && buf[len-1] == '\n')
- buf[len-1] = 0;
+ if (count && buf[count - 1] == '\n')
+ buf[count - 1] = '\0';
+ else
+ buf[count] = '\0';
if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1)
return count;
@@ -177,18 +176,17 @@ static ssize_t airtime_flags_write(struct file *file,
{
struct ieee80211_local *local = file->private_data;
char buf[16];
- size_t len;
- if (count > sizeof(buf))
+ if (count >= sizeof(buf))
return -EINVAL;
if (copy_from_user(buf, user_buf, count))
return -EFAULT;
- buf[sizeof(buf) - 1] = 0;
- len = strlen(buf);
- if (len > 0 && buf[len - 1] == '\n')
- buf[len - 1] = 0;
+ if (count && buf[count - 1] == '\n')
+ buf[count - 1] = '\0';
+ else
+ buf[count] = '\0';
if (kstrtou16(buf, 0, &local->airtime_flags))
return -EINVAL;
@@ -237,20 +235,19 @@ static ssize_t aql_txq_limit_write(struct file *file,
{
struct ieee80211_local *local = file->private_data;
char buf[100];
- size_t len;
u32 ac, q_limit_low, q_limit_high, q_limit_low_old, q_limit_high_old;
struct sta_info *sta;
- if (count > sizeof(buf))
+ if (count >= sizeof(buf))
return -EINVAL;
if (copy_from_user(buf, user_buf, count))
return -EFAULT;
- buf[sizeof(buf) - 1] = 0;
- len = strlen(buf);
- if (len > 0 && buf[len - 1] == '\n')
- buf[len - 1] = 0;
+ if (count && buf[count - 1] == '\n')
+ buf[count - 1] = '\0';
+ else
+ buf[count] = '\0';
if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3)
return -EINVAL;
@@ -284,6 +281,56 @@ static const struct file_operations aql_txq_limit_ops = {
.llseek = default_llseek,
};
+/*
+ * debugfs read handler: prints the negation of the aql_disable static key
+ * as a single decimal digit followed by a newline.
+ */
+static ssize_t aql_enable_read(struct file *file, char __user *user_buf,
+			       size_t count, loff_t *ppos)
+{
+	bool enabled = !static_key_false(&aql_disable.key);
+	char out[3];
+	int n;
+
+	n = scnprintf(out, sizeof(out), "%d\n", enabled);
+
+	return simple_read_from_buffer(user_buf, count, ppos, out, n);
+}
+
+/*
+ * debugfs write handler: "1" clears the aql_disable static key, "0" sets it.
+ * A single trailing newline is accepted. Returns @count on success, -EINVAL
+ * for any other input (including input that does not fit in the local
+ * buffer together with its terminator), or -EFAULT if the user copy fails.
+ */
+static ssize_t aql_enable_write(struct file *file, const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	bool aql_disabled = static_key_false(&aql_disable.key);
+	char buf[3];
+
+	/* keep one byte free for the terminator stored at buf[count] below */
+	if (count >= sizeof(buf))
+		return -EINVAL;
+
+	if (copy_from_user(buf, user_buf, count))
+		return -EFAULT;
+
+	/*
+	 * Terminate at the copied length. Only the first @count bytes of buf
+	 * are initialized, so strlen() must not be used to find the end.
+	 */
+	if (count && buf[count - 1] == '\n')
+		buf[count - 1] = '\0';
+	else
+		buf[count] = '\0';
+
+	if (buf[0] == '0' && buf[1] == '\0') {
+		if (!aql_disabled)
+			static_branch_inc(&aql_disable);
+	} else if (buf[0] == '1' && buf[1] == '\0') {
+		if (aql_disabled)
+			static_branch_dec(&aql_disable);
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+/* File operations wiring up the aql_enable read and write handlers. */
+static const struct file_operations aql_enable_ops = {
+	.open = simple_open,
+	.read = aql_enable_read,
+	.write = aql_enable_write,
+	.llseek = default_llseek,
+};
+
static ssize_t force_tx_status_read(struct file *file,
char __user *user_buf,
size_t count,
@@ -306,18 +353,17 @@ static ssize_t force_tx_status_write(struct file *file,
{
struct ieee80211_local *local = file->private_data;
char buf[3];
- size_t len;
- if (count > sizeof(buf))
+ if (count >= sizeof(buf))
return -EINVAL;
if (copy_from_user(buf, user_buf, count))
return -EFAULT;
- buf[sizeof(buf) - 1] = '\0';
- len = strlen(buf);
- if (len > 0 && buf[len - 1] == '\n')
- buf[len - 1] = 0;
+ if (count && buf[count - 1] == '\n')
+ buf[count - 1] = '\0';
+ else
+ buf[count] = '\0';
if (buf[0] == '0' && buf[1] == '\0')
local->force_tx_status = 0;
@@ -409,6 +455,7 @@ static const char *hw_flag_names[] = {
FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID),
FLAG(AMPDU_KEYBORDER_SUPPORT),
FLAG(SUPPORTS_TX_ENCAP_OFFLOAD),
+ FLAG(SUPPORTS_RX_DECAP_OFFLOAD),
#undef FLAG
};
@@ -572,6 +619,7 @@ void debugfs_hw_add(struct ieee80211_local *local)
DEBUGFS_ADD(power);
DEBUGFS_ADD(hw_conf);
DEBUGFS_ADD_MODE(force_tx_status, 0600);
+ DEBUGFS_ADD_MODE(aql_enable, 0600);
if (local->ops->wake_tx_queue)
DEBUGFS_ADD_MODE(aqm, 0600);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index eb4bb79d936a..5a27c61a7b38 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -79,6 +79,7 @@ static const char * const sta_flag_names[] = {
FLAG(MPSP_RECIPIENT),
FLAG(PS_DELIVER),
FLAG(USES_ENCRYPTION),
+ FLAG(DECAP_OFFLOAD),
#undef FLAG
};
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index c9a8a2433e8a..48322e45e7dd 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -125,8 +125,11 @@ int drv_sta_state(struct ieee80211_local *local,
} else if (old_state == IEEE80211_STA_AUTH &&
new_state == IEEE80211_STA_ASSOC) {
ret = drv_sta_add(local, sdata, &sta->sta);
- if (ret == 0)
+ if (ret == 0) {
sta->uploaded = true;
+ if (rcu_access_pointer(sta->sta.rates))
+ drv_sta_rate_tbl_update(local, sdata, &sta->sta);
+ }
} else if (old_state == IEEE80211_STA_ASSOC &&
new_state == IEEE80211_STA_AUTH) {
drv_sta_remove(local, sdata, &sta->sta);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index bcdfd19a596b..604ca59937f0 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1413,4 +1413,20 @@ static inline void drv_sta_set_4addr(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+/*
+ * Pass a decap-offload enable/disable request for @sta to the driver.
+ * The call is made on the sdata returned by get_bss_sdata(); nothing is
+ * done if check_sdata_in_driver() fails for it. The driver op is optional:
+ * when it is not provided, only the trace points fire.
+ */
+static inline void drv_sta_set_decap_offload(struct ieee80211_local *local,
+					     struct ieee80211_sub_if_data *sdata,
+					     struct ieee80211_sta *sta,
+					     bool enabled)
+{
+	struct ieee80211_sub_if_data *bss = get_bss_sdata(sdata);
+
+	if (!check_sdata_in_driver(bss))
+		return;
+
+	trace_drv_sta_set_decap_offload(local, bss, sta, enabled);
+
+	if (local->ops->sta_set_decap_offload != NULL)
+		local->ops->sta_set_decap_offload(&local->hw, &bss->vif, sta,
+						  enabled);
+
+	trace_drv_return_void(local);
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index cc26f239838b..0c0b970835ce 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -52,6 +52,57 @@ ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_
sta->sta.he_6ghz_capa = *he_6ghz_capa;
}
+/*
+ * OR IEEE80211_HE_MCS_NOT_SUPPORTED into each of the eight fields of the
+ * little-endian MCS map at @he_mcs (fields are laid out at a 2-bit stride).
+ */
+static void ieee80211_he_mcs_disable(__le16 *he_mcs)
+{
+	u16 all_fields = 0;
+	u32 field;
+
+	/* build the mask in host order, convert once when storing */
+	for (field = 0; field < 8; field++)
+		all_fields |= IEEE80211_HE_MCS_NOT_SUPPORTED << (field * 2);
+
+	*he_mcs |= cpu_to_le16(all_fields);
+}
+
+/* Return the field at bit offset @shift of the little-endian MCS map @map. */
+static u16 ieee80211_he_mcs_field(__le16 map, u32 shift)
+{
+	return (le16_to_cpu(map) >> shift) & IEEE80211_HE_MCS_NOT_SUPPORTED;
+}
+
+/*
+ * Clamp a peer field by our own field for the opposite direction:
+ * IEEE80211_HE_MCS_NOT_SUPPORTED on either side wins, otherwise the
+ * numerically smaller of the two values is used.
+ */
+static u16 ieee80211_he_mcs_limit(u16 peer, u16 own)
+{
+	if (peer == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+	    own == IEEE80211_HE_MCS_NOT_SUPPORTED)
+		return IEEE80211_HE_MCS_NOT_SUPPORTED;
+
+	return own < peer ? own : peer;
+}
+
+/*
+ * Restrict the peer's RX/TX MCS maps, field by field, to what our own
+ * maps allow: peer TX is limited by our RX and peer RX by our TX.
+ * Only *he_peer_rx and *he_peer_tx are written; the own maps are read-only.
+ */
+static void ieee80211_he_mcs_intersection(__le16 *he_own_rx, __le16 *he_peer_rx,
+					  __le16 *he_own_tx, __le16 *he_peer_tx)
+{
+	u32 shift;
+	u16 rx, tx;
+
+	/* eight fields, two bits apart */
+	for (shift = 0; shift < 16; shift += 2) {
+		/* read all four fields before either peer map is updated */
+		tx = ieee80211_he_mcs_limit(ieee80211_he_mcs_field(*he_peer_tx, shift),
+					    ieee80211_he_mcs_field(*he_own_rx, shift));
+		rx = ieee80211_he_mcs_limit(ieee80211_he_mcs_field(*he_peer_rx, shift),
+					    ieee80211_he_mcs_field(*he_own_tx, shift));
+
+		*he_peer_rx &=
+			~cpu_to_le16(IEEE80211_HE_MCS_NOT_SUPPORTED << shift);
+		*he_peer_rx |= cpu_to_le16(rx << shift);
+
+		*he_peer_tx &=
+			~cpu_to_le16(IEEE80211_HE_MCS_NOT_SUPPORTED << shift);
+		*he_peer_tx |= cpu_to_le16(tx << shift);
+	}
+}
+
void
ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
@@ -60,10 +111,12 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
+ struct ieee80211_sta_he_cap own_he_cap = sband->iftype_data->he_cap;
struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
u8 he_ppe_size;
u8 mcs_nss_size;
u8 he_total_size;
+ bool own_160, peer_160, own_80p80, peer_80p80;
memset(he_cap, 0, sizeof(*he_cap));
@@ -101,6 +154,45 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa)
ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, sta);
+
+ ieee80211_he_mcs_intersection(&own_he_cap.he_mcs_nss_supp.rx_mcs_80,
+ &he_cap->he_mcs_nss_supp.rx_mcs_80,
+ &own_he_cap.he_mcs_nss_supp.tx_mcs_80,
+ &he_cap->he_mcs_nss_supp.tx_mcs_80);
+
+ own_160 = own_he_cap.he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+ peer_160 = he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+
+ if (peer_160 && own_160) {
+ ieee80211_he_mcs_intersection(&own_he_cap.he_mcs_nss_supp.rx_mcs_160,
+ &he_cap->he_mcs_nss_supp.rx_mcs_160,
+ &own_he_cap.he_mcs_nss_supp.tx_mcs_160,
+ &he_cap->he_mcs_nss_supp.tx_mcs_160);
+ } else if (peer_160 && !own_160) {
+ ieee80211_he_mcs_disable(&he_cap->he_mcs_nss_supp.rx_mcs_160);
+ ieee80211_he_mcs_disable(&he_cap->he_mcs_nss_supp.tx_mcs_160);
+ he_cap->he_cap_elem.phy_cap_info[0] &=
+ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+ }
+
+ own_80p80 = own_he_cap.he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+ peer_80p80 = he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+
+ if (peer_80p80 && own_80p80) {
+ ieee80211_he_mcs_intersection(&own_he_cap.he_mcs_nss_supp.rx_mcs_80p80,
+ &he_cap->he_mcs_nss_supp.rx_mcs_80p80,
+ &own_he_cap.he_mcs_nss_supp.tx_mcs_80p80,
+ &he_cap->he_mcs_nss_supp.tx_mcs_80p80);
+ } else if (peer_80p80 && !own_80p80) {
+ ieee80211_he_mcs_disable(&he_cap->he_mcs_nss_supp.rx_mcs_80p80);
+ ieee80211_he_mcs_disable(&he_cap->he_mcs_nss_supp.tx_mcs_80p80);
+ he_cap->he_cap_elem.phy_cap_info[0] &=
+ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+ }
}
void
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8bf9c0e974d6..ecda126a7026 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -848,7 +848,6 @@ enum txq_info_flags {
*/
struct txq_info {
struct fq_tin tin;
- struct fq_flow def_flow;
struct codel_vars def_cvars;
struct codel_stats cstats;
struct sk_buff_head frags;
@@ -1078,6 +1077,7 @@ enum queue_stop_reason {
IEEE80211_QUEUE_STOP_REASON_FLUSH,
IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN,
IEEE80211_QUEUE_STOP_REASON_RESERVE_TID,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE,
IEEE80211_QUEUE_STOP_REASONS,
};
@@ -1143,6 +1143,8 @@ enum mac80211_scan_state {
SCAN_ABORT,
};
+DECLARE_STATIC_KEY_FALSE(aql_disable);
+
struct ieee80211_local {
/* embed the driver visible part.
* don't cast (use the static inlines below), but we keep
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 3b9ec4ef81c3..b80c9b016b2b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -357,11 +357,14 @@ static int ieee80211_open(struct net_device *dev)
if (err)
return err;
- return ieee80211_do_open(&sdata->wdev, true);
+ wiphy_lock(sdata->local->hw.wiphy);
+ err = ieee80211_do_open(&sdata->wdev, true);
+ wiphy_unlock(sdata->local->hw.wiphy);
+
+ return err;
}
-static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
- bool going_down)
+static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_down)
{
struct ieee80211_local *local = sdata->local;
unsigned long flags;
@@ -637,7 +640,9 @@ static int ieee80211_stop(struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ wiphy_lock(sdata->local->hw.wiphy);
ieee80211_do_stop(sdata, true);
+ wiphy_unlock(sdata->local->hw.wiphy);
return 0;
}
@@ -765,7 +770,7 @@ static const struct net_device_ops ieee80211_dataif_8023_ops = {
.ndo_get_stats64 = ieee80211_get_stats64,
};
-static bool ieee80211_iftype_supports_encap_offload(enum nl80211_iftype iftype)
+static bool ieee80211_iftype_supports_hdr_offload(enum nl80211_iftype iftype)
{
switch (iftype) {
/* P2P GO and client are mapped to AP/STATION types */
@@ -785,7 +790,7 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat
flags = sdata->vif.offload_flags;
if (ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD) &&
- ieee80211_iftype_supports_encap_offload(sdata->vif.type)) {
+ ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) {
flags |= IEEE80211_OFFLOAD_ENCAP_ENABLED;
if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG) &&
@@ -798,10 +803,21 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat
flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED;
}
+ if (ieee80211_hw_check(&local->hw, SUPPORTS_RX_DECAP_OFFLOAD) &&
+ ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) {
+ flags |= IEEE80211_OFFLOAD_DECAP_ENABLED;
+
+ if (local->monitors)
+ flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED;
+ } else {
+ flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED;
+ }
+
if (sdata->vif.offload_flags == flags)
return false;
sdata->vif.offload_flags = flags;
+ ieee80211_check_fast_rx_iface(sdata);
return true;
}
@@ -819,7 +835,7 @@ static void ieee80211_set_vif_encap_ops(struct ieee80211_sub_if_data *sdata)
}
if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD) ||
- !ieee80211_iftype_supports_encap_offload(bss->vif.type))
+ !ieee80211_iftype_supports_hdr_offload(bss->vif.type))
return;
enabled = bss->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_ENABLED;
@@ -1617,6 +1633,10 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
if (ret)
return ret;
+ ieee80211_stop_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
+ synchronize_net();
+
ieee80211_do_stop(sdata, false);
ieee80211_teardown_sdata(sdata);
@@ -1639,6 +1659,8 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
err = ieee80211_do_open(&sdata->wdev, false);
WARN(err, "type change: do_open returned %d", err);
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
return ret;
}
@@ -1965,7 +1987,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ndev->min_mtu = 256;
ndev->max_mtu = local->hw.max_mtu;
- ret = register_netdevice(ndev);
+ ret = cfg80211_register_netdevice(ndev);
if (ret) {
free_netdev(ndev);
return ret;
@@ -1995,10 +2017,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
synchronize_rcu();
- if (sdata->dev) {
- unregister_netdevice(sdata->dev);
- } else {
- cfg80211_unregister_wdev(&sdata->wdev);
+ cfg80211_unregister_wdev(&sdata->wdev);
+
+ if (!sdata->dev) {
ieee80211_teardown_sdata(sdata);
kfree(sdata);
}
@@ -2047,13 +2068,16 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
list_add(&sdata->list, &wdev_list);
}
mutex_unlock(&local->iflist_mtx);
+
unregister_netdevice_many(&unreg_list);
+ wiphy_lock(local->hw.wiphy);
list_for_each_entry_safe(sdata, tmp, &wdev_list, list) {
list_del(&sdata->list);
cfg80211_unregister_wdev(&sdata->wdev);
kfree(sdata);
}
+ wiphy_unlock(local->hw.wiphy);
}
static int netdev_notify(struct notifier_block *nb,
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index a4817aa4b171..56c068cb49c4 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -887,7 +887,7 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata)
struct ieee80211_key *key;
struct ieee80211_sub_if_data *vlan;
- ASSERT_RTNL();
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
mutex_lock(&sdata->local->key_mtx);
@@ -924,7 +924,7 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
struct ieee80211_key *key, *tmp;
struct ieee80211_sub_if_data *sdata;
- ASSERT_RTNL();
+ lockdep_assert_wiphy(hw->wiphy);
mutex_lock(&local->key_mtx);
if (vif) {
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index dee88ec566ad..4f3f8bb58e76 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -261,7 +261,9 @@ static void ieee80211_restart_work(struct work_struct *work)
"%s called with hardware scan in progress\n", __func__);
flush_work(&local->radar_detected_work);
+ /* we might do interface manipulations, so need both */
rtnl_lock();
+ wiphy_lock(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
/*
* XXX: there may be more work for other vif types and even
@@ -293,6 +295,7 @@ static void ieee80211_restart_work(struct work_struct *work)
synchronize_net();
ieee80211_reconfig(local);
+ wiphy_unlock(local->hw.wiphy);
rtnl_unlock();
}
@@ -1272,6 +1275,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
rate_control_add_debugfs(local);
rtnl_lock();
+ wiphy_lock(hw->wiphy);
/* add one default STA interface if supported */
if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
@@ -1285,6 +1289,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
"Failed to add default virtual iface\n");
}
+ wiphy_unlock(hw->wiphy);
rtnl_unlock();
#ifdef CONFIG_INET
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 313eee12410e..3db514c4c63a 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -356,7 +356,7 @@ u32 airtime_link_metric_get(struct ieee80211_local *local,
*/
tx_time = (device_constant + 10 * test_frame_len / rate);
estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
- result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
+ result = ((u64)tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
return (u32)result;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0e4d950cf907..2e33a1263518 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -5754,6 +5754,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (req->flags & ASSOC_REQ_DISABLE_VHT)
ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+ if (req->flags & ASSOC_REQ_DISABLE_HE)
+ ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+
err = ieee80211_prep_connection(sdata, req->bss, true, override);
if (err)
goto err_clear;
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index ae378a41c927..7809a906d7fe 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -1,4 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
+/*
+ * Portions
+ * Copyright (C) 2020-2021 Intel Corporation
+ */
#include <net/mac80211.h>
#include <net/rtnetlink.h>
@@ -11,7 +15,7 @@ static void ieee80211_sched_scan_cancel(struct ieee80211_local *local)
{
if (ieee80211_request_sched_scan_stop(local))
return;
- cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0);
+ cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0);
}
int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 45927202c71c..63652c39c8e0 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -960,7 +960,8 @@ int rate_control_set_rates(struct ieee80211_hw *hw,
if (old)
kfree_rcu(old, rcu_head);
- drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);
+ if (sta->uploaded)
+ drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);
ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta));
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
deleted file mode 100644
index b13b1da19386..000000000000
--- a/net/mac80211/rc80211_minstrel.c
+++ /dev/null
@@ -1,574 +0,0 @@
-/*
- * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on minstrel.c:
- * Copyright (C) 2005-2007 Derek Smithies <derek@indranet.co.nz>
- * Sponsored by Indranet Technologies Ltd
- *
- * Based on sample.c:
- * Copyright (c) 2005 John Bicket
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
- * redistribution must be conditioned upon including a substantially
- * similar Disclaimer requirement for further binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
- * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGES.
- */
-#include <linux/netdevice.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/debugfs.h>
-#include <linux/random.h>
-#include <linux/ieee80211.h>
-#include <linux/slab.h>
-#include <net/mac80211.h>
-#include "rate.h"
-#include "rc80211_minstrel.h"
-
-#define SAMPLE_TBL(_mi, _idx, _col) \
- _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col]
-
-/* convert mac80211 rate index to local array index */
-static inline int
-rix_to_ndx(struct minstrel_sta_info *mi, int rix)
-{
- int i = rix;
- for (i = rix; i >= 0; i--)
- if (mi->r[i].rix == rix)
- break;
- return i;
-}
-
-/* return current EMWA throughput */
-int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg)
-{
- int usecs;
-
- usecs = mr->perfect_tx_time;
- if (!usecs)
- usecs = 1000000;
-
- /* reset thr. below 10% success */
- if (mr->stats.prob_avg < MINSTREL_FRAC(10, 100))
- return 0;
-
- if (prob_avg > MINSTREL_FRAC(90, 100))
- return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs));
- else
- return MINSTREL_TRUNC(100000 * (prob_avg / usecs));
-}
-
-/* find & sort topmost throughput rates */
-static inline void
-minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
-{
- int j;
- struct minstrel_rate_stats *tmp_mrs;
- struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats;
-
- for (j = MAX_THR_RATES; j > 0; --j) {
- tmp_mrs = &mi->r[tp_list[j - 1]].stats;
- if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_avg) <=
- minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_avg))
- break;
- }
-
- if (j < MAX_THR_RATES - 1)
- memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1));
- if (j < MAX_THR_RATES)
- tp_list[j] = i;
-}
-
-static void
-minstrel_set_rate(struct minstrel_sta_info *mi, struct ieee80211_sta_rates *ratetbl,
- int offset, int idx)
-{
- struct minstrel_rate *r = &mi->r[idx];
-
- ratetbl->rate[offset].idx = r->rix;
- ratetbl->rate[offset].count = r->adjusted_retry_count;
- ratetbl->rate[offset].count_cts = r->retry_count_cts;
- ratetbl->rate[offset].count_rts = r->stats.retry_count_rtscts;
-}
-
-static void
-minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
-{
- struct ieee80211_sta_rates *ratetbl;
- int i = 0;
-
- ratetbl = kzalloc(sizeof(*ratetbl), GFP_ATOMIC);
- if (!ratetbl)
- return;
-
- /* Start with max_tp_rate */
- minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[0]);
-
- if (mp->hw->max_rates >= 3) {
- /* At least 3 tx rates supported, use max_tp_rate2 next */
- minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[1]);
- }
-
- if (mp->hw->max_rates >= 2) {
- /* At least 2 tx rates supported, use max_prob_rate next */
- minstrel_set_rate(mi, ratetbl, i++, mi->max_prob_rate);
- }
-
- /* Use lowest rate last */
- ratetbl->rate[i].idx = mi->lowest_rix;
- ratetbl->rate[i].count = mp->max_retry;
- ratetbl->rate[i].count_cts = mp->max_retry;
- ratetbl->rate[i].count_rts = mp->max_retry;
-
- rate_control_set_rates(mp->hw, mi->sta, ratetbl);
-}
-
-/*
-* Recalculate statistics and counters of a given rate
-*/
-void
-minstrel_calc_rate_stats(struct minstrel_priv *mp,
- struct minstrel_rate_stats *mrs)
-{
- unsigned int cur_prob;
-
- if (unlikely(mrs->attempts > 0)) {
- mrs->sample_skipped = 0;
- cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts);
- if (mp->new_avg) {
- minstrel_filter_avg_add(&mrs->prob_avg,
- &mrs->prob_avg_1, cur_prob);
- } else if (unlikely(!mrs->att_hist)) {
- mrs->prob_avg = cur_prob;
- } else {
- /*update exponential weighted moving avarage */
- mrs->prob_avg = minstrel_ewma(mrs->prob_avg,
- cur_prob,
- EWMA_LEVEL);
- }
- mrs->att_hist += mrs->attempts;
- mrs->succ_hist += mrs->success;
- } else {
- mrs->sample_skipped++;
- }
-
- mrs->last_success = mrs->success;
- mrs->last_attempts = mrs->attempts;
- mrs->success = 0;
- mrs->attempts = 0;
-}
-
-static void
-minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
-{
- u8 tmp_tp_rate[MAX_THR_RATES];
- u8 tmp_prob_rate = 0;
- int i, tmp_cur_tp, tmp_prob_tp;
-
- for (i = 0; i < MAX_THR_RATES; i++)
- tmp_tp_rate[i] = 0;
-
- for (i = 0; i < mi->n_rates; i++) {
- struct minstrel_rate *mr = &mi->r[i];
- struct minstrel_rate_stats *mrs = &mi->r[i].stats;
- struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats;
-
- /* Update statistics of success probability per rate */
- minstrel_calc_rate_stats(mp, mrs);
-
- /* Sample less often below the 10% chance of success.
- * Sample less often above the 95% chance of success. */
- if (mrs->prob_avg > MINSTREL_FRAC(95, 100) ||
- mrs->prob_avg < MINSTREL_FRAC(10, 100)) {
- mr->adjusted_retry_count = mrs->retry_count >> 1;
- if (mr->adjusted_retry_count > 2)
- mr->adjusted_retry_count = 2;
- mr->sample_limit = 4;
- } else {
- mr->sample_limit = -1;
- mr->adjusted_retry_count = mrs->retry_count;
- }
- if (!mr->adjusted_retry_count)
- mr->adjusted_retry_count = 2;
-
- minstrel_sort_best_tp_rates(mi, i, tmp_tp_rate);
-
- /* To determine the most robust rate (max_prob_rate) used at
- * 3rd mmr stage we distinct between two cases:
- * (1) if any success probabilitiy >= 95%, out of those rates
- * choose the maximum throughput rate as max_prob_rate
- * (2) if all success probabilities < 95%, the rate with
- * highest success probability is chosen as max_prob_rate */
- if (mrs->prob_avg >= MINSTREL_FRAC(95, 100)) {
- tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_avg);
- tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate],
- tmp_mrs->prob_avg);
- if (tmp_cur_tp >= tmp_prob_tp)
- tmp_prob_rate = i;
- } else {
- if (mrs->prob_avg >= tmp_mrs->prob_avg)
- tmp_prob_rate = i;
- }
- }
-
- /* Assign the new rate set */
- memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate));
- mi->max_prob_rate = tmp_prob_rate;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- /* use fixed index if set */
- if (mp->fixed_rate_idx != -1) {
- mi->max_tp_rate[0] = mp->fixed_rate_idx;
- mi->max_tp_rate[1] = mp->fixed_rate_idx;
- mi->max_prob_rate = mp->fixed_rate_idx;
- }
-#endif
-
- /* Reset update timer */
- mi->last_stats_update = jiffies;
-
- minstrel_update_rates(mp, mi);
-}
-
-static void
-minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
- void *priv_sta, struct ieee80211_tx_status *st)
-{
- struct ieee80211_tx_info *info = st->info;
- struct minstrel_priv *mp = priv;
- struct minstrel_sta_info *mi = priv_sta;
- struct ieee80211_tx_rate *ar = info->status.rates;
- int i, ndx;
- int success;
-
- success = !!(info->flags & IEEE80211_TX_STAT_ACK);
-
- for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
- if (ar[i].idx < 0 || !ar[i].count)
- break;
-
- ndx = rix_to_ndx(mi, ar[i].idx);
- if (ndx < 0)
- continue;
-
- mi->r[ndx].stats.attempts += ar[i].count;
-
- if ((i != IEEE80211_TX_MAX_RATES - 1) && (ar[i + 1].idx < 0))
- mi->r[ndx].stats.success += success;
- }
-
- if (time_after(jiffies, mi->last_stats_update +
- mp->update_interval / (mp->new_avg ? 2 : 1)))
- minstrel_update_stats(mp, mi);
-}
-
-
-static inline unsigned int
-minstrel_get_retry_count(struct minstrel_rate *mr,
- struct ieee80211_tx_info *info)
-{
- u8 retry = mr->adjusted_retry_count;
-
- if (info->control.use_rts)
- retry = max_t(u8, 2, min(mr->stats.retry_count_rtscts, retry));
- else if (info->control.use_cts_prot)
- retry = max_t(u8, 2, min(mr->retry_count_cts, retry));
- return retry;
-}
-
-
-static int
-minstrel_get_next_sample(struct minstrel_sta_info *mi)
-{
- unsigned int sample_ndx;
- sample_ndx = SAMPLE_TBL(mi, mi->sample_row, mi->sample_column);
- mi->sample_row++;
- if ((int) mi->sample_row >= mi->n_rates) {
- mi->sample_row = 0;
- mi->sample_column++;
- if (mi->sample_column >= SAMPLE_COLUMNS)
- mi->sample_column = 0;
- }
- return sample_ndx;
-}
-
-static void
-minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
- void *priv_sta, struct ieee80211_tx_rate_control *txrc)
-{
- struct sk_buff *skb = txrc->skb;
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct minstrel_sta_info *mi = priv_sta;
- struct minstrel_priv *mp = priv;
- struct ieee80211_tx_rate *rate = &info->control.rates[0];
- struct minstrel_rate *msr, *mr;
- unsigned int ndx;
- bool mrr_capable;
- bool prev_sample;
- int delta;
- int sampling_ratio;
-
- /* check multi-rate-retry capabilities & adjust lookaround_rate */
- mrr_capable = mp->has_mrr &&
- !txrc->rts &&
- !txrc->bss_conf->use_cts_prot;
- if (mrr_capable)
- sampling_ratio = mp->lookaround_rate_mrr;
- else
- sampling_ratio = mp->lookaround_rate;
-
- /* increase sum packet counter */
- mi->total_packets++;
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- if (mp->fixed_rate_idx != -1)
- return;
-#endif
-
- /* Don't use EAPOL frames for sampling on non-mrr hw */
- if (mp->hw->max_rates == 1 &&
- (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
- return;
-
- delta = (mi->total_packets * sampling_ratio / 100) -
- mi->sample_packets;
-
- /* delta < 0: no sampling required */
- prev_sample = mi->prev_sample;
- mi->prev_sample = false;
- if (delta < 0 || (!mrr_capable && prev_sample))
- return;
-
- if (mi->total_packets >= 10000) {
- mi->sample_packets = 0;
- mi->total_packets = 0;
- } else if (delta > mi->n_rates * 2) {
- /* With multi-rate retry, not every planned sample
- * attempt actually gets used, due to the way the retry
- * chain is set up - [max_tp,sample,prob,lowest] for
- * sample_rate < max_tp.
- *
- * If there's too much sampling backlog and the link
- * starts getting worse, minstrel would start bursting
- * out lots of sampling frames, which would result
- * in a large throughput loss. */
- mi->sample_packets += (delta - mi->n_rates * 2);
- }
-
- /* get next random rate sample */
- ndx = minstrel_get_next_sample(mi);
- msr = &mi->r[ndx];
- mr = &mi->r[mi->max_tp_rate[0]];
-
- /* Decide if direct ( 1st mrr stage) or indirect (2nd mrr stage)
- * rate sampling method should be used.
- * Respect such rates that are not sampled for 20 interations.
- */
- if (msr->perfect_tx_time < mr->perfect_tx_time ||
- msr->stats.sample_skipped >= 20) {
- if (!msr->sample_limit)
- return;
-
- mi->sample_packets++;
- if (msr->sample_limit > 0)
- msr->sample_limit--;
- }
-
- /* If we're not using MRR and the sampling rate already
- * has a probability of >95%, we shouldn't be attempting
- * to use it, as this only wastes precious airtime */
- if (!mrr_capable &&
- (mi->r[ndx].stats.prob_avg > MINSTREL_FRAC(95, 100)))
- return;
-
- mi->prev_sample = true;
-
- rate->idx = mi->r[ndx].rix;
- rate->count = minstrel_get_retry_count(&mi->r[ndx], info);
- info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
-}
-
-
-static void
-calc_rate_durations(enum nl80211_band band,
- struct minstrel_rate *d,
- struct ieee80211_rate *rate,
- struct cfg80211_chan_def *chandef)
-{
- int erp = !!(rate->flags & IEEE80211_RATE_ERP_G);
- int shift = ieee80211_chandef_get_shift(chandef);
-
- d->perfect_tx_time = ieee80211_frame_duration(band, 1200,
- DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1,
- shift);
- d->ack_time = ieee80211_frame_duration(band, 10,
- DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1,
- shift);
-}
-
-static void
-init_sample_table(struct minstrel_sta_info *mi)
-{
- unsigned int i, col, new_idx;
- u8 rnd[8];
-
- mi->sample_column = 0;
- mi->sample_row = 0;
- memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates);
-
- for (col = 0; col < SAMPLE_COLUMNS; col++) {
- prandom_bytes(rnd, sizeof(rnd));
- for (i = 0; i < mi->n_rates; i++) {
- new_idx = (i + rnd[i & 7]) % mi->n_rates;
- while (SAMPLE_TBL(mi, new_idx, col) != 0xff)
- new_idx = (new_idx + 1) % mi->n_rates;
-
- SAMPLE_TBL(mi, new_idx, col) = i;
- }
- }
-}
-
-static void
-minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
- struct cfg80211_chan_def *chandef,
- struct ieee80211_sta *sta, void *priv_sta)
-{
- struct minstrel_sta_info *mi = priv_sta;
- struct minstrel_priv *mp = priv;
- struct ieee80211_rate *ctl_rate;
- unsigned int i, n = 0;
- unsigned int t_slot = 9; /* FIXME: get real slot time */
- u32 rate_flags;
-
- mi->sta = sta;
- mi->lowest_rix = rate_lowest_index(sband, sta);
- ctl_rate = &sband->bitrates[mi->lowest_rix];
- mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10,
- ctl_rate->bitrate,
- !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1,
- ieee80211_chandef_get_shift(chandef));
-
- rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
- memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate));
- mi->max_prob_rate = 0;
-
- for (i = 0; i < sband->n_bitrates; i++) {
- struct minstrel_rate *mr = &mi->r[n];
- struct minstrel_rate_stats *mrs = &mi->r[n].stats;
- unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0;
- unsigned int tx_time_single;
- unsigned int cw = mp->cw_min;
- int shift;
-
- if (!rate_supported(sta, sband->band, i))
- continue;
- if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
- continue;
-
- n++;
- memset(mr, 0, sizeof(*mr));
- memset(mrs, 0, sizeof(*mrs));
-
- mr->rix = i;
- shift = ieee80211_chandef_get_shift(chandef);
- mr->bitrate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
- (1 << shift) * 5);
- calc_rate_durations(sband->band, mr, &sband->bitrates[i],
- chandef);
-
- /* calculate maximum number of retransmissions before
- * fallback (based on maximum segment size) */
- mr->sample_limit = -1;
- mrs->retry_count = 1;
- mr->retry_count_cts = 1;
- mrs->retry_count_rtscts = 1;
- tx_time = mr->perfect_tx_time + mi->sp_ack_dur;
- do {
- /* add one retransmission */
- tx_time_single = mr->ack_time + mr->perfect_tx_time;
-
- /* contention window */
- tx_time_single += (t_slot * cw) >> 1;
- cw = min((cw << 1) | 1, mp->cw_max);
-
- tx_time += tx_time_single;
- tx_time_cts += tx_time_single + mi->sp_ack_dur;
- tx_time_rtscts += tx_time_single + 2 * mi->sp_ack_dur;
- if ((tx_time_cts < mp->segment_size) &&
- (mr->retry_count_cts < mp->max_retry))
- mr->retry_count_cts++;
- if ((tx_time_rtscts < mp->segment_size) &&
- (mrs->retry_count_rtscts < mp->max_retry))
- mrs->retry_count_rtscts++;
- } while ((tx_time < mp->segment_size) &&
- (++mr->stats.retry_count < mp->max_retry));
- mr->adjusted_retry_count = mrs->retry_count;
- if (!(sband->bitrates[i].flags & IEEE80211_RATE_ERP_G))
- mr->retry_count_cts = mrs->retry_count;
- }
-
- for (i = n; i < sband->n_bitrates; i++) {
- struct minstrel_rate *mr = &mi->r[i];
- mr->rix = -1;
- }
-
- mi->n_rates = n;
- mi->last_stats_update = jiffies;
-
- init_sample_table(mi);
- minstrel_update_rates(mp, mi);
-}
-
-static u32 minstrel_get_expected_throughput(void *priv_sta)
-{
- struct minstrel_sta_info *mi = priv_sta;
- struct minstrel_rate_stats *tmp_mrs;
- int idx = mi->max_tp_rate[0];
- int tmp_cur_tp;
-
- /* convert pkt per sec in kbps (1200 is the average pkt size used for
- * computing cur_tp
- */
- tmp_mrs = &mi->r[idx].stats;
- tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_avg) * 10;
- tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
-
- return tmp_cur_tp;
-}
-
-const struct rate_control_ops mac80211_minstrel = {
- .tx_status_ext = minstrel_tx_status,
- .get_rate = minstrel_get_rate,
- .rate_init = minstrel_rate_init,
- .get_expected_throughput = minstrel_get_expected_throughput,
-};
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
deleted file mode 100644
index 86cd80b3ffde..000000000000
--- a/net/mac80211/rc80211_minstrel.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org>
- */
-
-#ifndef __RC_MINSTREL_H
-#define __RC_MINSTREL_H
-
-#define EWMA_LEVEL 96 /* ewma weighting factor [/EWMA_DIV] */
-#define EWMA_DIV 128
-#define SAMPLE_COLUMNS 10 /* number of columns in sample table */
-
-/* scaled fraction values */
-#define MINSTREL_SCALE 12
-#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
-#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
-
-/* number of highest throughput rates to consider*/
-#define MAX_THR_RATES 4
-
-/*
- * Coefficients for moving average with noise filter (period=16),
- * scaled by 10 bits
- *
- * a1 = exp(-pi * sqrt(2) / period)
- * coeff2 = 2 * a1 * cos(sqrt(2) * 2 * pi / period)
- * coeff3 = -sqr(a1)
- * coeff1 = 1 - coeff2 - coeff3
- */
-#define MINSTREL_AVG_COEFF1 (MINSTREL_FRAC(1, 1) - \
- MINSTREL_AVG_COEFF2 - \
- MINSTREL_AVG_COEFF3)
-#define MINSTREL_AVG_COEFF2 0x00001499
-#define MINSTREL_AVG_COEFF3 -0x0000092e
-
-/*
- * Perform EWMA (Exponentially Weighted Moving Average) calculation
- */
-static inline int
-minstrel_ewma(int old, int new, int weight)
-{
- int diff, incr;
-
- diff = new - old;
- incr = (EWMA_DIV - weight) * diff / EWMA_DIV;
-
- return old + incr;
-}
-
-static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in)
-{
- s32 out_1 = *prev_1;
- s32 out_2 = *prev_2;
- s32 val;
-
- if (!in)
- in += 1;
-
- if (!out_1) {
- val = out_1 = in;
- goto out;
- }
-
- val = MINSTREL_AVG_COEFF1 * in;
- val += MINSTREL_AVG_COEFF2 * out_1;
- val += MINSTREL_AVG_COEFF3 * out_2;
- val >>= MINSTREL_SCALE;
-
- if (val > 1 << MINSTREL_SCALE)
- val = 1 << MINSTREL_SCALE;
- if (val < 0)
- val = 1;
-
-out:
- *prev_2 = out_1;
- *prev_1 = val;
-
- return val;
-}
-
-struct minstrel_rate_stats {
- /* current / last sampling period attempts/success counters */
- u16 attempts, last_attempts;
- u16 success, last_success;
-
- /* total attempts/success counters */
- u32 att_hist, succ_hist;
-
- /* prob_avg - moving average of prob */
- u16 prob_avg;
- u16 prob_avg_1;
-
- /* maximum retry counts */
- u8 retry_count;
- u8 retry_count_rtscts;
-
- u8 sample_skipped;
- bool retry_updated;
-};
-
-struct minstrel_rate {
- int bitrate;
-
- s8 rix;
- u8 retry_count_cts;
- u8 adjusted_retry_count;
-
- unsigned int perfect_tx_time;
- unsigned int ack_time;
-
- int sample_limit;
-
- struct minstrel_rate_stats stats;
-};
-
-struct minstrel_sta_info {
- struct ieee80211_sta *sta;
-
- unsigned long last_stats_update;
- unsigned int sp_ack_dur;
- unsigned int rate_avg;
-
- unsigned int lowest_rix;
-
- u8 max_tp_rate[MAX_THR_RATES];
- u8 max_prob_rate;
- unsigned int total_packets;
- unsigned int sample_packets;
-
- unsigned int sample_row;
- unsigned int sample_column;
-
- int n_rates;
- struct minstrel_rate *r;
- bool prev_sample;
-
- /* sampling table */
- u8 *sample_table;
-};
-
-struct minstrel_priv {
- struct ieee80211_hw *hw;
- bool has_mrr;
- bool new_avg;
- u32 sample_switch;
- unsigned int cw_min;
- unsigned int cw_max;
- unsigned int max_retry;
- unsigned int segment_size;
- unsigned int update_interval;
- unsigned int lookaround_rate;
- unsigned int lookaround_rate_mrr;
-
- u8 cck_rates[4];
-
-#ifdef CONFIG_MAC80211_DEBUGFS
- /*
- * enable fixed rate processing per RC
- * - write static index to debugfs:ieee80211/phyX/rc/fixed_rate_idx
- * - write -1 to enable RC processing again
- * - setting will be applied on next update
- */
- u32 fixed_rate_idx;
-#endif
-};
-
-struct minstrel_debugfs_info {
- size_t len;
- char buf[];
-};
-
-extern const struct rate_control_ops mac80211_minstrel;
-void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
-
-/* Recalculate success probabilities and counters for a given rate using EWMA */
-void minstrel_calc_rate_stats(struct minstrel_priv *mp,
- struct minstrel_rate_stats *mrs);
-int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg);
-
-/* debugfs */
-int minstrel_stats_open(struct inode *inode, struct file *file);
-int minstrel_stats_csv_open(struct inode *inode, struct file *file);
-
-#endif
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
deleted file mode 100644
index 9b8e0daeb7bb..000000000000
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on minstrel.c:
- * Copyright (C) 2005-2007 Derek Smithies <derek@indranet.co.nz>
- * Sponsored by Indranet Technologies Ltd
- *
- * Based on sample.c:
- * Copyright (c) 2005 John Bicket
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
- * redistribution must be conditioned upon including a substantially
- * similar Disclaimer requirement for further binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
- * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGES.
- */
-#include <linux/netdevice.h>
-#include <linux/types.h>
-#include <linux/skbuff.h>
-#include <linux/debugfs.h>
-#include <linux/ieee80211.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <net/mac80211.h>
-#include "rc80211_minstrel.h"
-
-int
-minstrel_stats_open(struct inode *inode, struct file *file)
-{
- struct minstrel_sta_info *mi = inode->i_private;
- struct minstrel_debugfs_info *ms;
- unsigned int i, tp_max, tp_avg, eprob;
- char *p;
-
- ms = kmalloc(2048, GFP_KERNEL);
- if (!ms)
- return -ENOMEM;
-
- file->private_data = ms;
- p = ms->buf;
- p += sprintf(p, "\n");
- p += sprintf(p,
- "best __________rate_________ ____statistics___ ____last_____ ______sum-of________\n");
- p += sprintf(p,
- "rate [name idx airtime max_tp] [avg(tp) avg(prob)] [retry|suc|att] [#success | #attempts]\n");
-
- for (i = 0; i < mi->n_rates; i++) {
- struct minstrel_rate *mr = &mi->r[i];
- struct minstrel_rate_stats *mrs = &mi->r[i].stats;
-
- *(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' ';
- *(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' ';
- *(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' ';
- *(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' ';
- *(p++) = (i == mi->max_prob_rate) ? 'P' : ' ';
-
- p += sprintf(p, " %3u%s ", mr->bitrate / 2,
- (mr->bitrate & 1 ? ".5" : " "));
- p += sprintf(p, "%3u ", i);
- p += sprintf(p, "%6u ", mr->perfect_tx_time);
-
- tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
- tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg);
- eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
-
- p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u"
- " %3u %3u %-3u "
- "%9llu %-9llu\n",
- tp_max / 10, tp_max % 10,
- tp_avg / 10, tp_avg % 10,
- eprob / 10, eprob % 10,
- mrs->retry_count,
- mrs->last_success,
- mrs->last_attempts,
- (unsigned long long)mrs->succ_hist,
- (unsigned long long)mrs->att_hist);
- }
- p += sprintf(p, "\nTotal packet count:: ideal %d "
- "lookaround %d\n\n",
- mi->total_packets - mi->sample_packets,
- mi->sample_packets);
- ms->len = p - ms->buf;
-
- WARN_ON(ms->len + sizeof(*ms) > 2048);
-
- return 0;
-}
-
-int
-minstrel_stats_csv_open(struct inode *inode, struct file *file)
-{
- struct minstrel_sta_info *mi = inode->i_private;
- struct minstrel_debugfs_info *ms;
- unsigned int i, tp_max, tp_avg, eprob;
- char *p;
-
- ms = kmalloc(2048, GFP_KERNEL);
- if (!ms)
- return -ENOMEM;
-
- file->private_data = ms;
- p = ms->buf;
-
- for (i = 0; i < mi->n_rates; i++) {
- struct minstrel_rate *mr = &mi->r[i];
- struct minstrel_rate_stats *mrs = &mi->r[i].stats;
-
- p += sprintf(p, "%s" ,((i == mi->max_tp_rate[0]) ? "A" : ""));
- p += sprintf(p, "%s" ,((i == mi->max_tp_rate[1]) ? "B" : ""));
- p += sprintf(p, "%s" ,((i == mi->max_tp_rate[2]) ? "C" : ""));
- p += sprintf(p, "%s" ,((i == mi->max_tp_rate[3]) ? "D" : ""));
- p += sprintf(p, "%s" ,((i == mi->max_prob_rate) ? "P" : ""));
-
- p += sprintf(p, ",%u%s", mr->bitrate / 2,
- (mr->bitrate & 1 ? ".5," : ","));
- p += sprintf(p, "%u,", i);
- p += sprintf(p, "%u,",mr->perfect_tx_time);
-
- tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100));
- tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg);
- eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000);
-
- p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,%u,"
- "%llu,%llu,%d,%d\n",
- tp_max / 10, tp_max % 10,
- tp_avg / 10, tp_avg % 10,
- eprob / 10, eprob % 10,
- mrs->retry_count,
- mrs->last_success,
- mrs->last_attempts,
- (unsigned long long)mrs->succ_hist,
- (unsigned long long)mrs->att_hist,
- mi->total_packets - mi->sample_packets,
- mi->sample_packets);
-
- }
- ms->len = p - ms->buf;
-
- WARN_ON(ms->len + sizeof(*ms) > 2048);
-
- return 0;
-}
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index b11a2af55b06..2f44f4919789 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -13,7 +13,6 @@
#include <net/mac80211.h>
#include "rate.h"
#include "sta_info.h"
-#include "rc80211_minstrel.h"
#include "rc80211_minstrel_ht.h"
#define AVG_AMPDU_SIZE 16
@@ -136,20 +135,16 @@
__VHT_GROUP(_streams, _sgi, _bw, \
VHT_GROUP_SHIFT(_streams, _sgi, _bw))
-#define CCK_DURATION(_bitrate, _short, _len) \
+#define CCK_DURATION(_bitrate, _short) \
(1000 * (10 /* SIFS */ + \
(_short ? 72 + 24 : 144 + 48) + \
- (8 * (_len + 4) * 10) / (_bitrate)))
-
-#define CCK_ACK_DURATION(_bitrate, _short) \
- (CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) + \
- CCK_DURATION(_bitrate, _short, AVG_PKT_SIZE))
+ (8 * (AVG_PKT_SIZE + 4) * 10) / (_bitrate)))
#define CCK_DURATION_LIST(_short, _s) \
- CCK_ACK_DURATION(10, _short) >> _s, \
- CCK_ACK_DURATION(20, _short) >> _s, \
- CCK_ACK_DURATION(55, _short) >> _s, \
- CCK_ACK_DURATION(110, _short) >> _s
+ CCK_DURATION(10, _short) >> _s, \
+ CCK_DURATION(20, _short) >> _s, \
+ CCK_DURATION(55, _short) >> _s, \
+ CCK_DURATION(110, _short) >> _s
#define __CCK_GROUP(_s) \
[MINSTREL_CCK_GROUP] = { \
@@ -163,10 +158,42 @@
}
#define CCK_GROUP_SHIFT \
- GROUP_SHIFT(CCK_ACK_DURATION(10, false))
+ GROUP_SHIFT(CCK_DURATION(10, false))
#define CCK_GROUP __CCK_GROUP(CCK_GROUP_SHIFT)
+#define OFDM_DURATION(_bitrate) \
+ (1000 * (16 /* SIFS + signal ext */ + \
+ 16 /* T_PREAMBLE */ + \
+ 4 /* T_SIGNAL */ + \
+ 4 * (((16 + 80 * (AVG_PKT_SIZE + 4) + 6) / \
+ ((_bitrate) * 4)))))
+
+#define OFDM_DURATION_LIST(_s) \
+ OFDM_DURATION(60) >> _s, \
+ OFDM_DURATION(90) >> _s, \
+ OFDM_DURATION(120) >> _s, \
+ OFDM_DURATION(180) >> _s, \
+ OFDM_DURATION(240) >> _s, \
+ OFDM_DURATION(360) >> _s, \
+ OFDM_DURATION(480) >> _s, \
+ OFDM_DURATION(540) >> _s
+
+#define __OFDM_GROUP(_s) \
+ [MINSTREL_OFDM_GROUP] = { \
+ .streams = 1, \
+ .flags = 0, \
+ .shift = _s, \
+ .duration = { \
+ OFDM_DURATION_LIST(_s), \
+ } \
+ }
+
+#define OFDM_GROUP_SHIFT \
+ GROUP_SHIFT(OFDM_DURATION(60))
+
+#define OFDM_GROUP __OFDM_GROUP(OFDM_GROUP_SHIFT)
+
static bool minstrel_vht_only = true;
module_param(minstrel_vht_only, bool, 0644);
@@ -203,6 +230,7 @@ const struct mcs_group minstrel_mcs_groups[] = {
MCS_GROUP(4, 1, BW_40),
CCK_GROUP,
+ OFDM_GROUP,
VHT_GROUP(1, 0, BW_20),
VHT_GROUP(2, 0, BW_20),
@@ -235,7 +263,17 @@ const struct mcs_group minstrel_mcs_groups[] = {
VHT_GROUP(4, 1, BW_80),
};
+const s16 minstrel_cck_bitrates[4] = { 10, 20, 55, 110 };
+const s16 minstrel_ofdm_bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 };
static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES] __read_mostly;
+static const u8 minstrel_sample_seq[] = {
+ MINSTREL_SAMPLE_TYPE_INC,
+ MINSTREL_SAMPLE_TYPE_JUMP,
+ MINSTREL_SAMPLE_TYPE_INC,
+ MINSTREL_SAMPLE_TYPE_JUMP,
+ MINSTREL_SAMPLE_TYPE_INC,
+ MINSTREL_SAMPLE_TYPE_SLOW,
+};
static void
minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi);
@@ -279,6 +317,13 @@ minstrel_get_valid_vht_rates(int bw, int nss, __le16 mcs_map)
return 0x3ff & ~mask;
}
+static bool
+minstrel_ht_is_legacy_group(int group)
+{
+ return group == MINSTREL_CCK_GROUP ||
+ group == MINSTREL_OFDM_GROUP;
+}
+
/*
* Look up an MCS group index based on mac80211 rate information
*/
@@ -308,37 +353,74 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
if (rate->flags & IEEE80211_TX_RC_MCS) {
group = minstrel_ht_get_group_idx(rate);
idx = rate->idx % 8;
- } else if (rate->flags & IEEE80211_TX_RC_VHT_MCS) {
+ goto out;
+ }
+
+ if (rate->flags & IEEE80211_TX_RC_VHT_MCS) {
group = minstrel_vht_get_group_idx(rate);
idx = ieee80211_rate_get_vht_mcs(rate);
- } else {
- group = MINSTREL_CCK_GROUP;
+ goto out;
+ }
- for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++)
- if (rate->idx == mp->cck_rates[idx])
- break;
+ group = MINSTREL_CCK_GROUP;
+ for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) {
+ if (rate->idx != mp->cck_rates[idx])
+ continue;
/* short preamble */
if ((mi->supported[group] & BIT(idx + 4)) &&
(rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE))
- idx += 4;
+ idx += 4;
+ goto out;
}
+
+ group = MINSTREL_OFDM_GROUP;
+ for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++)
+ if (rate->idx == mp->ofdm_rates[mi->band][idx])
+ goto out;
+
+ idx = 0;
+out:
return &mi->groups[group].rates[idx];
}
static inline struct minstrel_rate_stats *
minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index)
{
- return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES];
+ return &mi->groups[MI_RATE_GROUP(index)].rates[MI_RATE_IDX(index)];
+}
+
+static inline int minstrel_get_duration(int index)
+{
+ const struct mcs_group *group = &minstrel_mcs_groups[MI_RATE_GROUP(index)];
+ unsigned int duration = group->duration[MI_RATE_IDX(index)];
+
+ return duration << group->shift;
}
static unsigned int
minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi)
{
- if (!mi->avg_ampdu_len)
- return AVG_AMPDU_SIZE;
+ int duration;
+
+ if (mi->avg_ampdu_len)
+ return MINSTREL_TRUNC(mi->avg_ampdu_len);
+
+ if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(mi->max_tp_rate[0])))
+ return 1;
+
+ duration = minstrel_get_duration(mi->max_tp_rate[0]);
- return MINSTREL_TRUNC(mi->avg_ampdu_len);
+ if (duration > 400 * 1000)
+ return 2;
+
+ if (duration > 250 * 1000)
+ return 4;
+
+ if (duration > 150 * 1000)
+ return 8;
+
+ return 16;
}
/*
@@ -349,15 +431,19 @@ int
minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
int prob_avg)
{
- unsigned int nsecs = 0;
+ unsigned int nsecs = 0, overhead = mi->overhead;
+ unsigned int ampdu_len = 1;
/* do not account throughput if sucess prob is below 10% */
if (prob_avg < MINSTREL_FRAC(10, 100))
return 0;
- if (group != MINSTREL_CCK_GROUP)
- nsecs = 1000 * mi->overhead / minstrel_ht_avg_ampdu_len(mi);
+ if (minstrel_ht_is_legacy_group(group))
+ overhead = mi->overhead_legacy;
+ else
+ ampdu_len = minstrel_ht_avg_ampdu_len(mi);
+ nsecs = 1000 * overhead / ampdu_len;
nsecs += minstrel_mcs_groups[group].duration[rate] <<
minstrel_mcs_groups[group].shift;
@@ -367,10 +453,9 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
* (prob is scaled - see MINSTREL_FRAC above)
*/
if (prob_avg > MINSTREL_FRAC(90, 100))
- return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000)
- / nsecs));
- else
- return MINSTREL_TRUNC(100000 * ((prob_avg * 1000) / nsecs));
+ prob_avg = MINSTREL_FRAC(90, 100);
+
+ return MINSTREL_TRUNC(100 * ((prob_avg * 1000000) / nsecs));
}
/*
@@ -388,14 +473,14 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index,
int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob;
int j = MAX_THR_RATES;
- cur_group = index / MCS_GROUP_RATES;
- cur_idx = index % MCS_GROUP_RATES;
+ cur_group = MI_RATE_GROUP(index);
+ cur_idx = MI_RATE_IDX(index);
cur_prob = mi->groups[cur_group].rates[cur_idx].prob_avg;
cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob);
do {
- tmp_group = tp_list[j - 1] / MCS_GROUP_RATES;
- tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES;
+ tmp_group = MI_RATE_GROUP(tp_list[j - 1]);
+ tmp_idx = MI_RATE_IDX(tp_list[j - 1]);
tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx,
tmp_prob);
@@ -417,41 +502,50 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index,
* Find and set the topmost probability rate per sta and per group
*/
static void
-minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
+minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 *dest, u16 index)
{
struct minstrel_mcs_group_data *mg;
struct minstrel_rate_stats *mrs;
int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob;
- int max_tp_group, cur_tp_avg, cur_group, cur_idx;
+ int max_tp_group, max_tp_idx, max_tp_prob;
+ int cur_tp_avg, cur_group, cur_idx;
int max_gpr_group, max_gpr_idx;
int max_gpr_tp_avg, max_gpr_prob;
- cur_group = index / MCS_GROUP_RATES;
- cur_idx = index % MCS_GROUP_RATES;
- mg = &mi->groups[index / MCS_GROUP_RATES];
- mrs = &mg->rates[index % MCS_GROUP_RATES];
+ cur_group = MI_RATE_GROUP(index);
+ cur_idx = MI_RATE_IDX(index);
+ mg = &mi->groups[cur_group];
+ mrs = &mg->rates[cur_idx];
- tmp_group = mi->max_prob_rate / MCS_GROUP_RATES;
- tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES;
+ tmp_group = MI_RATE_GROUP(*dest);
+ tmp_idx = MI_RATE_IDX(*dest);
tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
/* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from
* MCS_GROUP as well as CCK_GROUP rates do not allow aggregation */
- max_tp_group = mi->max_tp_rate[0] / MCS_GROUP_RATES;
- if((index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) &&
- (max_tp_group != MINSTREL_CCK_GROUP))
+ max_tp_group = MI_RATE_GROUP(mi->max_tp_rate[0]);
+ max_tp_idx = MI_RATE_IDX(mi->max_tp_rate[0]);
+ max_tp_prob = mi->groups[max_tp_group].rates[max_tp_idx].prob_avg;
+
+ if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(index)) &&
+ !minstrel_ht_is_legacy_group(max_tp_group))
+ return;
+
+ /* skip rates faster than max tp rate with lower prob */
+ if (minstrel_get_duration(mi->max_tp_rate[0]) > minstrel_get_duration(index) &&
+ mrs->prob_avg < max_tp_prob)
return;
- max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
- max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
+ max_gpr_group = MI_RATE_GROUP(mg->max_group_prob_rate);
+ max_gpr_idx = MI_RATE_IDX(mg->max_group_prob_rate);
max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg;
if (mrs->prob_avg > MINSTREL_FRAC(75, 100)) {
cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
mrs->prob_avg);
if (cur_tp_avg > tmp_tp_avg)
- mi->max_prob_rate = index;
+ *dest = index;
max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group,
max_gpr_idx,
@@ -460,7 +554,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
mg->max_group_prob_rate = index;
} else {
if (mrs->prob_avg > tmp_prob)
- mi->max_prob_rate = index;
+ *dest = index;
if (mrs->prob_avg > max_gpr_prob)
mg->max_group_prob_rate = index;
}
@@ -476,24 +570,24 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
static void
minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi,
u16 tmp_mcs_tp_rate[MAX_THR_RATES],
- u16 tmp_cck_tp_rate[MAX_THR_RATES])
+ u16 tmp_legacy_tp_rate[MAX_THR_RATES])
{
unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp, tmp_prob;
int i;
- tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES;
- tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES;
+ tmp_group = MI_RATE_GROUP(tmp_legacy_tp_rate[0]);
+ tmp_idx = MI_RATE_IDX(tmp_legacy_tp_rate[0]);
tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
- tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES;
- tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES;
+ tmp_group = MI_RATE_GROUP(tmp_mcs_tp_rate[0]);
+ tmp_idx = MI_RATE_IDX(tmp_mcs_tp_rate[0]);
tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
if (tmp_cck_tp > tmp_mcs_tp) {
for(i = 0; i < MAX_THR_RATES; i++) {
- minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i],
+ minstrel_ht_sort_best_tp_rates(mi, tmp_legacy_tp_rate[i],
tmp_mcs_tp_rate);
}
}
@@ -511,14 +605,17 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
int tmp_max_streams, group, tmp_idx, tmp_prob;
int tmp_tp = 0;
- tmp_max_streams = minstrel_mcs_groups[mi->max_tp_rate[0] /
- MCS_GROUP_RATES].streams;
+ if (!mi->sta->ht_cap.ht_supported)
+ return;
+
+ group = MI_RATE_GROUP(mi->max_tp_rate[0]);
+ tmp_max_streams = minstrel_mcs_groups[group].streams;
for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
mg = &mi->groups[group];
if (!mi->supported[group] || group == MINSTREL_CCK_GROUP)
continue;
- tmp_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
+ tmp_idx = MI_RATE_IDX(mg->max_group_prob_rate);
tmp_prob = mi->groups[group].rates[tmp_idx].prob_avg;
if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) &&
@@ -531,133 +628,359 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
}
}
+static u16
+__minstrel_ht_get_sample_rate(struct minstrel_ht_sta *mi,
+ enum minstrel_sample_type type)
+{
+ u16 *rates = mi->sample[type].sample_rates;
+ u16 cur;
+ int i;
+
+ for (i = 0; i < MINSTREL_SAMPLE_RATES; i++) {
+ if (!rates[i])
+ continue;
+
+ cur = rates[i];
+ rates[i] = 0;
+ return cur;
+ }
+
+ return 0;
+}
+
static inline int
-minstrel_get_duration(int index)
+minstrel_ewma(int old, int new, int weight)
{
- const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
- unsigned int duration = group->duration[index % MCS_GROUP_RATES];
- return duration << group->shift;
+ int diff, incr;
+
+ diff = new - old;
+ incr = (EWMA_DIV - weight) * diff / EWMA_DIV;
+
+ return old + incr;
}
-static bool
-minstrel_ht_probe_group(struct minstrel_ht_sta *mi, const struct mcs_group *tp_group,
- int tp_idx, const struct mcs_group *group)
+static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in)
{
- if (group->bw < tp_group->bw)
- return false;
+ s32 out_1 = *prev_1;
+ s32 out_2 = *prev_2;
+ s32 val;
- if (group->streams == tp_group->streams)
- return true;
+ if (!in)
+ in += 1;
- if (tp_idx < 4 && group->streams == tp_group->streams - 1)
- return true;
+ if (!out_1) {
+ val = out_1 = in;
+ goto out;
+ }
+
+ val = MINSTREL_AVG_COEFF1 * in;
+ val += MINSTREL_AVG_COEFF2 * out_1;
+ val += MINSTREL_AVG_COEFF3 * out_2;
+ val >>= MINSTREL_SCALE;
+
+ if (val > 1 << MINSTREL_SCALE)
+ val = 1 << MINSTREL_SCALE;
+ if (val < 0)
+ val = 1;
+
+out:
+ *prev_2 = out_1;
+ *prev_1 = val;
- return group->streams == tp_group->streams + 1;
+ return val;
}
+/*
+* Recalculate statistics and counters of a given rate
+*/
static void
-minstrel_ht_find_probe_rates(struct minstrel_ht_sta *mi, u16 *rates, int *n_rates,
- bool faster_rate)
+minstrel_ht_calc_rate_stats(struct minstrel_priv *mp,
+ struct minstrel_rate_stats *mrs)
{
- const struct mcs_group *group, *tp_group;
- int i, g, max_dur;
- int tp_idx;
+ unsigned int cur_prob;
+
+ if (unlikely(mrs->attempts > 0)) {
+ cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts);
+ minstrel_filter_avg_add(&mrs->prob_avg,
+ &mrs->prob_avg_1, cur_prob);
+ mrs->att_hist += mrs->attempts;
+ mrs->succ_hist += mrs->success;
+ }
- tp_group = &minstrel_mcs_groups[mi->max_tp_rate[0] / MCS_GROUP_RATES];
- tp_idx = mi->max_tp_rate[0] % MCS_GROUP_RATES;
+ mrs->last_success = mrs->success;
+ mrs->last_attempts = mrs->attempts;
+ mrs->success = 0;
+ mrs->attempts = 0;
+}
- max_dur = minstrel_get_duration(mi->max_tp_rate[0]);
- if (faster_rate)
- max_dur -= max_dur / 16;
+static bool
+minstrel_ht_find_sample_rate(struct minstrel_ht_sta *mi, int type, int idx)
+{
+ int i;
- for (g = 0; g < MINSTREL_GROUPS_NB; g++) {
- u16 supported = mi->supported[g];
+ for (i = 0; i < MINSTREL_SAMPLE_RATES; i++) {
+ u16 cur = mi->sample[type].sample_rates[i];
- if (!supported)
- continue;
+ if (cur == idx)
+ return true;
- group = &minstrel_mcs_groups[g];
- if (!minstrel_ht_probe_group(mi, tp_group, tp_idx, group))
- continue;
+ if (!cur)
+ break;
+ }
- for (i = 0; supported; supported >>= 1, i++) {
- int idx;
+ return false;
+}
- if (!(supported & 1))
- continue;
+static int
+minstrel_ht_move_sample_rates(struct minstrel_ht_sta *mi, int type,
+ u32 fast_rate_dur, u32 slow_rate_dur)
+{
+ u16 *rates = mi->sample[type].sample_rates;
+ int i, j;
- if ((group->duration[i] << group->shift) > max_dur)
- continue;
+ for (i = 0, j = 0; i < MINSTREL_SAMPLE_RATES; i++) {
+ u32 duration;
+ bool valid = false;
+ u16 cur;
- idx = g * MCS_GROUP_RATES + i;
- if (idx == mi->max_tp_rate[0])
- continue;
+ cur = rates[i];
+ if (!cur)
+ continue;
- rates[(*n_rates)++] = idx;
+ duration = minstrel_get_duration(cur);
+ switch (type) {
+ case MINSTREL_SAMPLE_TYPE_SLOW:
+ valid = duration > fast_rate_dur &&
+ duration < slow_rate_dur;
+ break;
+ case MINSTREL_SAMPLE_TYPE_INC:
+ case MINSTREL_SAMPLE_TYPE_JUMP:
+ valid = duration < fast_rate_dur;
+ break;
+ default:
+ valid = false;
break;
}
+
+ if (!valid) {
+ rates[i] = 0;
+ continue;
+ }
+
+ if (i == j)
+ continue;
+
+ rates[j++] = cur;
+ rates[i] = 0;
}
+
+ return j;
}
-static void
-minstrel_ht_rate_sample_switch(struct minstrel_priv *mp,
- struct minstrel_ht_sta *mi)
+static int
+minstrel_ht_group_min_rate_offset(struct minstrel_ht_sta *mi, int group,
+ u32 max_duration)
{
- struct minstrel_rate_stats *mrs;
- u16 rates[MINSTREL_GROUPS_NB];
- int n_rates = 0;
- int probe_rate = 0;
- bool faster_rate;
+ u16 supported = mi->supported[group];
int i;
- u8 random;
- /*
- * Use rate switching instead of probing packets for devices with
- * little control over retry fallback behavior
- */
- if (mp->hw->max_rates > 1)
- return;
+ for (i = 0; i < MCS_GROUP_RATES && supported; i++, supported >>= 1) {
+ if (!(supported & BIT(0)))
+ continue;
- /*
- * If the current EWMA prob is >75%, look for a rate that's 6.25%
- * faster than the max tp rate.
- * If that fails, look again for a rate that is at least as fast
- */
- mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]);
- faster_rate = mrs->prob_avg > MINSTREL_FRAC(75, 100);
- minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate);
- if (!n_rates && faster_rate)
- minstrel_ht_find_probe_rates(mi, rates, &n_rates, false);
-
- /* If no suitable rate was found, try to pick the next one in the group */
- if (!n_rates) {
- int g_idx = mi->max_tp_rate[0] / MCS_GROUP_RATES;
- u16 supported = mi->supported[g_idx];
-
- supported >>= mi->max_tp_rate[0] % MCS_GROUP_RATES;
- for (i = 0; supported; supported >>= 1, i++) {
- if (!(supported & 1))
- continue;
+ if (minstrel_get_duration(MI_RATE(group, i)) >= max_duration)
+ continue;
- probe_rate = mi->max_tp_rate[0] + i;
+ return i;
+ }
+
+ return -1;
+}
+
+/*
+ * Incremental update rates:
+ * Flip through groups and pick the first group rate that is faster than the
+ * highest currently selected rate
+ */
+static u16
+minstrel_ht_next_inc_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur)
+{
+ struct minstrel_mcs_group_data *mg;
+ u8 type = MINSTREL_SAMPLE_TYPE_INC;
+ int i, index = 0;
+ u8 group;
+
+ group = mi->sample[type].sample_group;
+ for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) {
+ group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups);
+ mg = &mi->groups[group];
+
+ index = minstrel_ht_group_min_rate_offset(mi, group,
+ fast_rate_dur);
+ if (index < 0)
+ continue;
+
+ index = MI_RATE(group, index & 0xf);
+ if (!minstrel_ht_find_sample_rate(mi, type, index))
goto out;
+ }
+ index = 0;
+
+out:
+ mi->sample[type].sample_group = group;
+
+ return index;
+}
+
+static int
+minstrel_ht_next_group_sample_rate(struct minstrel_ht_sta *mi, int group,
+ u16 supported, int offset)
+{
+ struct minstrel_mcs_group_data *mg = &mi->groups[group];
+ u16 idx;
+ int i;
+
+ for (i = 0; i < MCS_GROUP_RATES; i++) {
+ idx = sample_table[mg->column][mg->index];
+ if (++mg->index >= MCS_GROUP_RATES) {
+ mg->index = 0;
+ if (++mg->column >= ARRAY_SIZE(sample_table))
+ mg->column = 0;
}
- return;
+ if (idx < offset)
+ continue;
+
+ if (!(supported & BIT(idx)))
+ continue;
+
+ return MI_RATE(group, idx);
}
- i = 0;
- if (n_rates > 1) {
- random = prandom_u32();
- i = random % n_rates;
+ return -1;
+}
+
+/*
+ * Jump rates:
+ * Sample random rates, use those that are faster than the highest
+ * currently selected rate. Rates between the fastest and the slowest
+ * get sorted into the slow sample bucket, but only if it has room
+ */
+static u16
+minstrel_ht_next_jump_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur,
+ u32 slow_rate_dur, int *slow_rate_ofs)
+{
+ struct minstrel_mcs_group_data *mg;
+ struct minstrel_rate_stats *mrs;
+ u32 max_duration = slow_rate_dur;
+ int i, index, offset;
+ u16 *slow_rates;
+ u16 supported;
+ u32 duration;
+ u8 group;
+
+ if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES)
+ max_duration = fast_rate_dur;
+
+ slow_rates = mi->sample[MINSTREL_SAMPLE_TYPE_SLOW].sample_rates;
+ group = mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_group;
+ for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) {
+ u8 type;
+
+ group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups);
+ mg = &mi->groups[group];
+
+ supported = mi->supported[group];
+ if (!supported)
+ continue;
+
+ offset = minstrel_ht_group_min_rate_offset(mi, group,
+ max_duration);
+ if (offset < 0)
+ continue;
+
+ index = minstrel_ht_next_group_sample_rate(mi, group, supported,
+ offset);
+ if (index < 0)
+ continue;
+
+ duration = minstrel_get_duration(index);
+ if (duration < fast_rate_dur)
+ type = MINSTREL_SAMPLE_TYPE_JUMP;
+ else
+ type = MINSTREL_SAMPLE_TYPE_SLOW;
+
+ if (minstrel_ht_find_sample_rate(mi, type, index))
+ continue;
+
+ if (type == MINSTREL_SAMPLE_TYPE_JUMP)
+ goto found;
+
+ if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES)
+ continue;
+
+ if (duration >= slow_rate_dur)
+ continue;
+
+ /* skip slow rates with high success probability */
+ mrs = minstrel_get_ratestats(mi, index);
+ if (mrs->prob_avg > MINSTREL_FRAC(95, 100))
+ continue;
+
+ slow_rates[(*slow_rate_ofs)++] = index;
+ if (*slow_rate_ofs >= MINSTREL_SAMPLE_RATES)
+ max_duration = fast_rate_dur;
}
- probe_rate = rates[i];
+ index = 0;
-out:
- mi->sample_rate = probe_rate;
- mi->sample_mode = MINSTREL_SAMPLE_ACTIVE;
+found:
+ mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_group = group;
+
+ return index;
+}
+
+static void
+minstrel_ht_refill_sample_rates(struct minstrel_ht_sta *mi)
+{
+ u32 prob_dur = minstrel_get_duration(mi->max_prob_rate);
+ u32 tp_dur = minstrel_get_duration(mi->max_tp_rate[0]);
+ u32 tp2_dur = minstrel_get_duration(mi->max_tp_rate[1]);
+ u32 fast_rate_dur = min(min(tp_dur, tp2_dur), prob_dur);
+ u32 slow_rate_dur = max(max(tp_dur, tp2_dur), prob_dur);
+ u16 *rates;
+ int i, j;
+
+ rates = mi->sample[MINSTREL_SAMPLE_TYPE_INC].sample_rates;
+ i = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_INC,
+ fast_rate_dur, slow_rate_dur);
+ while (i < MINSTREL_SAMPLE_RATES) {
+ rates[i] = minstrel_ht_next_inc_rate(mi, tp_dur);
+ if (!rates[i])
+ break;
+
+ i++;
+ }
+
+ rates = mi->sample[MINSTREL_SAMPLE_TYPE_JUMP].sample_rates;
+ i = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_JUMP,
+ fast_rate_dur, slow_rate_dur);
+ j = minstrel_ht_move_sample_rates(mi, MINSTREL_SAMPLE_TYPE_SLOW,
+ fast_rate_dur, slow_rate_dur);
+ while (i < MINSTREL_SAMPLE_RATES) {
+ rates[i] = minstrel_ht_next_jump_rate(mi, fast_rate_dur,
+ slow_rate_dur, &j);
+ if (!rates[i])
+ break;
+
+ i++;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(mi->sample); i++)
+ memcpy(mi->sample[i].cur_sample_rates, mi->sample[i].sample_rates,
+ sizeof(mi->sample[i].cur_sample_rates));
}
+
/*
* Update rate statistics and select new primary rates
*
@@ -668,26 +991,15 @@ out:
* higher throughput rates, even if the probablity is a bit lower
*/
static void
-minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
- bool sample)
+minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
struct minstrel_mcs_group_data *mg;
struct minstrel_rate_stats *mrs;
int group, i, j, cur_prob;
u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES];
- u16 tmp_cck_tp_rate[MAX_THR_RATES], index;
-
- mi->sample_mode = MINSTREL_SAMPLE_IDLE;
-
- if (sample) {
- mi->total_packets_cur = mi->total_packets -
- mi->total_packets_last;
- mi->total_packets_last = mi->total_packets;
- }
- if (!mp->sample_switch)
- sample = false;
- if (mi->total_packets_cur < SAMPLE_SWITCH_THR && mp->sample_switch != 1)
- sample = false;
+ u16 tmp_legacy_tp_rate[MAX_THR_RATES], tmp_max_prob_rate;
+ u16 index;
+ bool ht_supported = mi->sta->ht_cap.ht_supported;
if (mi->ampdu_packets > 0) {
if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN))
@@ -700,65 +1012,72 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
mi->ampdu_packets = 0;
}
- mi->sample_slow = 0;
- mi->sample_count = 0;
-
- memset(tmp_mcs_tp_rate, 0, sizeof(tmp_mcs_tp_rate));
- memset(tmp_cck_tp_rate, 0, sizeof(tmp_cck_tp_rate));
if (mi->supported[MINSTREL_CCK_GROUP])
- for (j = 0; j < ARRAY_SIZE(tmp_cck_tp_rate); j++)
- tmp_cck_tp_rate[j] = MINSTREL_CCK_GROUP * MCS_GROUP_RATES;
+ group = MINSTREL_CCK_GROUP;
+ else if (mi->supported[MINSTREL_OFDM_GROUP])
+ group = MINSTREL_OFDM_GROUP;
+ else
+ group = 0;
+
+ index = MI_RATE(group, 0);
+ for (j = 0; j < ARRAY_SIZE(tmp_legacy_tp_rate); j++)
+ tmp_legacy_tp_rate[j] = index;
if (mi->supported[MINSTREL_VHT_GROUP_0])
- index = MINSTREL_VHT_GROUP_0 * MCS_GROUP_RATES;
+ group = MINSTREL_VHT_GROUP_0;
+ else if (ht_supported)
+ group = MINSTREL_HT_GROUP_0;
+ else if (mi->supported[MINSTREL_CCK_GROUP])
+ group = MINSTREL_CCK_GROUP;
else
- index = MINSTREL_HT_GROUP_0 * MCS_GROUP_RATES;
+ group = MINSTREL_OFDM_GROUP;
+ index = MI_RATE(group, 0);
+ tmp_max_prob_rate = index;
for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++)
tmp_mcs_tp_rate[j] = index;
/* Find best rate sets within all MCS groups*/
for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
+ u16 *tp_rate = tmp_mcs_tp_rate;
+ u16 last_prob = 0;
mg = &mi->groups[group];
if (!mi->supported[group])
continue;
- mi->sample_count++;
-
/* (re)Initialize group rate indexes */
for(j = 0; j < MAX_THR_RATES; j++)
- tmp_group_tp_rate[j] = MCS_GROUP_RATES * group;
+ tmp_group_tp_rate[j] = MI_RATE(group, 0);
- for (i = 0; i < MCS_GROUP_RATES; i++) {
+ if (group == MINSTREL_CCK_GROUP && ht_supported)
+ tp_rate = tmp_legacy_tp_rate;
+
+ for (i = MCS_GROUP_RATES - 1; i >= 0; i--) {
if (!(mi->supported[group] & BIT(i)))
continue;
- index = MCS_GROUP_RATES * group + i;
+ index = MI_RATE(group, i);
mrs = &mg->rates[i];
mrs->retry_updated = false;
- minstrel_calc_rate_stats(mp, mrs);
+ minstrel_ht_calc_rate_stats(mp, mrs);
+
+ if (mrs->att_hist)
+ last_prob = max(last_prob, mrs->prob_avg);
+ else
+ mrs->prob_avg = max(last_prob, mrs->prob_avg);
cur_prob = mrs->prob_avg;
if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0)
continue;
/* Find max throughput rate set */
- if (group != MINSTREL_CCK_GROUP) {
- minstrel_ht_sort_best_tp_rates(mi, index,
- tmp_mcs_tp_rate);
- } else if (group == MINSTREL_CCK_GROUP) {
- minstrel_ht_sort_best_tp_rates(mi, index,
- tmp_cck_tp_rate);
- }
+ minstrel_ht_sort_best_tp_rates(mi, index, tp_rate);
/* Find max throughput rate set within a group */
minstrel_ht_sort_best_tp_rates(mi, index,
tmp_group_tp_rate);
-
- /* Find max probability rate per group and global */
- minstrel_ht_set_best_prob_rate(mi, index);
}
memcpy(mg->max_group_tp_rate, tmp_group_tp_rate,
@@ -766,19 +1085,34 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
}
/* Assign new rate set per sta */
- minstrel_ht_assign_best_tp_rates(mi, tmp_mcs_tp_rate, tmp_cck_tp_rate);
+ minstrel_ht_assign_best_tp_rates(mi, tmp_mcs_tp_rate,
+ tmp_legacy_tp_rate);
memcpy(mi->max_tp_rate, tmp_mcs_tp_rate, sizeof(mi->max_tp_rate));
- /* Try to increase robustness of max_prob_rate*/
- minstrel_ht_prob_rate_reduce_streams(mi);
+ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
+ if (!mi->supported[group])
+ continue;
+
+ mg = &mi->groups[group];
+ mg->max_group_prob_rate = MI_RATE(group, 0);
+
+ for (i = 0; i < MCS_GROUP_RATES; i++) {
+ if (!(mi->supported[group] & BIT(i)))
+ continue;
+
+ index = MI_RATE(group, i);
+
+ /* Find max probability rate per group and global */
+ minstrel_ht_set_best_prob_rate(mi, &tmp_max_prob_rate,
+ index);
+ }
+ }
- /* try to sample all available rates during each interval */
- mi->sample_count *= 8;
- if (mp->new_avg)
- mi->sample_count /= 2;
+ mi->max_prob_rate = tmp_max_prob_rate;
- if (sample)
- minstrel_ht_rate_sample_switch(mp, mi);
+ /* Try to increase robustness of max_prob_rate*/
+ minstrel_ht_prob_rate_reduce_streams(mi);
+ minstrel_ht_refill_sample_rates(mi);
#ifdef CONFIG_MAC80211_DEBUGFS
/* use fixed index if set */
@@ -786,17 +1120,20 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
for (i = 0; i < 4; i++)
mi->max_tp_rate[i] = mp->fixed_rate_idx;
mi->max_prob_rate = mp->fixed_rate_idx;
- mi->sample_mode = MINSTREL_SAMPLE_IDLE;
}
#endif
/* Reset update timer */
mi->last_stats_update = jiffies;
+ mi->sample_time = jiffies;
}
static bool
-minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rate)
+minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
+ struct ieee80211_tx_rate *rate)
{
+ int i;
+
if (rate->idx < 0)
return false;
@@ -807,32 +1144,15 @@ minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rat
rate->flags & IEEE80211_TX_RC_VHT_MCS)
return true;
- return rate->idx == mp->cck_rates[0] ||
- rate->idx == mp->cck_rates[1] ||
- rate->idx == mp->cck_rates[2] ||
- rate->idx == mp->cck_rates[3];
-}
-
-static void
-minstrel_set_next_sample_idx(struct minstrel_ht_sta *mi)
-{
- struct minstrel_mcs_group_data *mg;
-
- for (;;) {
- mi->sample_group++;
- mi->sample_group %= ARRAY_SIZE(minstrel_mcs_groups);
- mg = &mi->groups[mi->sample_group];
+ for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++)
+ if (rate->idx == mp->cck_rates[i])
+ return true;
- if (!mi->supported[mi->sample_group])
- continue;
+ for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++)
+ if (rate->idx == mp->ofdm_rates[mi->band][i])
+ return true;
- if (++mg->index >= MCS_GROUP_RATES) {
- mg->index = 0;
- if (++mg->column >= ARRAY_SIZE(sample_table))
- mg->column = 0;
- }
- break;
- }
+ return false;
}
static void
@@ -840,7 +1160,7 @@ minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary)
{
int group, orig_group;
- orig_group = group = *idx / MCS_GROUP_RATES;
+ orig_group = group = MI_RATE_GROUP(*idx);
while (group > 0) {
group--;
@@ -887,21 +1207,14 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
void *priv_sta, struct ieee80211_tx_status *st)
{
struct ieee80211_tx_info *info = st->info;
- struct minstrel_ht_sta_priv *msp = priv_sta;
- struct minstrel_ht_sta *mi = &msp->ht;
+ struct minstrel_ht_sta *mi = priv_sta;
struct ieee80211_tx_rate *ar = info->status.rates;
- struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL;
+ struct minstrel_rate_stats *rate, *rate2;
struct minstrel_priv *mp = priv;
- u32 update_interval = mp->update_interval / 2;
+ u32 update_interval = mp->update_interval;
bool last, update = false;
- bool sample_status = false;
int i;
- if (!msp->is_ht)
- return mac80211_minstrel.tx_status_ext(priv, sband,
- &msp->legacy, st);
-
-
/* This packet was aggregated but doesn't carry status info */
if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
!(info->flags & IEEE80211_TX_STAT_AMPDU))
@@ -913,64 +1226,31 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
info->status.ampdu_len = 1;
}
- mi->ampdu_packets++;
- mi->ampdu_len += info->status.ampdu_len;
-
- if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {
- int avg_ampdu_len = minstrel_ht_avg_ampdu_len(mi);
-
- mi->sample_wait = 16 + 2 * avg_ampdu_len;
- mi->sample_tries = 1;
- mi->sample_count--;
+ /* wraparound */
+ if (mi->total_packets >= ~0 - info->status.ampdu_len) {
+ mi->total_packets = 0;
+ mi->sample_packets = 0;
}
+ mi->total_packets += info->status.ampdu_len;
if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
mi->sample_packets += info->status.ampdu_len;
- if (mi->sample_mode != MINSTREL_SAMPLE_IDLE)
- rate_sample = minstrel_get_ratestats(mi, mi->sample_rate);
+ mi->ampdu_packets++;
+ mi->ampdu_len += info->status.ampdu_len;
- last = !minstrel_ht_txstat_valid(mp, &ar[0]);
+ last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]);
for (i = 0; !last; i++) {
last = (i == IEEE80211_TX_MAX_RATES - 1) ||
- !minstrel_ht_txstat_valid(mp, &ar[i + 1]);
+ !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]);
rate = minstrel_ht_get_stats(mp, mi, &ar[i]);
- if (rate == rate_sample)
- sample_status = true;
-
if (last)
rate->success += info->status.ampdu_ack_len;
rate->attempts += ar[i].count * info->status.ampdu_len;
}
- switch (mi->sample_mode) {
- case MINSTREL_SAMPLE_IDLE:
- if (mp->new_avg &&
- (mp->hw->max_rates > 1 ||
- mi->total_packets_cur < SAMPLE_SWITCH_THR))
- update_interval /= 2;
- break;
-
- case MINSTREL_SAMPLE_ACTIVE:
- if (!sample_status)
- break;
-
- mi->sample_mode = MINSTREL_SAMPLE_PENDING;
- update = true;
- break;
-
- case MINSTREL_SAMPLE_PENDING:
- if (sample_status)
- break;
-
- update = true;
- minstrel_ht_update_stats(mp, mi, false);
- break;
- }
-
-
if (mp->hw->max_rates > 1) {
/*
* check for sudden death of spatial multiplexing,
@@ -993,7 +1273,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
if (time_after(jiffies, mi->last_stats_update + update_interval)) {
update = true;
- minstrel_ht_update_stats(mp, mi, true);
+ minstrel_ht_update_stats(mp, mi);
}
if (update)
@@ -1031,7 +1311,10 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
ctime += (t_slot * cw) >> 1;
cw = min((cw << 1) | 1, mp->cw_max);
- if (index / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) {
+ if (minstrel_ht_is_legacy_group(MI_RATE_GROUP(index))) {
+ overhead = mi->overhead_legacy;
+ overhead_rtscts = mi->overhead_legacy_rtscts;
+ } else {
overhead = mi->overhead;
overhead_rtscts = mi->overhead_rtscts;
}
@@ -1061,7 +1344,8 @@ static void
minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
struct ieee80211_sta_rates *ratetbl, int offset, int index)
{
- const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
+ int group_idx = MI_RATE_GROUP(index);
+ const struct mcs_group *group = &minstrel_mcs_groups[group_idx];
struct minstrel_rate_stats *mrs;
u8 idx;
u16 flags = group->flags;
@@ -1080,13 +1364,17 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
ratetbl->rate[offset].count_rts = mrs->retry_count_rtscts;
}
- if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP)
+ index = MI_RATE_IDX(index);
+ if (group_idx == MINSTREL_CCK_GROUP)
idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)];
+ else if (group_idx == MINSTREL_OFDM_GROUP)
+ idx = mp->ofdm_rates[mi->band][index %
+ ARRAY_SIZE(mp->ofdm_rates[0])];
else if (flags & IEEE80211_TX_RC_VHT_MCS)
idx = ((group->streams - 1) << 4) |
- ((index % MCS_GROUP_RATES) & 0xF);
+ (index & 0xF);
else
- idx = index % MCS_GROUP_RATES + (group->streams - 1) * 8;
+ idx = index + (group->streams - 1) * 8;
/* enable RTS/CTS if needed:
* - if station is in dynamic SMPS (and streams > 1)
@@ -1106,17 +1394,17 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
static inline int
minstrel_ht_get_prob_avg(struct minstrel_ht_sta *mi, int rate)
{
- int group = rate / MCS_GROUP_RATES;
- rate %= MCS_GROUP_RATES;
+ int group = MI_RATE_GROUP(rate);
+ rate = MI_RATE_IDX(rate);
return mi->groups[group].rates[rate].prob_avg;
}
static int
minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
{
- int group = mi->max_prob_rate / MCS_GROUP_RATES;
+ int group = MI_RATE_GROUP(mi->max_prob_rate);
const struct mcs_group *g = &minstrel_mcs_groups[group];
- int rate = mi->max_prob_rate % MCS_GROUP_RATES;
+ int rate = MI_RATE_IDX(mi->max_prob_rate);
unsigned int duration;
/* Disable A-MSDU if max_prob_rate is bad */
@@ -1164,18 +1452,14 @@ static void
minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
struct ieee80211_sta_rates *rates;
- u16 first_rate = mi->max_tp_rate[0];
int i = 0;
- if (mi->sample_mode == MINSTREL_SAMPLE_ACTIVE)
- first_rate = mi->sample_rate;
-
rates = kzalloc(sizeof(*rates), GFP_ATOMIC);
if (!rates)
return;
/* Start with max_tp_rate[0] */
- minstrel_ht_set_rate(mp, mi, rates, i++, first_rate);
+ minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]);
if (mp->hw->max_rates >= 3) {
/* At least 3 tx rates supported, use max_tp_rate[1] next */
@@ -1191,102 +1475,20 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
rate_control_set_rates(mp->hw, mi->sta, rates);
}
-static int
-minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
+static u16
+minstrel_ht_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
{
- struct minstrel_rate_stats *mrs;
- struct minstrel_mcs_group_data *mg;
- unsigned int sample_dur, sample_group, cur_max_tp_streams;
- int tp_rate1, tp_rate2;
- int sample_idx = 0;
-
- if (mp->hw->max_rates == 1 && mp->sample_switch &&
- (mi->total_packets_cur >= SAMPLE_SWITCH_THR ||
- mp->sample_switch == 1))
- return -1;
-
- if (mi->sample_wait > 0) {
- mi->sample_wait--;
- return -1;
- }
-
- if (!mi->sample_tries)
- return -1;
-
- sample_group = mi->sample_group;
- mg = &mi->groups[sample_group];
- sample_idx = sample_table[mg->column][mg->index];
- minstrel_set_next_sample_idx(mi);
+ u8 seq;
- if (!(mi->supported[sample_group] & BIT(sample_idx)))
- return -1;
-
- mrs = &mg->rates[sample_idx];
- sample_idx += sample_group * MCS_GROUP_RATES;
-
- /* Set tp_rate1, tp_rate2 to the highest / second highest max_tp_rate */
- if (minstrel_get_duration(mi->max_tp_rate[0]) >
- minstrel_get_duration(mi->max_tp_rate[1])) {
- tp_rate1 = mi->max_tp_rate[1];
- tp_rate2 = mi->max_tp_rate[0];
+ if (mp->hw->max_rates > 1) {
+ seq = mi->sample_seq;
+ mi->sample_seq = (seq + 1) % ARRAY_SIZE(minstrel_sample_seq);
+ seq = minstrel_sample_seq[seq];
} else {
- tp_rate1 = mi->max_tp_rate[0];
- tp_rate2 = mi->max_tp_rate[1];
+ seq = MINSTREL_SAMPLE_TYPE_INC;
}
- /*
- * Sampling might add some overhead (RTS, no aggregation)
- * to the frame. Hence, don't use sampling for the highest currently
- * used highest throughput or probability rate.
- */
- if (sample_idx == mi->max_tp_rate[0] || sample_idx == mi->max_prob_rate)
- return -1;
-
- /*
- * Do not sample if the probability is already higher than 95%,
- * or if the rate is 3 times slower than the current max probability
- * rate, to avoid wasting airtime.
- */
- sample_dur = minstrel_get_duration(sample_idx);
- if (mrs->prob_avg > MINSTREL_FRAC(95, 100) ||
- minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur)
- return -1;
-
-
- /*
- * For devices with no configurable multi-rate retry, skip sampling
- * below the per-group max throughput rate, and only use one sampling
- * attempt per rate
- */
- if (mp->hw->max_rates == 1 &&
- (minstrel_get_duration(mg->max_group_tp_rate[0]) < sample_dur ||
- mrs->attempts))
- return -1;
-
- /* Skip already sampled slow rates */
- if (sample_dur >= minstrel_get_duration(tp_rate1) && mrs->attempts)
- return -1;
-
- /*
- * Make sure that lower rates get sampled only occasionally,
- * if the link is working perfectly.
- */
-
- cur_max_tp_streams = minstrel_mcs_groups[tp_rate1 /
- MCS_GROUP_RATES].streams;
- if (sample_dur >= minstrel_get_duration(tp_rate2) &&
- (cur_max_tp_streams - 1 <
- minstrel_mcs_groups[sample_group].streams ||
- sample_dur >= minstrel_get_duration(mi->max_prob_rate))) {
- if (mrs->sample_skipped < 20)
- return -1;
-
- if (mi->sample_slow++ > 2)
- return -1;
- }
- mi->sample_tries--;
-
- return sample_idx;
+ return __minstrel_ht_get_sample_rate(mi, seq);
}
static void
@@ -1296,16 +1498,12 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
const struct mcs_group *sample_group;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
struct ieee80211_tx_rate *rate = &info->status.rates[0];
- struct minstrel_ht_sta_priv *msp = priv_sta;
- struct minstrel_ht_sta *mi = &msp->ht;
+ struct minstrel_ht_sta *mi = priv_sta;
struct minstrel_priv *mp = priv;
- int sample_idx;
-
- if (!msp->is_ht)
- return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc);
+ u16 sample_idx;
if (!(info->flags & IEEE80211_TX_CTL_AMPDU) &&
- mi->max_prob_rate / MCS_GROUP_RATES != MINSTREL_CCK_GROUP)
+ !minstrel_ht_is_legacy_group(MI_RATE_GROUP(mi->max_prob_rate)))
minstrel_aggr_check(sta, txrc->skb);
info->flags |= mi->tx_flags;
@@ -1318,23 +1516,18 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
/* Don't use EAPOL frames for sampling on non-mrr hw */
if (mp->hw->max_rates == 1 &&
(info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
- sample_idx = -1;
- else
- sample_idx = minstrel_get_sample_rate(mp, mi);
-
- mi->total_packets++;
+ return;
- /* wraparound */
- if (mi->total_packets == ~0) {
- mi->total_packets = 0;
- mi->sample_packets = 0;
- }
+ if (time_is_before_jiffies(mi->sample_time))
+ return;
- if (sample_idx < 0)
+ mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL;
+ sample_idx = minstrel_ht_get_sample_rate(mp, mi);
+ if (!sample_idx)
return;
- sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES];
- sample_idx %= MCS_GROUP_RATES;
+ sample_group = &minstrel_mcs_groups[MI_RATE_GROUP(sample_idx)];
+ sample_idx = MI_RATE_IDX(sample_idx);
if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP] &&
(sample_idx >= 4) != txrc->short_preamble)
@@ -1346,8 +1539,11 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) {
int idx = sample_idx % ARRAY_SIZE(mp->cck_rates);
rate->idx = mp->cck_rates[idx];
+ } else if (sample_group == &minstrel_mcs_groups[MINSTREL_OFDM_GROUP]) {
+ int idx = sample_idx % ARRAY_SIZE(mp->ofdm_rates[0]);
+ rate->idx = mp->ofdm_rates[mi->band][idx];
} else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) {
- ieee80211_rate_set_vht(rate, sample_idx % MCS_GROUP_RATES,
+ ieee80211_rate_set_vht(rate, MI_RATE_IDX(sample_idx),
sample_group->streams);
} else {
rate->idx = sample_idx + (sample_group->streams - 1) * 8;
@@ -1366,44 +1562,59 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
if (sband->band != NL80211_BAND_2GHZ)
return;
- if (!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES))
+ if (sta->ht_cap.ht_supported &&
+ !ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES))
return;
- mi->cck_supported = 0;
- mi->cck_supported_short = 0;
for (i = 0; i < 4; i++) {
- if (!rate_supported(sta, sband->band, mp->cck_rates[i]))
+ if (mp->cck_rates[i] == 0xff ||
+ !rate_supported(sta, sband->band, mp->cck_rates[i]))
continue;
- mi->cck_supported |= BIT(i);
+ mi->supported[MINSTREL_CCK_GROUP] |= BIT(i);
if (sband->bitrates[i].flags & IEEE80211_RATE_SHORT_PREAMBLE)
- mi->cck_supported_short |= BIT(i);
+ mi->supported[MINSTREL_CCK_GROUP] |= BIT(i + 4);
}
+}
+
+static void
+minstrel_ht_update_ofdm(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
+ struct ieee80211_supported_band *sband,
+ struct ieee80211_sta *sta)
+{
+ const u8 *rates;
+ int i;
- mi->supported[MINSTREL_CCK_GROUP] = mi->cck_supported;
+ if (sta->ht_cap.ht_supported)
+ return;
+
+ rates = mp->ofdm_rates[sband->band];
+ for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++) {
+ if (rates[i] == 0xff ||
+ !rate_supported(sta, sband->band, rates[i]))
+ continue;
+
+ mi->supported[MINSTREL_OFDM_GROUP] |= BIT(i);
+ }
}
static void
minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
struct cfg80211_chan_def *chandef,
- struct ieee80211_sta *sta, void *priv_sta)
+ struct ieee80211_sta *sta, void *priv_sta)
{
struct minstrel_priv *mp = priv;
- struct minstrel_ht_sta_priv *msp = priv_sta;
- struct minstrel_ht_sta *mi = &msp->ht;
+ struct minstrel_ht_sta *mi = priv_sta;
struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
u16 ht_cap = sta->ht_cap.cap;
struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap;
+ const struct ieee80211_rate *ctl_rate;
+ bool ldpc, erp;
int use_vht;
int n_supported = 0;
int ack_dur;
int stbc;
int i;
- bool ldpc;
-
- /* fall back to the old minstrel for legacy stations */
- if (!sta->ht_cap.ht_supported)
- goto use_legacy;
BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != MINSTREL_GROUPS_NB);
@@ -1412,10 +1623,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
else
use_vht = 0;
- msp->is_ht = true;
memset(mi, 0, sizeof(*mi));
mi->sta = sta;
+ mi->band = sband->band;
mi->last_stats_update = jiffies;
ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0);
@@ -1423,17 +1634,15 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
mi->overhead += ack_dur;
mi->overhead_rtscts = mi->overhead + 2 * ack_dur;
- mi->avg_ampdu_len = MINSTREL_FRAC(1, 1);
+ ctl_rate = &sband->bitrates[rate_lowest_index(sband, sta)];
+ erp = ctl_rate->flags & IEEE80211_RATE_ERP_G;
+ ack_dur = ieee80211_frame_duration(sband->band, 10,
+ ctl_rate->bitrate, erp, 1,
+ ieee80211_chandef_get_shift(chandef));
+ mi->overhead_legacy = ack_dur;
+ mi->overhead_legacy_rtscts = mi->overhead_legacy + 2 * ack_dur;
- /* When using MRR, sample more on the first attempt, without delay */
- if (mp->has_mrr) {
- mi->sample_count = 16;
- mi->sample_wait = 0;
- } else {
- mi->sample_count = 8;
- mi->sample_wait = 8;
- }
- mi->sample_tries = 4;
+ mi->avg_ampdu_len = MINSTREL_FRAC(1, 1);
if (!use_vht) {
stbc = (ht_cap & IEEE80211_HT_CAP_RX_STBC) >>
@@ -1456,10 +1665,8 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
int bw, nss;
mi->supported[i] = 0;
- if (i == MINSTREL_CCK_GROUP) {
- minstrel_ht_update_cck(mp, mi, sband, sta);
+ if (minstrel_ht_is_legacy_group(i))
continue;
- }
if (gflags & IEEE80211_TX_RC_SHORT_GI) {
if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) {
@@ -1520,24 +1727,12 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
n_supported++;
}
- if (!n_supported)
- goto use_legacy;
-
- mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4;
+ minstrel_ht_update_cck(mp, mi, sband, sta);
+ minstrel_ht_update_ofdm(mp, mi, sband, sta);
/* create an initial rate table with the lowest supported rates */
- minstrel_ht_update_stats(mp, mi, true);
+ minstrel_ht_update_stats(mp, mi);
minstrel_ht_update_rates(mp, mi);
-
- return;
-
-use_legacy:
- msp->is_ht = false;
- memset(&msp->legacy, 0, sizeof(msp->legacy));
- msp->legacy.r = msp->ratelist;
- msp->legacy.sample_table = msp->sample_table;
- return mac80211_minstrel.rate_init(priv, sband, chandef, sta,
- &msp->legacy);
}
static void
@@ -1561,7 +1756,7 @@ static void *
minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
{
struct ieee80211_supported_band *sband;
- struct minstrel_ht_sta_priv *msp;
+ struct minstrel_ht_sta *mi;
struct minstrel_priv *mp = priv;
struct ieee80211_hw *hw = mp->hw;
int max_rates = 0;
@@ -1573,91 +1768,91 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
max_rates = sband->n_bitrates;
}
- msp = kzalloc(sizeof(*msp), gfp);
- if (!msp)
- return NULL;
-
- msp->ratelist = kcalloc(max_rates, sizeof(struct minstrel_rate), gfp);
- if (!msp->ratelist)
- goto error;
-
- msp->sample_table = kmalloc_array(max_rates, SAMPLE_COLUMNS, gfp);
- if (!msp->sample_table)
- goto error1;
-
- return msp;
-
-error1:
- kfree(msp->ratelist);
-error:
- kfree(msp);
- return NULL;
+ return kzalloc(sizeof(*mi), gfp);
}
static void
minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta)
{
- struct minstrel_ht_sta_priv *msp = priv_sta;
-
- kfree(msp->sample_table);
- kfree(msp->ratelist);
- kfree(msp);
+ kfree(priv_sta);
}
static void
-minstrel_ht_init_cck_rates(struct minstrel_priv *mp)
+minstrel_ht_fill_rate_array(u8 *dest, struct ieee80211_supported_band *sband,
+ const s16 *bitrates, int n_rates, u32 rate_flags)
{
- static const int bitrates[4] = { 10, 20, 55, 110 };
- struct ieee80211_supported_band *sband;
- u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
int i, j;
- sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ];
- if (!sband)
- return;
-
for (i = 0; i < sband->n_bitrates; i++) {
struct ieee80211_rate *rate = &sband->bitrates[i];
- if (rate->flags & IEEE80211_RATE_ERP_G)
- continue;
-
if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
continue;
- for (j = 0; j < ARRAY_SIZE(bitrates); j++) {
+ for (j = 0; j < n_rates; j++) {
if (rate->bitrate != bitrates[j])
continue;
- mp->cck_rates[j] = i;
+ dest[j] = i;
break;
}
}
}
+static void
+minstrel_ht_init_cck_rates(struct minstrel_priv *mp)
+{
+ static const s16 bitrates[4] = { 10, 20, 55, 110 };
+ struct ieee80211_supported_band *sband;
+ u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
+
+ memset(mp->cck_rates, 0xff, sizeof(mp->cck_rates));
+ sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ];
+ if (!sband)
+ return;
+
+ BUILD_BUG_ON(ARRAY_SIZE(mp->cck_rates) != ARRAY_SIZE(bitrates));
+ minstrel_ht_fill_rate_array(mp->cck_rates, sband,
+ minstrel_cck_bitrates,
+ ARRAY_SIZE(minstrel_cck_bitrates),
+ rate_flags);
+}
+
+static void
+minstrel_ht_init_ofdm_rates(struct minstrel_priv *mp, enum nl80211_band band)
+{
+ static const s16 bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 };
+ struct ieee80211_supported_band *sband;
+ u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
+
+ memset(mp->ofdm_rates[band], 0xff, sizeof(mp->ofdm_rates[band]));
+ sband = mp->hw->wiphy->bands[band];
+ if (!sband)
+ return;
+
+ BUILD_BUG_ON(ARRAY_SIZE(mp->ofdm_rates[band]) != ARRAY_SIZE(bitrates));
+ minstrel_ht_fill_rate_array(mp->ofdm_rates[band], sband,
+ minstrel_ofdm_bitrates,
+ ARRAY_SIZE(minstrel_ofdm_bitrates),
+ rate_flags);
+}
+
static void *
minstrel_ht_alloc(struct ieee80211_hw *hw)
{
struct minstrel_priv *mp;
+ int i;
mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC);
if (!mp)
return NULL;
- mp->sample_switch = -1;
-
/* contention window settings
* Just an approximation. Using the per-queue values would complicate
* the calculations and is probably unnecessary */
mp->cw_min = 15;
mp->cw_max = 1023;
- /* number of packets (in %) to use for sampling other rates
- * sample less often for non-mrr packets, because the overhead
- * is much higher than with mrr */
- mp->lookaround_rate = 5;
- mp->lookaround_rate_mrr = 10;
-
/* maximum time that the hw is allowed to stay in one MRR segment */
mp->segment_size = 6000;
@@ -1671,10 +1866,11 @@ minstrel_ht_alloc(struct ieee80211_hw *hw)
mp->has_mrr = true;
mp->hw = hw;
- mp->update_interval = HZ / 10;
- mp->new_avg = true;
+ mp->update_interval = HZ / 20;
minstrel_ht_init_cck_rates(mp);
+ for (i = 0; i < ARRAY_SIZE(mp->hw->wiphy->bands); i++)
+ minstrel_ht_init_ofdm_rates(mp, i);
return mp;
}
@@ -1688,10 +1884,6 @@ static void minstrel_ht_add_debugfs(struct ieee80211_hw *hw, void *priv,
mp->fixed_rate_idx = (u32) -1;
debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir,
&mp->fixed_rate_idx);
- debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir,
- &mp->sample_switch);
- debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir,
- &mp->new_avg);
}
#endif
@@ -1703,15 +1895,11 @@ minstrel_ht_free(void *priv)
static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
{
- struct minstrel_ht_sta_priv *msp = priv_sta;
- struct minstrel_ht_sta *mi = &msp->ht;
+ struct minstrel_ht_sta *mi = priv_sta;
int i, j, prob, tp_avg;
- if (!msp->is_ht)
- return mac80211_minstrel.get_expected_throughput(priv_sta);
-
- i = mi->max_tp_rate[0] / MCS_GROUP_RATES;
- j = mi->max_tp_rate[0] % MCS_GROUP_RATES;
+ i = MI_RATE_GROUP(mi->max_tp_rate[0]);
+ j = MI_RATE_IDX(mi->max_tp_rate[0]);
prob = mi->groups[i].rates[j].prob_avg;
/* convert tp_avg from pkt per second in kbps */
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index 53ea3c29debf..06e7126727ad 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -6,6 +6,35 @@
#ifndef __RC_MINSTREL_HT_H
#define __RC_MINSTREL_HT_H
+#include <linux/bitfield.h>
+
+/* number of highest throughput rates to consider*/
+#define MAX_THR_RATES 4
+#define SAMPLE_COLUMNS 10 /* number of columns in sample table */
+
+/* scaled fraction values */
+#define MINSTREL_SCALE 12
+#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
+#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
+
+#define EWMA_LEVEL 96 /* ewma weighting factor [/EWMA_DIV] */
+#define EWMA_DIV 128
+
+/*
+ * Coefficients for moving average with noise filter (period=16),
+ * scaled by 10 bits
+ *
+ * a1 = exp(-pi * sqrt(2) / period)
+ * coeff2 = 2 * a1 * cos(sqrt(2) * 2 * pi / period)
+ * coeff3 = -sqr(a1)
+ * coeff1 = 1 - coeff2 - coeff3
+ */
+#define MINSTREL_AVG_COEFF1 (MINSTREL_FRAC(1, 1) - \
+ MINSTREL_AVG_COEFF2 - \
+ MINSTREL_AVG_COEFF3)
+#define MINSTREL_AVG_COEFF2 0x00001499
+#define MINSTREL_AVG_COEFF3 -0x0000092e
+
/*
* The number of streams can be changed to 2 to reduce code
* size and memory footprint.
@@ -18,17 +47,55 @@
MINSTREL_HT_STREAM_GROUPS)
#define MINSTREL_VHT_GROUPS_NB (MINSTREL_MAX_STREAMS * \
MINSTREL_VHT_STREAM_GROUPS)
-#define MINSTREL_CCK_GROUPS_NB 1
+#define MINSTREL_LEGACY_GROUPS_NB 2
#define MINSTREL_GROUPS_NB (MINSTREL_HT_GROUPS_NB + \
MINSTREL_VHT_GROUPS_NB + \
- MINSTREL_CCK_GROUPS_NB)
+ MINSTREL_LEGACY_GROUPS_NB)
#define MINSTREL_HT_GROUP_0 0
#define MINSTREL_CCK_GROUP (MINSTREL_HT_GROUP_0 + MINSTREL_HT_GROUPS_NB)
-#define MINSTREL_VHT_GROUP_0 (MINSTREL_CCK_GROUP + 1)
+#define MINSTREL_OFDM_GROUP (MINSTREL_CCK_GROUP + 1)
+#define MINSTREL_VHT_GROUP_0 (MINSTREL_OFDM_GROUP + 1)
#define MCS_GROUP_RATES 10
+#define MI_RATE_IDX_MASK GENMASK(3, 0)
+#define MI_RATE_GROUP_MASK GENMASK(15, 4)
+
+#define MI_RATE(_group, _idx) \
+ (FIELD_PREP(MI_RATE_GROUP_MASK, _group) | \
+ FIELD_PREP(MI_RATE_IDX_MASK, _idx))
+
+#define MI_RATE_IDX(_rate) FIELD_GET(MI_RATE_IDX_MASK, _rate)
+#define MI_RATE_GROUP(_rate) FIELD_GET(MI_RATE_GROUP_MASK, _rate)
+
+#define MINSTREL_SAMPLE_RATES 5 /* rates per sample type */
+#define MINSTREL_SAMPLE_INTERVAL (HZ / 50)
+
+struct minstrel_priv {
+ struct ieee80211_hw *hw;
+ bool has_mrr;
+ unsigned int cw_min;
+ unsigned int cw_max;
+ unsigned int max_retry;
+ unsigned int segment_size;
+ unsigned int update_interval;
+
+ u8 cck_rates[4];
+ u8 ofdm_rates[NUM_NL80211_BANDS][8];
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ /*
+ * enable fixed rate processing per RC
+ * - write static index to debugfs:ieee80211/phyX/rc/fixed_rate_idx
+ * - write -1 to enable RC processing again
+ * - setting will be applied on next update
+ */
+ u32 fixed_rate_idx;
+#endif
+};
+
+
struct mcs_group {
u16 flags;
u8 streams;
@@ -37,8 +104,36 @@ struct mcs_group {
u16 duration[MCS_GROUP_RATES];
};
+extern const s16 minstrel_cck_bitrates[4];
+extern const s16 minstrel_ofdm_bitrates[8];
extern const struct mcs_group minstrel_mcs_groups[];
+struct minstrel_rate_stats {
+ /* current / last sampling period attempts/success counters */
+ u16 attempts, last_attempts;
+ u16 success, last_success;
+
+ /* total attempts/success counters */
+ u32 att_hist, succ_hist;
+
+ /* prob_avg - moving average of prob */
+ u16 prob_avg;
+ u16 prob_avg_1;
+
+ /* maximum retry counts */
+ u8 retry_count;
+ u8 retry_count_rtscts;
+
+ bool retry_updated;
+};
+
+enum minstrel_sample_type {
+ MINSTREL_SAMPLE_TYPE_INC,
+ MINSTREL_SAMPLE_TYPE_JUMP,
+ MINSTREL_SAMPLE_TYPE_SLOW,
+ __MINSTREL_SAMPLE_TYPE_MAX
+};
+
struct minstrel_mcs_group_data {
u8 index;
u8 column;
@@ -51,10 +146,10 @@ struct minstrel_mcs_group_data {
struct minstrel_rate_stats rates[MCS_GROUP_RATES];
};
-enum minstrel_sample_mode {
- MINSTREL_SAMPLE_IDLE,
- MINSTREL_SAMPLE_ACTIVE,
- MINSTREL_SAMPLE_PENDING,
+struct minstrel_sample_category {
+ u8 sample_group;
+ u16 sample_rates[MINSTREL_SAMPLE_RATES];
+ u16 cur_sample_rates[MINSTREL_SAMPLE_RATES];
};
struct minstrel_ht_sta {
@@ -77,28 +172,22 @@ struct minstrel_ht_sta {
/* overhead time in usec for each frame */
unsigned int overhead;
unsigned int overhead_rtscts;
+ unsigned int overhead_legacy;
+ unsigned int overhead_legacy_rtscts;
- unsigned int total_packets_last;
- unsigned int total_packets_cur;
unsigned int total_packets;
unsigned int sample_packets;
/* tx flags to add for frames for this sta */
u32 tx_flags;
- u8 sample_wait;
- u8 sample_tries;
- u8 sample_count;
- u8 sample_slow;
+ u8 band;
- enum minstrel_sample_mode sample_mode;
+ u8 sample_seq;
u16 sample_rate;
- /* current MCS group to be sampled */
- u8 sample_group;
-
- u8 cck_supported;
- u8 cck_supported_short;
+ unsigned long sample_time;
+ struct minstrel_sample_category sample[__MINSTREL_SAMPLE_TYPE_MAX];
/* Bitfield of supported MCS rates of all groups */
u16 supported[MINSTREL_GROUPS_NB];
@@ -107,16 +196,6 @@ struct minstrel_ht_sta {
struct minstrel_mcs_group_data groups[MINSTREL_GROUPS_NB];
};
-struct minstrel_ht_sta_priv {
- union {
- struct minstrel_ht_sta ht;
- struct minstrel_sta_info legacy;
- };
- void *ratelist;
- void *sample_table;
- bool is_ht;
-};
-
void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
int prob_avg);
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index bebb71917742..25b8a67a63a4 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -9,9 +9,13 @@
#include <linux/ieee80211.h>
#include <linux/export.h>
#include <net/mac80211.h>
-#include "rc80211_minstrel.h"
#include "rc80211_minstrel_ht.h"
+struct minstrel_debugfs_info {
+ size_t len;
+ char buf[];
+};
+
static ssize_t
minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
{
@@ -28,6 +32,18 @@ minstrel_stats_release(struct inode *inode, struct file *file)
return 0;
}
+static bool
+minstrel_ht_is_sample_rate(struct minstrel_ht_sta *mi, int idx)
+{
+ int type, i;
+
+ for (type = 0; type < ARRAY_SIZE(mi->sample); type++)
+ for (i = 0; i < MINSTREL_SAMPLE_RATES; i++)
+ if (mi->sample[type].cur_sample_rates[i] == idx)
+ return true;
+ return false;
+}
+
static char *
minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
{
@@ -52,8 +68,7 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
for (j = 0; j < MCS_GROUP_RATES; j++) {
struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j];
- static const int bitrates[4] = { 10, 20, 55, 110 };
- int idx = i * MCS_GROUP_RATES + j;
+ int idx = MI_RATE(i, j);
unsigned int duration;
if (!(mi->supported[i] & BIT(j)))
@@ -67,6 +82,9 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
p += sprintf(p, "VHT%c0 ", htmode);
p += sprintf(p, "%cGI ", gimode);
p += sprintf(p, "%d ", mg->streams);
+ } else if (i == MINSTREL_OFDM_GROUP) {
+ p += sprintf(p, "OFDM ");
+ p += sprintf(p, "1 ");
} else {
p += sprintf(p, "CCK ");
p += sprintf(p, "%cP ", j < 4 ? 'L' : 'S');
@@ -78,13 +96,19 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
*(p++) = (idx == mi->max_tp_rate[2]) ? 'C' : ' ';
*(p++) = (idx == mi->max_tp_rate[3]) ? 'D' : ' ';
*(p++) = (idx == mi->max_prob_rate) ? 'P' : ' ';
+ *(p++) = minstrel_ht_is_sample_rate(mi, idx) ? 'S' : ' ';
if (gflags & IEEE80211_TX_RC_MCS) {
p += sprintf(p, " MCS%-2u", (mg->streams - 1) * 8 + j);
} else if (gflags & IEEE80211_TX_RC_VHT_MCS) {
p += sprintf(p, " MCS%-1u/%1u", j, mg->streams);
} else {
- int r = bitrates[j % 4];
+ int r;
+
+ if (i == MINSTREL_OFDM_GROUP)
+ r = minstrel_ofdm_bitrates[j % 8];
+ else
+ r = minstrel_cck_bitrates[j % 4];
p += sprintf(p, " %2u.%1uM", r / 10, r % 10);
}
@@ -120,20 +144,11 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
static int
minstrel_ht_stats_open(struct inode *inode, struct file *file)
{
- struct minstrel_ht_sta_priv *msp = inode->i_private;
- struct minstrel_ht_sta *mi = &msp->ht;
+ struct minstrel_ht_sta *mi = inode->i_private;
struct minstrel_debugfs_info *ms;
unsigned int i;
- int ret;
char *p;
- if (!msp->is_ht) {
- inode->i_private = &msp->legacy;
- ret = minstrel_stats_open(inode, file);
- inode->i_private = msp;
- return ret;
- }
-
ms = kmalloc(32768, GFP_KERNEL);
if (!ms)
return -ENOMEM;
@@ -143,9 +158,9 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
p += sprintf(p, "\n");
p += sprintf(p,
- " best ____________rate__________ ____statistics___ _____last____ ______sum-of________\n");
+ " best ____________rate__________ ____statistics___ _____last____ ______sum-of________\n");
p += sprintf(p,
- "mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob)] [retry|suc|att] [#success | #attempts]\n");
+ "mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob)] [retry|suc|att] [#success | #attempts]\n");
p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p);
for (i = 0; i < MINSTREL_CCK_GROUP; i++)
@@ -199,8 +214,7 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
for (j = 0; j < MCS_GROUP_RATES; j++) {
struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j];
- static const int bitrates[4] = { 10, 20, 55, 110 };
- int idx = i * MCS_GROUP_RATES + j;
+ int idx = MI_RATE(i, j);
unsigned int duration;
if (!(mi->supported[i] & BIT(j)))
@@ -214,6 +228,8 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
p += sprintf(p, "VHT%c0,", htmode);
p += sprintf(p, "%cGI,", gimode);
p += sprintf(p, "%d,", mg->streams);
+ } else if (i == MINSTREL_OFDM_GROUP) {
+ p += sprintf(p, "OFDM,,1,");
} else {
p += sprintf(p, "CCK,");
p += sprintf(p, "%cP,", j < 4 ? 'L' : 'S');
@@ -225,13 +241,20 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[2]) ? "C" : ""));
p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[3]) ? "D" : ""));
p += sprintf(p, "%s" ,((idx == mi->max_prob_rate) ? "P" : ""));
+ p += sprintf(p, "%s", (minstrel_ht_is_sample_rate(mi, idx) ? "S" : ""));
if (gflags & IEEE80211_TX_RC_MCS) {
p += sprintf(p, ",MCS%-2u,", (mg->streams - 1) * 8 + j);
} else if (gflags & IEEE80211_TX_RC_VHT_MCS) {
p += sprintf(p, ",MCS%-1u/%1u,", j, mg->streams);
} else {
- int r = bitrates[j % 4];
+ int r;
+
+ if (i == MINSTREL_OFDM_GROUP)
+ r = minstrel_ofdm_bitrates[j % 8];
+ else
+ r = minstrel_cck_bitrates[j % 4];
+
p += sprintf(p, ",%2u.%1uM,", r / 10, r % 10);
}
@@ -270,22 +293,12 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p)
static int
minstrel_ht_stats_csv_open(struct inode *inode, struct file *file)
{
- struct minstrel_ht_sta_priv *msp = inode->i_private;
- struct minstrel_ht_sta *mi = &msp->ht;
+ struct minstrel_ht_sta *mi = inode->i_private;
struct minstrel_debugfs_info *ms;
unsigned int i;
- int ret;
char *p;
- if (!msp->is_ht) {
- inode->i_private = &msp->legacy;
- ret = minstrel_stats_csv_open(inode, file);
- inode->i_private = msp;
- return ret;
- }
-
ms = kmalloc(32768, GFP_KERNEL);
-
if (!ms)
return -ENOMEM;
@@ -316,10 +329,8 @@ static const struct file_operations minstrel_ht_stat_csv_fops = {
void
minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
{
- struct minstrel_ht_sta_priv *msp = priv_sta;
-
- debugfs_create_file("rc_stats", 0444, dir, msp,
+ debugfs_create_file("rc_stats", 0444, dir, priv_sta,
&minstrel_ht_stat_fops);
- debugfs_create_file("rc_stats_csv", 0444, dir, msp,
+ debugfs_create_file("rc_stats_csv", 0444, dir, priv_sta,
&minstrel_ht_stat_csv_fops);
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 13b9bcc4865d..c1343c028b76 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4095,7 +4095,9 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
.vif_type = sdata->vif.type,
.control_port_protocol = sdata->control_port_protocol,
}, *old, *new = NULL;
+ bool set_offload = false;
bool assign = false;
+ bool offload;
/* use sparse to check that we don't return without updating */
__acquire(check_fast_rx);
@@ -4176,6 +4178,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
rcu_read_lock();
key = rcu_dereference(sta->ptk[sta->ptk_idx]);
+ if (!key)
+ key = rcu_dereference(sdata->default_unicast_key);
if (key) {
switch (key->conf.cipher) {
case WLAN_CIPHER_SUITE_TKIP:
@@ -4206,6 +4210,17 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
if (assign)
new = kmemdup(&fastrx, sizeof(fastrx), GFP_KERNEL);
+ offload = assign &&
+ (sdata->vif.offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED);
+
+ if (offload)
+ set_offload = !test_and_set_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
+ else
+ set_offload = test_and_clear_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD);
+
+ if (set_offload)
+ drv_sta_set_decap_offload(local, sdata, &sta->sta, assign);
+
spin_lock_bh(&sta->lock);
old = rcu_dereference_protected(sta->fast_rx, true);
rcu_assign_pointer(sta->fast_rx, new);
@@ -4252,6 +4267,104 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->sta_mtx);
}
+static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
+ struct ieee80211_fast_rx *fast_rx,
+ int orig_len)
+{
+ struct ieee80211_sta_rx_stats *stats;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
+ struct sta_info *sta = rx->sta;
+ struct sk_buff *skb = rx->skb;
+ void *sa = skb->data + ETH_ALEN;
+ void *da = skb->data;
+
+ stats = &sta->rx_stats;
+ if (fast_rx->uses_rss)
+ stats = this_cpu_ptr(sta->pcpu_rx_stats);
+
+ /* statistics part of ieee80211_rx_h_sta_process() */
+ if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
+ stats->last_signal = status->signal;
+ if (!fast_rx->uses_rss)
+ ewma_signal_add(&sta->rx_stats_avg.signal,
+ -status->signal);
+ }
+
+ if (status->chains) {
+ int i;
+
+ stats->chains = status->chains;
+ for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
+ int signal = status->chain_signal[i];
+
+ if (!(status->chains & BIT(i)))
+ continue;
+
+ stats->chain_signal_last[i] = signal;
+ if (!fast_rx->uses_rss)
+ ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
+ -signal);
+ }
+ }
+ /* end of statistics */
+
+ stats->last_rx = jiffies;
+ stats->last_rate = sta_stats_encode_rate(status);
+
+ stats->fragments++;
+ stats->packets++;
+
+ skb->dev = fast_rx->dev;
+
+ dev_sw_netstats_rx_add(fast_rx->dev, skb->len);
+
+ /* The seqno index has the same property as needed
+ * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS
+ * for non-QoS-data frames. Here we know it's a data
+ * frame, so count MSDUs.
+ */
+ u64_stats_update_begin(&stats->syncp);
+ stats->msdu[rx->seqno_idx]++;
+ stats->bytes += orig_len;
+ u64_stats_update_end(&stats->syncp);
+
+ if (fast_rx->internal_forward) {
+ struct sk_buff *xmit_skb = NULL;
+ if (is_multicast_ether_addr(da)) {
+ xmit_skb = skb_copy(skb, GFP_ATOMIC);
+ } else if (!ether_addr_equal(da, sa) &&
+ sta_info_get(rx->sdata, da)) {
+ xmit_skb = skb;
+ skb = NULL;
+ }
+
+ if (xmit_skb) {
+ /*
+ * Send to wireless media and increase priority by 256
+ * to keep the received priority instead of
+ * reclassifying the frame (see cfg80211_classify8021d).
+ */
+ xmit_skb->priority += 256;
+ xmit_skb->protocol = htons(ETH_P_802_3);
+ skb_reset_network_header(xmit_skb);
+ skb_reset_mac_header(xmit_skb);
+ dev_queue_xmit(xmit_skb);
+ }
+
+ if (!skb)
+ return;
+ }
+
+ /* deliver to local stack */
+ skb->protocol = eth_type_trans(skb, fast_rx->dev);
+ memset(skb->cb, 0, sizeof(skb->cb));
+ if (rx->list)
+ list_add_tail(&skb->list, rx->list);
+ else
+ netif_receive_skb(skb);
+
+}
+
static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
struct ieee80211_fast_rx *fast_rx)
{
@@ -4272,9 +4385,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
} addrs __aligned(2);
struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
- if (fast_rx->uses_rss)
- stats = this_cpu_ptr(sta->pcpu_rx_stats);
-
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
* to a common data structure; drivers can implement that per queue
* but we don't have that information in mac80211
@@ -4348,32 +4458,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
pskb_trim(skb, skb->len - fast_rx->icv_len))
goto drop;
- /* statistics part of ieee80211_rx_h_sta_process() */
- if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
- stats->last_signal = status->signal;
- if (!fast_rx->uses_rss)
- ewma_signal_add(&sta->rx_stats_avg.signal,
- -status->signal);
- }
-
- if (status->chains) {
- int i;
-
- stats->chains = status->chains;
- for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
- int signal = status->chain_signal[i];
-
- if (!(status->chains & BIT(i)))
- continue;
-
- stats->chain_signal_last[i] = signal;
- if (!fast_rx->uses_rss)
- ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
- -signal);
- }
- }
- /* end of statistics */
-
if (rx->key && !ieee80211_has_protected(hdr->frame_control))
goto drop;
@@ -4385,12 +4469,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
return true;
}
- stats->last_rx = jiffies;
- stats->last_rate = sta_stats_encode_rate(status);
-
- stats->fragments++;
- stats->packets++;
-
/* do the header conversion - first grab the addresses */
ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs);
ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs);
@@ -4399,58 +4477,14 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
/* push the addresses in front */
memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs));
- skb->dev = fast_rx->dev;
-
- dev_sw_netstats_rx_add(fast_rx->dev, skb->len);
-
- /* The seqno index has the same property as needed
- * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS
- * for non-QoS-data frames. Here we know it's a data
- * frame, so count MSDUs.
- */
- u64_stats_update_begin(&stats->syncp);
- stats->msdu[rx->seqno_idx]++;
- stats->bytes += orig_len;
- u64_stats_update_end(&stats->syncp);
-
- if (fast_rx->internal_forward) {
- struct sk_buff *xmit_skb = NULL;
- if (is_multicast_ether_addr(addrs.da)) {
- xmit_skb = skb_copy(skb, GFP_ATOMIC);
- } else if (!ether_addr_equal(addrs.da, addrs.sa) &&
- sta_info_get(rx->sdata, addrs.da)) {
- xmit_skb = skb;
- skb = NULL;
- }
-
- if (xmit_skb) {
- /*
- * Send to wireless media and increase priority by 256
- * to keep the received priority instead of
- * reclassifying the frame (see cfg80211_classify8021d).
- */
- xmit_skb->priority += 256;
- xmit_skb->protocol = htons(ETH_P_802_3);
- skb_reset_network_header(xmit_skb);
- skb_reset_mac_header(xmit_skb);
- dev_queue_xmit(xmit_skb);
- }
-
- if (!skb)
- return true;
- }
-
- /* deliver to local stack */
- skb->protocol = eth_type_trans(skb, fast_rx->dev);
- memset(skb->cb, 0, sizeof(skb->cb));
- if (rx->list)
- list_add_tail(&skb->list, rx->list);
- else
- netif_receive_skb(skb);
+ ieee80211_rx_8023(rx, fast_rx, orig_len);
return true;
drop:
dev_kfree_skb(skb);
+ if (fast_rx->uses_rss)
+ stats = this_cpu_ptr(sta->pcpu_rx_stats);
+
stats->dropped++;
return true;
}
@@ -4504,6 +4538,43 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
return true;
}
+static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
+ struct ieee80211_sta *pubsta,
+ struct sk_buff *skb,
+ struct list_head *list)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_fast_rx *fast_rx;
+ struct ieee80211_rx_data rx;
+
+ memset(&rx, 0, sizeof(rx));
+ rx.skb = skb;
+ rx.local = local;
+ rx.list = list;
+
+ I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
+
+ /* drop frame if too short for header */
+ if (skb->len < sizeof(struct ethhdr))
+ goto drop;
+
+ if (!pubsta)
+ goto drop;
+
+ rx.sta = container_of(pubsta, struct sta_info, sta);
+ rx.sdata = rx.sta->sdata;
+
+ fast_rx = rcu_dereference(rx.sta->fast_rx);
+ if (!fast_rx)
+ goto drop;
+
+ ieee80211_rx_8023(&rx, fast_rx, skb->len);
+ return;
+
+drop:
+ dev_kfree_skb(skb);
+}
+
/*
* This is the actual Rx frames handler. as it belongs to Rx path it must
* be called with rcu_read_lock protection.
@@ -4735,13 +4806,17 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
* if it was previously present.
* Also, frames with less than 16 bytes are dropped.
*/
- skb = ieee80211_rx_monitor(local, skb, rate);
+ if (!(status->flag & RX_FLAG_8023))
+ skb = ieee80211_rx_monitor(local, skb, rate);
if (skb) {
ieee80211_tpt_led_trig_rx(local,
((struct ieee80211_hdr *)skb->data)->frame_control,
skb->len);
- __ieee80211_rx_handle_packet(hw, pubsta, skb, list);
+ if (status->flag & RX_FLAG_8023)
+ __ieee80211_rx_handle_8023(hw, pubsta, skb, list);
+ else
+ __ieee80211_rx_handle_packet(hw, pubsta, skb, list);
}
kcov_remote_stop();
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index ae1cb2c68722..76747bfdaddd 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -133,16 +133,20 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
}
if (wide_bw_chansw_ie) {
+ u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1;
struct ieee80211_vht_operation vht_oper = {
.chan_width =
wide_bw_chansw_ie->new_channel_width,
.center_freq_seg0_idx =
wide_bw_chansw_ie->new_center_freq_seg0,
- .center_freq_seg1_idx =
- wide_bw_chansw_ie->new_center_freq_seg1,
+ .center_freq_seg1_idx = new_seg1,
/* .basic_mcs_set doesn't matter */
};
- struct ieee80211_ht_operation ht_oper = {};
+ struct ieee80211_ht_operation ht_oper = {
+ .operation_mode =
+ cpu_to_le16(new_seg1 <<
+ IEEE80211_HT_OP_MODE_CCFS2_SHIFT),
+ };
/* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT,
* to the previously parsed chandef
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 7afd07636b81..78b9d0c7cc58 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -71,6 +71,7 @@
* until pending frames are delivered
* @WLAN_STA_USES_ENCRYPTION: This station was configured for encryption,
* so drop all packets without a key later.
+ * @WLAN_STA_DECAP_OFFLOAD: This station uses rx decap offload
*
* @NUM_WLAN_STA_FLAGS: number of defined flags
*/
@@ -102,6 +103,7 @@ enum ieee80211_sta_info_flags {
WLAN_STA_MPSP_RECIPIENT,
WLAN_STA_PS_DELIVER,
WLAN_STA_USES_ENCRYPTION,
+ WLAN_STA_DECAP_OFFLOAD,
NUM_WLAN_STA_FLAGS,
};
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 3485610755ef..9baf185ee4c7 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -628,16 +628,12 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_hdr *hdr = (void *)skb->data;
- __be16 ethertype = 0;
-
- if (skb->len >= ETH_HLEN && skb->protocol == cpu_to_be16(ETH_P_802_3))
- skb_copy_bits(skb, 2 * ETH_ALEN, &ethertype, ETH_TLEN);
rcu_read_lock();
sdata = ieee80211_sdata_from_skb(local, skb);
if (sdata) {
- if (ethertype == sdata->control_port_protocol ||
- ethertype == cpu_to_be16(ETH_P_PREAUTH))
+ if (skb->protocol == sdata->control_port_protocol ||
+ skb->protocol == cpu_to_be16(ETH_P_PREAUTH))
cfg80211_control_port_tx_status(&sdata->wdev,
cookie,
skb->data,
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index e01e4daeb8cd..f91d02b81b92 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1927,7 +1927,7 @@ ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tdls_data *tf = (void *)skb->data;
struct wiphy *wiphy = sdata->local->hw.wiphy;
- ASSERT_RTNL();
+ lockdep_assert_wiphy(wiphy);
/* make sure the driver supports it */
if (!(wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH))
@@ -1979,7 +1979,7 @@ void ieee80211_tdls_chsw_work(struct work_struct *wk)
struct sk_buff *skb;
struct ieee80211_tdls_data *tf;
- rtnl_lock();
+ wiphy_lock(local->hw.wiphy);
while ((skb = skb_dequeue(&local->skb_queue_tdls_chsw))) {
tf = (struct ieee80211_tdls_data *)skb->data;
list_for_each_entry(sdata, &local->interfaces, list) {
@@ -1994,7 +1994,7 @@ void ieee80211_tdls_chsw_work(struct work_struct *wk)
kfree_skb(skb);
}
- rtnl_unlock();
+ wiphy_unlock(local->hw.wiphy);
}
void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 601322e16957..8fcc39056402 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2761,7 +2761,7 @@ DEFINE_EVENT(local_sdata_addr_evt, drv_update_vif_offload,
TP_ARGS(local, sdata)
);
-TRACE_EVENT(drv_sta_set_4addr,
+DECLARE_EVENT_CLASS(sta_flag_evt,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta *sta, bool enabled),
@@ -2788,6 +2788,22 @@ TRACE_EVENT(drv_sta_set_4addr,
)
);
+DEFINE_EVENT(sta_flag_evt, drv_sta_set_4addr,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta, bool enabled),
+
+ TP_ARGS(local, sdata, sta, enabled)
+);
+
+DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta, bool enabled),
+
+ TP_ARGS(local, sdata, sta, enabled)
+);
+
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6422da6690f7..5d06de61047a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -649,7 +649,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
if (!skip_hw && tx->key &&
tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
info->control.hw_key = &tx->key->conf;
- } else if (!ieee80211_is_mgmt(hdr->frame_control) && tx->sta &&
+ } else if (ieee80211_is_data_present(hdr->frame_control) && tx->sta &&
test_sta_flag(tx->sta, WLAN_STA_USES_ENCRYPTION)) {
return TX_DROP;
}
@@ -1182,9 +1182,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
tx->sta = rcu_dereference(sdata->u.vlan.sta);
if (!tx->sta && sdata->wdev.use_4addr)
return TX_DROP;
- } else if (info->flags & (IEEE80211_TX_INTFL_NL80211_FRAME_TX |
- IEEE80211_TX_CTL_INJECTED) ||
- tx->sdata->control_port_protocol == tx->skb->protocol) {
+ } else if (tx->sdata->control_port_protocol == tx->skb->protocol) {
tx->sta = sta_info_get_bss(sdata, hdr->addr1);
}
if (!tx->sta && !is_multicast_ether_addr(hdr->addr1))
@@ -1309,7 +1307,7 @@ static struct sk_buff *codel_dequeue_func(struct codel_vars *cvars,
fq = &local->fq;
if (cvars == &txqi->def_cvars)
- flow = &txqi->def_flow;
+ flow = &txqi->tin.default_flow;
else
flow = &fq->flows[cvars - local->cvars];
@@ -1352,7 +1350,7 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq,
cparams = &local->cparams;
}
- if (flow == &txqi->def_flow)
+ if (flow == &tin->default_flow)
cvars = &txqi->def_cvars;
else
cvars = &local->cvars[flow - fq->flows];
@@ -1379,17 +1377,6 @@ static void fq_skb_free_func(struct fq *fq,
ieee80211_free_txskb(&local->hw, skb);
}
-static struct fq_flow *fq_flow_get_default_func(struct fq *fq,
- struct fq_tin *tin,
- int idx,
- struct sk_buff *skb)
-{
- struct txq_info *txqi;
-
- txqi = container_of(tin, struct txq_info, tin);
- return &txqi->def_flow;
-}
-
static void ieee80211_txq_enqueue(struct ieee80211_local *local,
struct txq_info *txqi,
struct sk_buff *skb)
@@ -1402,8 +1389,7 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
spin_lock_bh(&fq->lock);
fq_tin_enqueue(fq, tin, flow_idx, skb,
- fq_skb_free_func,
- fq_flow_get_default_func);
+ fq_skb_free_func);
spin_unlock_bh(&fq->lock);
}
@@ -1446,7 +1432,6 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
struct txq_info *txqi, int tid)
{
fq_tin_init(&txqi->tin);
- fq_flow_init(&txqi->def_flow);
codel_vars_init(&txqi->def_cvars);
codel_stats_init(&txqi->cstats);
__skb_queue_head_init(&txqi->frags);
@@ -2133,6 +2118,19 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_BW &&
mcs_bw == IEEE80211_RADIOTAP_MCS_BW_40)
rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
+
+ if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_FEC &&
+ mcs_flags & IEEE80211_RADIOTAP_MCS_FEC_LDPC)
+ info->flags |= IEEE80211_TX_CTL_LDPC;
+
+ if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_STBC) {
+ u8 stbc = u8_get_bits(mcs_flags,
+ IEEE80211_RADIOTAP_MCS_STBC_MASK);
+
+ info->flags |=
+ u32_encode_bits(stbc,
+ IEEE80211_TX_CTL_STBC);
+ }
break;
case IEEE80211_RADIOTAP_VHT:
@@ -3283,8 +3281,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
*/
tin = &txqi->tin;
- flow = fq_flow_classify(fq, tin, flow_idx, skb,
- fq_flow_get_default_func);
+ flow = fq_flow_classify(fq, tin, flow_idx, skb);
head = skb_peek_tail(&flow->queue);
if (!head || skb_is_gso(head))
goto out;
@@ -3351,8 +3348,6 @@ out_recalc:
if (head->len != orig_len) {
flow->backlog += head->len - orig_len;
tin->backlog_bytes += head->len - orig_len;
-
- fq_recalc_backlog(fq, tin, flow);
}
out:
spin_unlock_bh(&fq->lock);
@@ -3809,7 +3804,7 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
* get immediately moved to the back of the list on the next
* call to ieee80211_next_txq().
*/
- if (txqi->txq.sta &&
+ if (txqi->txq.sta && local->airtime_flags &&
wiphy_ext_feature_isset(local->hw.wiphy,
NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
list_add(&txqi->schedule_order,
@@ -3823,6 +3818,8 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(__ieee80211_schedule_txq);
+DEFINE_STATIC_KEY_FALSE(aql_disable);
+
bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
@@ -3832,6 +3829,9 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
return true;
+ if (static_branch_unlikely(&aql_disable))
+ return true;
+
if (!txq->sta)
return true;
@@ -4251,7 +4251,6 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
struct ethhdr *ehdr = (struct ethhdr *)skb->data;
struct ieee80211_key *key;
struct sta_info *sta;
- bool offload = true;
if (unlikely(skb->len < ETH_HLEN)) {
kfree_skb(skb);
@@ -4267,18 +4266,22 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
if (unlikely(IS_ERR_OR_NULL(sta) || !sta->uploaded ||
!test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
- sdata->control_port_protocol == ehdr->h_proto))
- offload = false;
- else if ((key = rcu_dereference(sta->ptk[sta->ptk_idx])) &&
- (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) ||
- key->conf.cipher == WLAN_CIPHER_SUITE_TKIP))
- offload = false;
-
- if (offload)
- ieee80211_8023_xmit(sdata, dev, sta, key, skb);
- else
- ieee80211_subif_start_xmit(skb, dev);
+ sdata->control_port_protocol == ehdr->h_proto))
+ goto skip_offload;
+
+ key = rcu_dereference(sta->ptk[sta->ptk_idx]);
+ if (!key)
+ key = rcu_dereference(sdata->default_unicast_key);
+ if (key && (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) ||
+ key->conf.cipher == WLAN_CIPHER_SUITE_TKIP))
+ goto skip_offload;
+
+ ieee80211_8023_xmit(sdata, dev, sta, key, skb);
+ goto out;
+
+skip_offload:
+ ieee80211_subif_start_xmit(skb, dev);
out:
rcu_read_unlock();
@@ -5408,6 +5411,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
struct sk_buff *skb;
struct ethhdr *ehdr;
u32 ctrl_flags = 0;
@@ -5430,8 +5434,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
if (cookie)
ctrl_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
- flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX |
- IEEE80211_TX_CTL_INJECTED;
+ flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX;
skb = dev_alloc_skb(local->hw.extra_tx_headroom +
sizeof(struct ethhdr) + len);
@@ -5448,10 +5451,25 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
ehdr->h_proto = proto;
skb->dev = dev;
- skb->protocol = htons(ETH_P_802_3);
+ skb->protocol = proto;
skb_reset_network_header(skb);
skb_reset_mac_header(skb);
+ /* update QoS header to prioritize control port frames if possible,
+ * priorization also happens for control port frames send over
+ * AF_PACKET
+ */
+ rcu_read_lock();
+
+ if (ieee80211_lookup_ra_sta(sdata, skb, &sta) == 0 && !IS_ERR(sta)) {
+ u16 queue = __ieee80211_select_queue(sdata, sta, skb);
+
+ skb_set_queue_mapping(skb, queue);
+ skb_get_hash(skb);
+ }
+
+ rcu_read_unlock();
+
/* mutex lock is only needed for incrementing the cookie counter */
mutex_lock(&local->mtx);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 8d3ae6b2f95f..f080fcf60e45 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -832,7 +832,7 @@ void ieee80211_iterate_active_interfaces_atomic(
}
EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
-void ieee80211_iterate_active_interfaces_rtnl(
+void ieee80211_iterate_active_interfaces_mtx(
struct ieee80211_hw *hw, u32 iter_flags,
void (*iterator)(void *data, u8 *mac,
struct ieee80211_vif *vif),
@@ -840,12 +840,12 @@ void ieee80211_iterate_active_interfaces_rtnl(
{
struct ieee80211_local *local = hw_to_local(hw);
- ASSERT_RTNL();
+ lockdep_assert_wiphy(hw->wiphy);
__iterate_interfaces(local, iter_flags | IEEE80211_IFACE_ITER_ACTIVE,
iterator, data);
}
-EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_rtnl);
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_mtx);
static void __iterate_stations(struct ieee80211_local *local,
void (*iterator)(void *data,
@@ -2595,7 +2595,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
mutex_unlock(&local->mtx);
if (sched_scan_stopped)
- cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0);
+ cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0);
wake_up:
@@ -3811,7 +3811,7 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
struct cfg80211_chan_def chandef;
/* for interface list, to avoid linking iflist_mtx and chanctx_mtx */
- ASSERT_RTNL();
+ lockdep_assert_wiphy(local->hw.wiphy);
mutex_lock(&local->mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
@@ -3851,9 +3851,9 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work)
}
mutex_unlock(&local->chanctx_mtx);
- rtnl_lock();
+ wiphy_lock(local->hw.wiphy);
ieee80211_dfs_cac_cancel(local);
- rtnl_unlock();
+ wiphy_unlock(local->hw.wiphy);
if (num_chanctx > 1)
/* XXX: multi-channel is not supported yet */
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index c3ca97373774..e856f9092137 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -484,6 +484,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
void ieee80211_sta_set_rx_nss(struct sta_info *sta)
{
u8 ht_rx_nss = 0, vht_rx_nss = 0, he_rx_nss = 0, rx_nss;
+ bool support_160;
/* if we received a notification already don't overwrite it */
if (sta->sta.rx_nss)
@@ -514,7 +515,13 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
}
}
- he_rx_nss = min(rx_mcs_80, rx_mcs_160);
+ support_160 = he_cap->he_cap_elem.phy_cap_info[0] &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+
+ if (support_160)
+ he_rx_nss = min(rx_mcs_80, rx_mcs_160);
+ else
+ he_rx_nss = rx_mcs_80;
}
if (sta->sta.ht_cap.ht_supported) {
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index b921cbdd9aaa..3780c29c321d 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -29,8 +29,16 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
+ SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
+ SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
+ SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX),
+ SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX),
+ SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX),
+ SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
+ SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
+ SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 47bcecce1106..72afbc135f8e 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -22,8 +22,16 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */
MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */
MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
+ MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */
+ MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */
+ MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */
+ MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */
+ MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */
+ MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */
MPTCP_MIB_RMADDR, /* Received RM_ADDR */
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
+ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
+ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index b70ae4ba3000..f16d9b5ee978 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -128,11 +128,13 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
- info->mptcpi_subflows_max = READ_ONCE(msk->pm.subflows_max);
- val = READ_ONCE(msk->pm.add_addr_signal_max);
+ info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
+ info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk);
+ val = mptcp_pm_get_add_addr_signal_max(msk);
info->mptcpi_add_addr_signal_max = val;
- val = READ_ONCE(msk->pm.add_addr_accept_max);
+ val = mptcp_pm_get_add_addr_accept_max(msk);
info->mptcpi_add_addr_accepted_max = val;
+ info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk);
if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
flags |= MPTCP_INFO_FLAG_FALLBACK;
if (READ_ONCE(msk->can_ack))
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index e0d21c0607e5..444a38681e93 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -282,6 +282,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
break;
+ case MPTCPOPT_MP_PRIO:
+ if (opsize != TCPOLEN_MPTCP_PRIO)
+ break;
+
+ mp_opt->mp_prio = 1;
+ mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
+ pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
+ break;
+
case MPTCPOPT_MP_FASTCLOSE:
if (opsize != TCPOLEN_MPTCP_FASTCLOSE)
break;
@@ -313,6 +322,7 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->port = 0;
mp_opt->rm_addr = 0;
mp_opt->dss = 0;
+ mp_opt->mp_prio = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -401,6 +411,7 @@ static void clear_3rdack_retransmission(struct sock *sk)
}
static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
+ bool snd_data_fin_enable,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
@@ -418,9 +429,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
if (!skb)
return false;
- /* MPC/MPJ needed only on 3rd ack packet */
- if (subflow->fully_established ||
- subflow->snd_isn != TCP_SKB_CB(skb)->seq)
+ /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
+ if (subflow->fully_established || snd_data_fin_enable ||
+ subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
+ sk->sk_state != TCP_ESTABLISHED)
return false;
if (subflow->mp_capable) {
@@ -492,6 +504,7 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
}
static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
+ bool snd_data_fin_enable,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
@@ -499,13 +512,12 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
unsigned int dss_size = 0;
- u64 snd_data_fin_enable;
struct mptcp_ext *mpext;
unsigned int ack_size;
bool ret = false;
+ u64 ack_seq;
mpext = skb ? mptcp_get_ext(skb) : NULL;
- snd_data_fin_enable = mptcp_data_fin_enabled(msk);
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
unsigned int map_size;
@@ -531,13 +543,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
return ret;
}
+ ack_seq = READ_ONCE(msk->ack_seq);
if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
- opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
+ opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
} else {
ack_size = TCPOLEN_MPTCP_DSS_ACK32;
- opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq);
+ opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
@@ -679,16 +692,42 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
return true;
}
+static bool mptcp_established_options_mp_prio(struct sock *sk,
+ unsigned int *size,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+ if (!subflow->send_mp_prio)
+ return false;
+
+ /* account for the trailing 'nop' option */
+ if (remaining < TCPOLEN_MPTCP_PRIO_ALIGN)
+ return false;
+
+ *size = TCPOLEN_MPTCP_PRIO_ALIGN;
+ opts->suboptions |= OPTION_MPTCP_PRIO;
+ opts->backup = subflow->request_bkup;
+
+ pr_debug("prio=%d", opts->backup);
+
+ return true;
+}
+
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
unsigned int opt_size = 0;
+ bool snd_data_fin;
bool ret = false;
opts->suboptions = 0;
- if (unlikely(mptcp_check_fallback(sk)))
+ if (unlikely(__mptcp_check_fallback(msk)))
return false;
/* prevent adding of any MPTCP related options on reset packet
@@ -697,10 +736,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
return false;
- if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
+ snd_data_fin = mptcp_data_fin_enabled(msk);
+ if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true;
- else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
- opts))
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true;
/* we reserved enough space for the above options, and exceeding the
@@ -721,6 +760,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
ret = true;
}
+ if (mptcp_established_options_mp_prio(sk, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ ret = true;
+ }
+
return ret;
}
@@ -828,7 +873,7 @@ fully_established:
clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk, subflow);
} else {
- mptcp_pm_fully_established(msk);
+ mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
}
return true;
@@ -879,8 +924,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
msk->wnd_end = new_wnd_end;
/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
- if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) &&
- sk_stream_memory_free(ssk))
+ if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {
@@ -986,6 +1030,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mptcp_pm_del_add_timer(msk, &addr);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
}
+
+ if (mp_opt.port)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
+
mp_opt.add_addr = 0;
}
@@ -994,6 +1042,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mp_opt.rm_addr = 0;
}
+ if (mp_opt.mp_prio) {
+ mptcp_pm_mp_prio_received(sk, mp_opt.backup);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
+ mp_opt.mp_prio = 0;
+ }
+
if (!mp_opt.dss)
return;
@@ -1168,6 +1222,18 @@ mp_capable_done:
0, opts->rm_id);
}
+ if (OPTION_MPTCP_PRIO & opts->suboptions) {
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_prio = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
+ TCPOLEN_MPTCP_PRIO,
+ opts->backup, TCPOPT_NOP);
+ }
+
if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
TCPOLEN_MPTCP_MPJ_SYN,
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index da2ed576f289..6fd4b2c1b076 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -20,6 +20,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
pr_debug("msk=%p, local_id=%d", msk, addr->id);
+ lockdep_assert_held(&msk->pm.lock);
+
if (add_addr) {
pr_warn("addr_signal error, add_addr=%d", add_addr);
return -EINVAL;
@@ -66,22 +68,26 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id)
/* path manager event handlers */
-void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side)
+void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
WRITE_ONCE(pm->server_side, server_side);
+ mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
+ unsigned int subflows_max;
int ret = 0;
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
- pm->subflows_max, READ_ONCE(pm->accept_subflow));
+ subflows_max, READ_ONCE(pm->accept_subflow));
/* try to avoid acquiring the lock below */
if (!READ_ONCE(pm->accept_subflow))
@@ -89,8 +95,8 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
spin_lock_bh(&pm->lock);
if (READ_ONCE(pm->accept_subflow)) {
- ret = pm->subflows < pm->subflows_max;
- if (ret && ++pm->subflows == pm->subflows_max)
+ ret = pm->subflows < subflows_max;
+ if (ret && ++pm->subflows == subflows_max)
WRITE_ONCE(pm->accept_subflow, false);
}
spin_unlock_bh(&pm->lock);
@@ -114,16 +120,13 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
return true;
}
-void mptcp_pm_fully_established(struct mptcp_sock *msk)
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
{
struct mptcp_pm_data *pm = &msk->pm;
+ bool announce = false;
pr_debug("msk=%p", msk);
- /* try to avoid acquiring the lock below */
- if (!READ_ONCE(pm->work_pending))
- return;
-
spin_lock_bh(&pm->lock);
/* mptcp_pm_fully_established() can be invoked by multiple
@@ -133,9 +136,15 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk)
if (READ_ONCE(pm->work_pending) &&
!(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
- msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
+ if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
+ announce = true;
+
+ msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
spin_unlock_bh(&pm->lock);
+
+ if (announce)
+ mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
@@ -174,6 +183,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr));
+ mptcp_event_addr_announced(msk, addr);
+
spin_lock_bh(&pm->lock);
if (!READ_ONCE(pm->accept_addr)) {
@@ -188,8 +199,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
{
- if (!mptcp_pm_should_add_signal_ipv6(msk) &&
- !mptcp_pm_should_add_signal_port(msk))
+ if (!mptcp_pm_should_add_signal(msk))
return;
mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
@@ -201,12 +211,24 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)
pr_debug("msk=%p remote_id=%d", msk, rm_id);
+ mptcp_event_addr_removed(msk, rm_id);
+
spin_lock_bh(&pm->lock);
mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
pm->rm_id = rm_id;
spin_unlock_bh(&pm->lock);
}
+void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+ pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
+ subflow->backup = bkup;
+
+ mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
+}
+
/* path manager helpers */
bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index a6d983d80576..8e8e35fa4002 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -26,6 +26,7 @@ struct mptcp_pm_addr_entry {
struct list_head list;
struct mptcp_addr_info addr;
struct rcu_head rcu;
+ struct socket *lsk;
};
struct mptcp_pm_add_entry {
@@ -36,6 +37,9 @@ struct mptcp_pm_add_entry {
u8 retrans_times;
};
+#define MAX_ADDR_ID 255
+#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
+
struct pm_nl_pernet {
/* protects pernet updates */
spinlock_t lock;
@@ -46,25 +50,33 @@ struct pm_nl_pernet {
unsigned int local_addr_max;
unsigned int subflows_max;
unsigned int next_id;
+ unsigned long id_bitmap[BITMAP_SZ];
};
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
+static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
+
static bool addresses_equal(const struct mptcp_addr_info *a,
struct mptcp_addr_info *b, bool use_port)
{
bool addr_equals = false;
- if (a->family != b->family)
- return false;
-
- if (a->family == AF_INET)
- addr_equals = a->addr.s_addr == b->addr.s_addr;
+ if (a->family == b->family) {
+ if (a->family == AF_INET)
+ addr_equals = a->addr.s_addr == b->addr.s_addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- else
- addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6);
+ else
+ addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6);
+ } else if (a->family == AF_INET) {
+ if (ipv6_addr_v4mapped(&b->addr6))
+ addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3];
+ } else if (b->family == AF_INET) {
+ if (ipv6_addr_v4mapped(&a->addr6))
+ addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr;
#endif
+ }
if (!addr_equals)
return false;
@@ -81,14 +93,14 @@ static bool address_zero(const struct mptcp_addr_info *addr)
memset(&zero, 0, sizeof(zero));
zero.family = addr->family;
- return addresses_equal(addr, &zero, false);
+ return addresses_equal(addr, &zero, true);
}
static void local_address(const struct sock_common *skc,
struct mptcp_addr_info *addr)
{
- addr->port = 0;
addr->family = skc->skc_family;
+ addr->port = htons(skc->skc_num);
if (addr->family == AF_INET)
addr->addr.s_addr = skc->skc_rcv_saddr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -121,7 +133,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
local_address(skc, &cur);
- if (addresses_equal(&cur, saddr, false))
+ if (addresses_equal(&cur, saddr, saddr->port))
return true;
}
@@ -133,6 +145,9 @@ select_local_address(const struct pm_nl_pernet *pernet,
struct mptcp_sock *msk)
{
struct mptcp_pm_addr_entry *entry, *ret = NULL;
+ struct sock *sk = (struct sock *)msk;
+
+ msk_owned_by_me(msk);
rcu_read_lock();
__mptcp_flush_join_list(msk);
@@ -140,11 +155,20 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
continue;
+ if (entry->addr.family != sk->sk_family) {
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if ((entry->addr.family == AF_INET &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
+ (sk->sk_family == AF_INET &&
+ !ipv6_addr_v4mapped(&entry->addr.addr6)))
+#endif
+ continue;
+ }
+
/* avoid any address already in use by subflows and
* pending join
*/
- if (entry->addr.family == ((struct sock *)msk)->sk_family &&
- !lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
+ if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
ret = entry;
break;
}
@@ -177,11 +201,47 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
return ret;
}
+/* Look up the per-netns path-manager state for the namespace @msk lives in.
+ * Shared by the limit getters below so the lookup is written only once.
+ */
+static struct pm_nl_pernet *pm_nl_pernet_from_msk(const struct mptcp_sock *msk)
+{
+	return net_generic(sock_net((const struct sock *)msk), pm_nl_pernet_id);
+}
+
+/* Lockless read of the per-netns add_addr_signal_max counter. */
+unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk)
+{
+	return READ_ONCE(pm_nl_pernet_from_msk(msk)->add_addr_signal_max);
+}
+EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max);
+
+/* Lockless read of the per-netns add_addr_accept_max limit. */
+unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk)
+{
+	return READ_ONCE(pm_nl_pernet_from_msk(msk)->add_addr_accept_max);
+}
+EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max);
+
+/* Lockless read of the per-netns subflows_max limit. */
+unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk)
+{
+	return READ_ONCE(pm_nl_pernet_from_msk(msk)->subflows_max);
+}
+EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max);
+
+/* Lockless read of the per-netns local_addr_max counter. */
+unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk)
+{
+	return READ_ONCE(pm_nl_pernet_from_msk(msk)->local_addr_max);
+}
+EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
+
static void check_work_pending(struct mptcp_sock *msk)
{
- if (msk->pm.add_addr_signaled == msk->pm.add_addr_signal_max &&
- (msk->pm.local_addr_used == msk->pm.local_addr_max ||
- msk->pm.subflows == msk->pm.subflows_max))
+ if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) &&
+ (msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) ||
+ msk->pm.subflows == mptcp_pm_get_subflows_max(msk)))
WRITE_ONCE(msk->pm.work_pending, false);
}
@@ -191,14 +251,37 @@ lookup_anno_list_by_saddr(struct mptcp_sock *msk,
{
struct mptcp_pm_add_entry *entry;
+ lockdep_assert_held(&msk->pm.lock);
+
list_for_each_entry(entry, &msk->pm.anno_list, list) {
- if (addresses_equal(&entry->addr, addr, false))
+ if (addresses_equal(&entry->addr, addr, true))
return entry;
}
return NULL;
}
+/* Report whether the local address and port of @sk match any entry on
+ * @msk's announce list. The list is scanned under msk->pm.lock, which this
+ * function takes and releases itself.
+ */
+bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
+{
+	struct mptcp_pm_add_entry *cur;
+	struct mptcp_addr_info local;
+	bool found = false;
+
+	local_address((struct sock_common *)sk, &local);
+
+	spin_lock_bh(&msk->pm.lock);
+	list_for_each_entry(cur, &msk->pm.anno_list, list) {
+		found = addresses_equal(&cur->addr, &local, true);
+		if (found)
+			break;
+	}
+	spin_unlock_bh(&msk->pm.lock);
+
+	return found;
+}
+
static void mptcp_pm_add_timer(struct timer_list *timer)
{
struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer);
@@ -266,6 +349,8 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
struct sock *sk = (struct sock *)msk;
struct net *net = sock_net(sk);
+ lockdep_assert_held(&msk->pm.lock);
+
if (lookup_anno_list_by_saddr(msk, &entry->addr))
return false;
@@ -306,20 +391,26 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
- struct mptcp_addr_info remote = { 0 };
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *local;
+ unsigned int add_addr_signal_max;
+ unsigned int local_addr_max;
struct pm_nl_pernet *pernet;
+ unsigned int subflows_max;
pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk);
+ local_addr_max = mptcp_pm_get_local_addr_max(msk);
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
- msk->pm.local_addr_used, msk->pm.local_addr_max,
- msk->pm.add_addr_signaled, msk->pm.add_addr_signal_max,
- msk->pm.subflows, msk->pm.subflows_max);
+ msk->pm.local_addr_used, local_addr_max,
+ msk->pm.add_addr_signaled, add_addr_signal_max,
+ msk->pm.subflows, subflows_max);
/* check first for announce */
- if (msk->pm.add_addr_signaled < msk->pm.add_addr_signal_max) {
+ if (msk->pm.add_addr_signaled < add_addr_signal_max) {
local = select_signal_address(pernet,
msk->pm.add_addr_signaled);
@@ -331,22 +422,23 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
}
} else {
/* pick failed, avoid further attempts later */
- msk->pm.local_addr_used = msk->pm.add_addr_signal_max;
+ msk->pm.local_addr_used = add_addr_signal_max;
}
check_work_pending(msk);
}
/* check if should create a new subflow */
- if (msk->pm.local_addr_used < msk->pm.local_addr_max &&
- msk->pm.subflows < msk->pm.subflows_max) {
- remote_address((struct sock_common *)sk, &remote);
-
+ if (msk->pm.local_addr_used < local_addr_max &&
+ msk->pm.subflows < subflows_max) {
local = select_local_address(pernet, msk);
if (local) {
+ struct mptcp_addr_info remote = { 0 };
+
msk->pm.local_addr_used++;
msk->pm.subflows++;
check_work_pending(msk);
+ remote_address((struct sock_common *)sk, &remote);
spin_unlock_bh(&msk->pm.lock);
__mptcp_subflow_connect(sk, &local->addr, &remote);
spin_lock_bh(&msk->pm.lock);
@@ -354,35 +446,40 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
}
/* lookup failed, avoid further attempts later */
- msk->pm.local_addr_used = msk->pm.local_addr_max;
+ msk->pm.local_addr_used = local_addr_max;
check_work_pending(msk);
}
}
-void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
+static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
{
mptcp_pm_create_subflow_or_signal_addr(msk);
}
-void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
+static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
{
mptcp_pm_create_subflow_or_signal_addr(msk);
}
-void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
+static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
+ unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
struct mptcp_addr_info local;
+ unsigned int subflows_max;
bool use_port = false;
+ add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
pr_debug("accepted %d:%d remote family %d",
- msk->pm.add_addr_accepted, msk->pm.add_addr_accept_max,
+ msk->pm.add_addr_accepted, add_addr_accept_max,
msk->pm.remote.family);
msk->pm.add_addr_accepted++;
msk->pm.subflows++;
- if (msk->pm.add_addr_accepted >= msk->pm.add_addr_accept_max ||
- msk->pm.subflows >= msk->pm.subflows_max)
+ if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
+ msk->pm.subflows >= subflows_max)
WRITE_ONCE(msk->pm.accept_addr, false);
/* connect to the specified remote address, using whatever
@@ -404,12 +501,14 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
mptcp_pm_nl_add_addr_send_ack(msk);
}
-void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
+static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
- if (!mptcp_pm_should_add_signal_ipv6(msk) &&
- !mptcp_pm_should_add_signal_port(msk))
+ msk_owned_by_me(msk);
+ lockdep_assert_held(&msk->pm.lock);
+
+ if (!mptcp_pm_should_add_signal(msk))
return;
__mptcp_flush_join_list(msk);
@@ -419,10 +518,9 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
u8 add_addr;
spin_unlock_bh(&msk->pm.lock);
- if (mptcp_pm_should_add_signal_ipv6(msk))
- pr_debug("send ack for add_addr6");
- if (mptcp_pm_should_add_signal_port(msk))
- pr_debug("send ack for add_addr_port");
+ pr_debug("send ack for add_addr%s%s",
+ mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
+ mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
lock_sock(ssk);
tcp_send_ack(ssk);
@@ -438,13 +536,50 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
}
}
-void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
+/* Flag the first subflow of @msk whose local address matches @addr with the
+ * backup value @bkup and push an ACK on it.
+ *
+ * Returns 0 when a matching subflow was found, -EINVAL otherwise.
+ *
+ * The function drops and re-takes msk->pm.lock around the ACK transmission;
+ * the caller visible in this file (mptcp_nl_addr_backup) enters with both the
+ * msk socket lock and msk->pm.lock held.
+ */
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ u8 bkup)
+{
+ struct mptcp_subflow_context *subflow;
+
+ pr_debug("bkup=%d", bkup);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_addr_info local;
+
+ /* addr->port is passed as the use_port argument, so the port
+ * only takes part in the comparison when @addr carries one
+ */
+ local_address((struct sock_common *)ssk, &local);
+ if (!addresses_equal(&local, addr, addr->port))
+ continue;
+
+ subflow->backup = bkup;
+ subflow->send_mp_prio = 1;
+ subflow->request_bkup = bkup;
+ __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX);
+
+ /* pm.lock is a spinlock: release it before lock_sock() on the
+ * subflow and re-take it before returning to the caller
+ */
+ spin_unlock_bh(&msk->pm.lock);
+ pr_debug("send ack for mp_prio");
+ lock_sock(ssk);
+ tcp_send_ack(ssk);
+ release_sock(ssk);
+ spin_lock_bh(&msk->pm.lock);
+
+ /* only the first matching subflow is handled */
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
struct sock *sk = (struct sock *)msk;
pr_debug("address rm_id %d", msk->pm.rm_id);
+ msk_owned_by_me(msk);
+
if (!msk->pm.rm_id)
return;
@@ -460,7 +595,7 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
- __mptcp_close_ssk(sk, ssk, subflow);
+ mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
msk->pm.add_addr_accepted--;
@@ -473,6 +608,39 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
}
}
+/* Run the netlink path-manager handlers for every event bit pending in
+ * msk->pm.status.
+ *
+ * msk_owned_by_me() asserts that the caller owns the msk socket lock;
+ * msk->pm.lock is taken here for the whole dispatch. Each status bit is
+ * cleared before its handler runs.
+ */
+void mptcp_pm_nl_work(struct mptcp_sock *msk)
+{
+ struct mptcp_pm_data *pm = &msk->pm;
+
+ msk_owned_by_me(msk);
+
+ spin_lock_bh(&msk->pm.lock);
+
+ pr_debug("msk=%p status=%x", msk, pm->status);
+ /* pm->status is re-read for every test instead of being snapshotted:
+ * some handlers (e.g. the add_addr ack and rm_addr ones) temporarily
+ * drop pm.lock, so the bits may change between handlers
+ */
+ if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
+ pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
+ mptcp_pm_nl_add_addr_received(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
+ pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
+ mptcp_pm_nl_add_addr_send_ack(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
+ pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
+ mptcp_pm_nl_rm_addr_received(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
+ pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
+ mptcp_pm_nl_fully_established(msk);
+ }
+ if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
+ pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
+ mptcp_pm_nl_subflow_established(msk);
+ }
+
+ spin_unlock_bh(&msk->pm.lock);
+}
+
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
{
struct mptcp_subflow_context *subflow, *tmp;
@@ -480,6 +648,8 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
pr_debug("subflow rm_id %d", rm_id);
+ msk_owned_by_me(msk);
+
if (!rm_id)
return;
@@ -495,7 +665,7 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
- __mptcp_close_ssk(sk, ssk, subflow);
+ mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
msk->pm.local_addr_used--;
@@ -518,16 +688,19 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
struct mptcp_pm_addr_entry *entry)
{
struct mptcp_pm_addr_entry *cur;
+ unsigned int addr_max;
int ret = -EINVAL;
spin_lock_bh(&pernet->lock);
/* to keep the code simple, don't do IDR-like allocation for address ID,
* just bail when we exceed limits
*/
- if (pernet->next_id > 255)
- goto out;
+ if (pernet->next_id == MAX_ADDR_ID)
+ pernet->next_id = 1;
if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
goto out;
+ if (test_bit(entry->addr.id, pernet->id_bitmap))
+ goto out;
/* do not insert duplicate address, differentiate on port only
* singled addresses
@@ -539,12 +712,34 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
goto out;
}
- if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
- pernet->add_addr_signal_max++;
- if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
- pernet->local_addr_max++;
+ if (!entry->addr.id) {
+find_next:
+ entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
+ MAX_ADDR_ID + 1,
+ pernet->next_id);
+ if ((!entry->addr.id || entry->addr.id > MAX_ADDR_ID) &&
+ pernet->next_id != 1) {
+ pernet->next_id = 1;
+ goto find_next;
+ }
+ }
+
+ if (!entry->addr.id || entry->addr.id > MAX_ADDR_ID)
+ goto out;
+
+ __set_bit(entry->addr.id, pernet->id_bitmap);
+ if (entry->addr.id > pernet->next_id)
+ pernet->next_id = entry->addr.id;
+
+ if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ addr_max = pernet->add_addr_signal_max;
+ WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1);
+ }
+ if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+ addr_max = pernet->local_addr_max;
+ WRITE_ONCE(pernet->local_addr_max, addr_max + 1);
+ }
- entry->addr.id = pernet->next_id++;
pernet->addrs++;
list_add_tail_rcu(&entry->list, &pernet->local_addr_list);
ret = entry->addr.id;
@@ -554,6 +749,53 @@ out:
return ret;
}
+/* Create a kernel MPTCP socket bound to and listening on the address and
+ * port described by @entry, and store it in entry->lsk.
+ *
+ * @sk is only used to pick the network namespace.
+ *
+ * Returns 0 on success or a negative errno. On failure after the socket has
+ * been created, the socket is released and entry->lsk is reset to NULL so
+ * that no stale pointer is left behind.
+ */
+static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
+					    struct mptcp_pm_addr_entry *entry)
+{
+	int addrlen = sizeof(struct sockaddr_in);
+	struct sockaddr_storage addr;
+	struct mptcp_sock *msk;
+	struct socket *ssock;
+	int backlog = 1024;
+	int err;
+
+	err = sock_create_kern(sock_net(sk), entry->addr.family,
+			       SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk);
+	if (err)
+		return err;
+
+	msk = mptcp_sk(entry->lsk->sk);
+	if (!msk) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	ssock = __mptcp_nmpc_socket(msk);
+	if (!ssock) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family);
+	/* the address length handed to bind must match the family: an IPv6
+	 * endpoint needs the full sockaddr_in6, not the shorter sockaddr_in
+	 */
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+	if (entry->addr.family == AF_INET6)
+		addrlen = sizeof(struct sockaddr_in6);
+#endif
+	err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen);
+	if (err) {
+		pr_warn("kernel_bind error, err=%d", err);
+		goto out;
+	}
+
+	err = kernel_listen(ssock, backlog);
+	if (err) {
+		pr_warn("kernel_listen error, err=%d", err);
+		goto out;
+	}
+
+	return 0;
+
+out:
+	sock_release(entry->lsk);
+	entry->lsk = NULL;
+	return err;
+}
+
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
struct mptcp_pm_addr_entry *entry;
@@ -580,7 +822,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (addresses_equal(&entry->addr, &skc_local, false)) {
+ if (addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
ret = entry->addr.id;
break;
}
@@ -597,6 +839,9 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
entry->addr = skc_local;
entry->addr.ifindex = 0;
entry->addr.flags = 0;
+ entry->addr.id = 0;
+ entry->addr.port = 0;
+ entry->lsk = NULL;
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0)
kfree(entry);
@@ -607,26 +852,23 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
void mptcp_pm_nl_data_init(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
- struct pm_nl_pernet *pernet;
bool subflows;
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
-
- pm->add_addr_signal_max = READ_ONCE(pernet->add_addr_signal_max);
- pm->add_addr_accept_max = READ_ONCE(pernet->add_addr_accept_max);
- pm->local_addr_max = READ_ONCE(pernet->local_addr_max);
- pm->subflows_max = READ_ONCE(pernet->subflows_max);
- subflows = !!pm->subflows_max;
- WRITE_ONCE(pm->work_pending, (!!pm->local_addr_max && subflows) ||
- !!pm->add_addr_signal_max);
- WRITE_ONCE(pm->accept_addr, !!pm->add_addr_accept_max && subflows);
+ subflows = !!mptcp_pm_get_subflows_max(msk);
+ WRITE_ONCE(pm->work_pending, (!!mptcp_pm_get_local_addr_max(msk) && subflows) ||
+ !!mptcp_pm_get_add_addr_signal_max(msk));
+ WRITE_ONCE(pm->accept_addr, !!mptcp_pm_get_add_addr_accept_max(msk) && subflows);
WRITE_ONCE(pm->accept_subflow, subflows);
}
-#define MPTCP_PM_CMD_GRP_OFFSET 0
+#define MPTCP_PM_CMD_GRP_OFFSET 0
+#define MPTCP_PM_EV_GRP_OFFSET 1
static const struct genl_multicast_group mptcp_pm_mcgrps[] = {
[MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, },
+ [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME,
+ .flags = GENL_UNS_ADMIN_PERM,
+ },
};
static const struct nla_policy
@@ -722,6 +964,9 @@ skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
entry->addr.flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT])
+ entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
+
return 0;
}
@@ -730,6 +975,31 @@ static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
return net_generic(genl_info_net(info), pm_nl_pernet_id);
}
+/* Walk every MPTCP socket of @net returned by the token iterator and, for
+ * those already fully established, run the create-subflow-or-signal-address
+ * logic under the msk socket lock and msk->pm.lock.
+ *
+ * Each iterated msk is released with sock_put(). Always returns 0.
+ */
+static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
+{
+	long slot = 0, num = 0;
+	struct mptcp_sock *msk;
+
+	while ((msk = mptcp_token_iter_next(net, &slot, &num)) != NULL) {
+		struct sock *sk = (struct sock *)msk;
+
+		if (READ_ONCE(msk->fully_established)) {
+			lock_sock(sk);
+			spin_lock_bh(&msk->pm.lock);
+			mptcp_pm_create_subflow_or_signal_addr(msk);
+			spin_unlock_bh(&msk->pm.lock);
+			release_sock(sk);
+		}
+
+		sock_put(sk);
+		cond_resched();
+	}
+
+	return 0;
+}
+
static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
@@ -748,13 +1018,25 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
}
*entry = addr;
+ if (entry->addr.port) {
+ ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry);
+ if (ret) {
+ GENL_SET_ERR_MSG(info, "create listen socket error");
+ kfree(entry);
+ return ret;
+ }
+ }
ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
if (ret < 0) {
GENL_SET_ERR_MSG(info, "too many addresses or duplicate one");
+ if (entry->lsk)
+ sock_release(entry->lsk);
kfree(entry);
return ret;
}
+ mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk));
+
return 0;
}
@@ -832,11 +1114,44 @@ next:
return 0;
}
+/* Descriptor for the deferred release of one address entry: @rwork is queued
+ * with queue_rcu_work() and its handler frees @entry together with this
+ * descriptor.
+ */
+struct addr_entry_release_work {
+ struct rcu_work rwork;
+ struct mptcp_pm_addr_entry *entry;
+};
+
+/* RCU work handler: release the listening socket of the address entry
+ * carried by @work (if any), then free the entry and the work descriptor.
+ */
+static void mptcp_pm_release_addr_entry(struct work_struct *work)
+{
+	struct addr_entry_release_work *rel;
+	struct mptcp_pm_addr_entry *victim;
+
+	rel = container_of(to_rcu_work(work), struct addr_entry_release_work, rwork);
+	victim = rel->entry;
+
+	if (victim && victim->lsk)
+		sock_release(victim->lsk);
+
+	/* kfree() accepts NULL, so no extra guard is needed here */
+	kfree(victim);
+	kfree(rel);
+}
+
+/* Release @entry, which the caller has already unlinked with list_del_rcu().
+ *
+ * Normally the release is deferred to an RCU work item. If the work
+ * descriptor cannot be allocated, the entry (and its listening socket) used
+ * to be leaked; wait for the grace period synchronously and release it here
+ * instead.
+ */
+static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry)
+{
+	struct addr_entry_release_work *w;
+
+	w = kmalloc(sizeof(*w), GFP_ATOMIC);
+	if (w) {
+		INIT_RCU_WORK(&w->rwork, mptcp_pm_release_addr_entry);
+		w->entry = entry;
+		queue_rcu_work(system_wq, &w->rwork);
+		return;
+	}
+
+	/* Allocation failed. NOTE(review): this fallback sleeps; both callers
+	 * visible in this file invoke this helper outside of any spinlock,
+	 * right after a helper that itself takes socket locks - confirm if
+	 * new callers are added.
+	 */
+	synchronize_rcu();
+	if (entry->lsk)
+		sock_release(entry->lsk);
+	kfree(entry);
+}
+
static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
struct mptcp_pm_addr_entry addr, *entry;
+ unsigned int addr_max;
int ret;
ret = mptcp_pm_parse_addr(attr, info, false, &addr);
@@ -850,17 +1165,22 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&pernet->lock);
return -EINVAL;
}
- if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)
- pernet->add_addr_signal_max--;
- if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
- pernet->local_addr_max--;
+ if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ addr_max = pernet->add_addr_signal_max;
+ WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1);
+ }
+ if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+ addr_max = pernet->local_addr_max;
+ WRITE_ONCE(pernet->local_addr_max, addr_max - 1);
+ }
pernet->addrs--;
list_del_rcu(&entry->list);
+ __clear_bit(entry->addr.id, pernet->id_bitmap);
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
- kfree_rcu(entry, rcu);
+ mptcp_pm_free_addr_entry(entry);
return ret;
}
@@ -874,15 +1194,15 @@ static void __flush_addrs(struct net *net, struct list_head *list)
struct mptcp_pm_addr_entry, list);
mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr);
list_del_rcu(&cur->list);
- kfree_rcu(cur, rcu);
+ mptcp_pm_free_addr_entry(cur);
}
}
static void __reset_counters(struct pm_nl_pernet *pernet)
{
- pernet->add_addr_signal_max = 0;
- pernet->add_addr_accept_max = 0;
- pernet->local_addr_max = 0;
+ WRITE_ONCE(pernet->add_addr_signal_max, 0);
+ WRITE_ONCE(pernet->add_addr_accept_max, 0);
+ WRITE_ONCE(pernet->local_addr_max, 0);
pernet->addrs = 0;
}
@@ -894,6 +1214,8 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
spin_lock_bh(&pernet->lock);
list_splice_init(&pernet->local_addr_list, &free_list);
__reset_counters(pernet);
+ pernet->next_id = 1;
+ bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
__flush_addrs(sock_net(skb->sk), &free_list);
return 0;
@@ -911,6 +1233,8 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb,
if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_FAMILY, addr->family))
goto nla_put_failure;
+ if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_PORT, ntohs(addr->port)))
+ goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id))
goto nla_put_failure;
if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->addr.flags))
@@ -994,27 +1318,34 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
struct pm_nl_pernet *pernet;
int id = cb->args[0];
void *hdr;
+ int i;
pernet = net_generic(net, pm_nl_pernet_id);
spin_lock_bh(&pernet->lock);
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if (entry->addr.id <= id)
- continue;
-
- hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, &mptcp_genl_family,
- NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
- if (!hdr)
- break;
+ for (i = id; i < MAX_ADDR_ID + 1; i++) {
+ if (test_bit(i, pernet->id_bitmap)) {
+ entry = __lookup_addr_by_id(pernet, i);
+ if (!entry)
+ break;
+
+ if (entry->addr.id <= id)
+ continue;
+
+ hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &mptcp_genl_family,
+ NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR);
+ if (!hdr)
+ break;
+
+ if (mptcp_nl_fill_addr(msg, entry) < 0) {
+ genlmsg_cancel(msg, hdr);
+ break;
+ }
- if (mptcp_nl_fill_addr(msg, entry) < 0) {
- genlmsg_cancel(msg, hdr);
- break;
+ id = entry->addr.id;
+ genlmsg_end(msg, hdr);
}
-
- id = entry->addr.id;
- genlmsg_end(msg, hdr);
}
spin_unlock_bh(&pernet->lock);
@@ -1096,6 +1427,321 @@ fail:
return -EMSGSIZE;
}
+/* Apply the backup value @bkup to the subflows using local address @addr on
+ * every MPTCP socket of @net that has at least one subflow.
+ *
+ * Returns 0 if at least one socket had a matching subflow, -EINVAL if none
+ * did. Previously the result of the last visited socket overwrote all
+ * earlier ones, so the outcome depended on the token iteration order.
+ */
+static int mptcp_nl_addr_backup(struct net *net,
+				struct mptcp_addr_info *addr,
+				u8 bkup)
+{
+	long s_slot = 0, s_num = 0;
+	struct mptcp_sock *msk;
+	int ret = -EINVAL;
+
+	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
+		struct sock *sk = (struct sock *)msk;
+
+		if (list_empty(&msk->conn_list))
+			goto next;
+
+		lock_sock(sk);
+		spin_lock_bh(&msk->pm.lock);
+		/* remember any success, never downgrade it back to an error */
+		if (!mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup))
+			ret = 0;
+		spin_unlock_bh(&msk->pm.lock);
+		release_sock(sk);
+
+next:
+		sock_put(sk);
+		cond_resched();
+	}
+
+	return ret;
+}
+
+/* Netlink handler for MPTCP_PM_CMD_SET_FLAGS: parse the address attribute
+ * and, for each configured local address equal to it (port included),
+ * propagate the requested backup state to the existing subflows and then
+ * update MPTCP_PM_ADDR_FLAG_BACKUP on the entry.
+ *
+ * Returns 0 on success (also when no entry matches), or the negative error
+ * from parsing or from mptcp_nl_addr_backup().
+ */
+static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
+ struct mptcp_pm_addr_entry addr, *entry;
+ struct net *net = sock_net(skb->sk);
+ u8 bkup = 0;
+ int ret;
+
+ ret = mptcp_pm_parse_addr(attr, info, true, &addr);
+ if (ret < 0)
+ return ret;
+
+ if (addr.addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ bkup = 1;
+
+ /* NOTE(review): the list is walked and entry->addr.flags is modified
+ * without pernet->lock or rcu_read_lock(), unlike e.g. the dump handler
+ * which takes pernet->lock - confirm what serializes this against
+ * concurrent list updates.
+ */
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ if (addresses_equal(&entry->addr, &addr.addr, true)) {
+ /* the entry flag is only updated once the subflows
+ * have accepted the change
+ */
+ ret = mptcp_nl_addr_backup(net, &entry->addr, bkup);
+ if (ret)
+ return ret;
+
+ if (bkup)
+ entry->addr.flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ entry->addr.flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ }
+ }
+
+ return 0;
+}
+
+/* Multicast @nlskb to the MPTCP event group (MPTCP_PM_EV_GRP_OFFSET) of
+ * @net, using @gfp for any allocation done by the netlink core.
+ */
+static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
+{
+ genlmsg_multicast_netns(&mptcp_genl_family, net,
+ nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp);
+}
+
+/* Append the attributes describing subflow @ssk to @skb: address family,
+ * source/destination address, source/destination port and the local and
+ * remote address ids taken from the subflow context.
+ *
+ * Returns 0 on success, -EMSGSIZE when an attribute cannot be added or the
+ * family is not handled, -EINVAL when @ssk has no subflow context.
+ */
+static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
+{
+ const struct inet_sock *issk = inet_sk(ssk);
+ const struct mptcp_subflow_context *sf;
+
+ if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family))
+ return -EMSGSIZE;
+
+ switch (ssk->sk_family) {
+ case AF_INET:
+ if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr))
+ return -EMSGSIZE;
+ if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr))
+ return -EMSGSIZE;
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case AF_INET6: {
+ const struct ipv6_pinfo *np = inet6_sk(ssk);
+
+ if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr))
+ return -EMSGSIZE;
+ if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr))
+ return -EMSGSIZE;
+ break;
+ }
+#endif
+ default:
+ /* unexpected family: warn once and fail the event */
+ WARN_ON_ONCE(1);
+ return -EMSGSIZE;
+ }
+
+ /* ports are emitted as stored in the inet socket (big endian) */
+ if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport))
+ return -EMSGSIZE;
+ if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport))
+ return -EMSGSIZE;
+
+ sf = mptcp_subflow_ctx(ssk);
+ if (WARN_ON_ONCE(!sf))
+ return -EINVAL;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/* Append the connection token of @msk followed by the full description of
+ * subflow @ssk: the mptcp_event_add_subflow() attributes, the backup flag
+ * and, when applicable, the bound interface index and the socket error.
+ *
+ * Returns 0 on success, -EMSGSIZE on any attribute failure (including any
+ * error reported by mptcp_event_add_subflow()), -EINVAL when @ssk has no
+ * subflow context.
+ */
+static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
+ const struct mptcp_sock *msk,
+ const struct sock *ssk)
+{
+ const struct sock *sk = (const struct sock *)msk;
+ const struct mptcp_subflow_context *sf;
+ u8 sk_err;
+
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ return -EMSGSIZE;
+
+ if (mptcp_event_add_subflow(skb, ssk))
+ return -EMSGSIZE;
+
+ sf = mptcp_subflow_ctx(ssk);
+ if (WARN_ON_ONCE(!sf))
+ return -EINVAL;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup))
+ return -EMSGSIZE;
+
+ /* the interface index is only reported for device-bound subflows */
+ if (ssk->sk_bound_dev_if &&
+ nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
+ return -EMSGSIZE;
+
+ /* the subflow error is narrowed to u8 and only reported while the
+ * MPTCP socket itself is in TCP_ESTABLISHED state
+ */
+ sk_err = ssk->sk_err;
+ if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
+ nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/* Fill the payload of a subflow-established style event: token of @msk plus
+ * the description of @ssk. Returns the result of
+ * mptcp_event_put_token_and_ssk() unchanged.
+ */
+static int mptcp_event_sub_established(struct sk_buff *skb,
+ const struct mptcp_sock *msk,
+ const struct sock *ssk)
+{
+ return mptcp_event_put_token_and_ssk(skb, msk, ssk);
+}
+
+/* Fill the payload of a subflow-closed event: token of @msk plus the
+ * description of @ssk. Any failure is reported as -EMSGSIZE; returns 0 on
+ * success.
+ */
+static int mptcp_event_sub_closed(struct sk_buff *skb,
+				  const struct mptcp_sock *msk,
+				  const struct sock *ssk)
+{
+	return mptcp_event_put_token_and_ssk(skb, msk, ssk) ? -EMSGSIZE : 0;
+}
+
+/* Fill the payload of a connection created/established event: the token of
+ * @msk followed by the attributes of subflow @ssk. Returns 0 or the first
+ * error encountered.
+ */
+static int mptcp_event_created(struct sk_buff *skb,
+			       const struct mptcp_sock *msk,
+			       const struct sock *ssk)
+{
+	int err;
+
+	err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token);
+	return err ? err : mptcp_event_add_subflow(skb, ssk);
+}
+
+/* Multicast an MPTCP_EVENT_REMOVED message carrying the token of @msk and
+ * the removed remote address @id. Nothing is built when the event group has
+ * no listeners; allocation or attribute failures silently drop the event.
+ */
+void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
+{
+	struct net *net = sock_net((const struct sock *)msk);
+	struct sk_buff *msg;
+	struct nlmsghdr *hdr;
+
+	if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!msg)
+		return;
+
+	hdr = genlmsg_put(msg, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED);
+	if (!hdr)
+		goto drop;
+
+	/* attributes are added in order; the first failure aborts the event */
+	if (nla_put_u32(msg, MPTCP_ATTR_TOKEN, msk->token) ||
+	    nla_put_u8(msg, MPTCP_ATTR_REM_ID, id))
+		goto drop;
+
+	genlmsg_end(msg, hdr);
+	mptcp_nl_mcast_send(net, msg, GFP_ATOMIC);
+	return;
+
+drop:
+	kfree_skb(msg);
+}
+
+/* Multicast an MPTCP_EVENT_ANNOUNCED message describing the address @info
+ * announced on @msk: token, remote id, port and the IPv4 or IPv6 address.
+ *
+ * Nothing is built when the event group has no listeners. Allocation or
+ * attribute failures, as well as an unexpected address family, silently
+ * drop the event.
+ */
+void mptcp_event_addr_announced(const struct mptcp_sock *msk,
+ const struct mptcp_addr_info *info)
+{
+ struct net *net = sock_net((const struct sock *)msk);
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0,
+ MPTCP_EVENT_ANNOUNCED);
+ if (!nlh)
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
+ goto nla_put_failure;
+
+ /* info->port is emitted as a big-endian attribute, without conversion */
+ if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port))
+ goto nla_put_failure;
+
+ switch (info->family) {
+ case AF_INET:
+ if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr))
+ goto nla_put_failure;
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case AF_INET6:
+ if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6))
+ goto nla_put_failure;
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(skb, nlh);
+ mptcp_nl_mcast_send(net, skb, GFP_ATOMIC);
+ return;
+
+nla_put_failure:
+ kfree_skb(skb);
+}
+
+/* Build and multicast the netlink event @type for connection @msk, using
+ * @ssk as the subflow described in the payload and @gfp for allocations.
+ *
+ * Nothing is built when the event group has no listeners. Allocation or
+ * attribute failures silently drop the event.
+ */
+void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
+ const struct sock *ssk, gfp_t gfp)
+{
+ struct net *net = sock_net((const struct sock *)msk);
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type);
+ if (!nlh)
+ goto nla_put_failure;
+
+ /* NOTE(review): the cases that only WARN below still fall through to
+ * genlmsg_end() and the multicast, i.e. a message without attributes
+ * is sent - confirm this is intended.
+ */
+ switch (type) {
+ case MPTCP_EVENT_UNSPEC:
+ WARN_ON_ONCE(1);
+ break;
+ case MPTCP_EVENT_CREATED:
+ case MPTCP_EVENT_ESTABLISHED:
+ if (mptcp_event_created(skb, msk, ssk) < 0)
+ goto nla_put_failure;
+ break;
+ case MPTCP_EVENT_CLOSED:
+ if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0)
+ goto nla_put_failure;
+ break;
+ case MPTCP_EVENT_ANNOUNCED:
+ case MPTCP_EVENT_REMOVED:
+ /* call mptcp_event_addr_announced()/removed instead */
+ WARN_ON_ONCE(1);
+ break;
+ case MPTCP_EVENT_SUB_ESTABLISHED:
+ case MPTCP_EVENT_SUB_PRIORITY:
+ if (mptcp_event_sub_established(skb, msk, ssk) < 0)
+ goto nla_put_failure;
+ break;
+ case MPTCP_EVENT_SUB_CLOSED:
+ if (mptcp_event_sub_closed(skb, msk, ssk) < 0)
+ goto nla_put_failure;
+ break;
+ }
+
+ genlmsg_end(skb, nlh);
+ mptcp_nl_mcast_send(net, skb, gfp);
+ return;
+
+nla_put_failure:
+ kfree_skb(skb);
+}
+
static const struct genl_small_ops mptcp_pm_ops[] = {
{
.cmd = MPTCP_PM_CMD_ADD_ADDR,
@@ -1126,6 +1772,11 @@ static const struct genl_small_ops mptcp_pm_ops[] = {
.cmd = MPTCP_PM_CMD_GET_LIMITS,
.doit = mptcp_nl_cmd_get_limits,
},
+ {
+ .cmd = MPTCP_PM_CMD_SET_FLAGS,
+ .doit = mptcp_nl_cmd_set_flags,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static struct genl_family mptcp_genl_family __ro_after_init = {
@@ -1148,6 +1799,7 @@ static int __net_init pm_nl_init_net(struct net *net)
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
__reset_counters(pernet);
pernet->next_id = 1;
+ bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
spin_lock_init(&pernet->lock);
return 0;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 09b19aa2f205..c5d5e68940ea 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -11,6 +11,7 @@
#include <linux/netdevice.h>
#include <linux/sched/signal.h>
#include <linux/atomic.h>
+#include <linux/igmp.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -19,6 +20,7 @@
#include <net/tcp_states.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/transp_v6.h>
+#include <net/addrconf.h>
#endif
#include <net/mptcp.h>
#include <net/xfrm.h>
@@ -45,11 +47,14 @@ static struct percpu_counter mptcp_sockets_allocated;
static void __mptcp_destroy_sock(struct sock *sk);
static void __mptcp_check_send_data_fin(struct sock *sk);
+DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
+static struct net_device mptcp_napi_dev;
+
/* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
* completed yet or has failed, return the subflow socket.
* Otherwise return NULL.
*/
-static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
+struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
{
if (!msk->subflow || READ_ONCE(msk->can_ack))
return NULL;
@@ -114,11 +119,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
subflow->request_mptcp = 1;
-
- /* accept() will wait on first subflow sk_wq, and we always wakes up
- * via msk->sk_socket
- */
- RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq);
+ mptcp_sock_graft(msk->first, sk->sk_socket);
return 0;
}
@@ -364,8 +365,6 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
/* Look for an acknowledged DATA_FIN */
if (mptcp_pending_data_fin_ack(sk)) {
- mptcp_stop_timer(sk);
-
WRITE_ONCE(msk->snd_data_fin_enable, 0);
switch (sk->sk_state) {
@@ -427,7 +426,7 @@ static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
static bool tcp_can_send_ack(const struct sock *ssk)
{
return !((1 << inet_sk_state_load(ssk)) &
- (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE));
+ (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN));
}
static void mptcp_send_ack(struct mptcp_sock *msk)
@@ -459,7 +458,18 @@ static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
struct sock *ack_hint = READ_ONCE(msk->ack_hint);
+ int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ bool cleanup;
+
+ /* this is a simple superset of what tcp_cleanup_rbuf() implements
+ * so that we don't have to acquire the ssk socket lock most of the time
+ * to do actually nothing
+ */
+ cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
+ if (!cleanup)
+ return;
/* if the hinted ssk is still active, try to use it */
if (likely(ack_hint)) {
@@ -734,10 +744,14 @@ wake:
void __mptcp_flush_join_list(struct mptcp_sock *msk)
{
+ struct mptcp_subflow_context *subflow;
+
if (likely(list_empty(&msk->join_list)))
return;
spin_lock_bh(&msk->join_list_lock);
+ list_for_each_entry(subflow, &msk->join_list, node)
+ mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
list_splice_tail_init(&msk->join_list, &msk->conn_list);
spin_unlock_bh(&msk->join_list_lock);
}
@@ -877,6 +891,9 @@ static void __mptcp_wmem_reserve(struct sock *sk, int size)
struct mptcp_sock *msk = mptcp_sk(sk);
WARN_ON_ONCE(msk->wmem_reserved);
+ if (WARN_ON_ONCE(amount < 0))
+ amount = 0;
+
if (amount <= sk->sk_forward_alloc)
goto reserve;
@@ -1034,13 +1051,6 @@ out:
__mptcp_update_wmem(sk);
sk_mem_reclaim_partial(sk);
}
-
- if (sk_stream_is_writeable(sk)) {
- /* pairs with memory barrier in mptcp_poll */
- smp_mb();
- if (test_and_clear_bit(MPTCP_NOSPACE, &msk->flags))
- sk_stream_write_space(sk);
- }
}
if (snd_una == READ_ONCE(msk->snd_nxt)) {
@@ -1359,8 +1369,7 @@ struct subflow_send_info {
u64 ratio;
};
-static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
- u32 *sndbuf)
+static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct subflow_send_info send_info[2];
struct mptcp_subflow_context *subflow;
@@ -1371,24 +1380,17 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
sock_owned_by_me((struct sock *)msk);
- *sndbuf = 0;
if (__mptcp_check_fallback(msk)) {
if (!msk->first)
return NULL;
- *sndbuf = msk->first->sk_sndbuf;
return sk_stream_memory_free(msk->first) ? msk->first : NULL;
}
/* re-use last subflow, if the burst allow that */
if (msk->last_snd && msk->snd_burst > 0 &&
sk_stream_memory_free(msk->last_snd) &&
- mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
- mptcp_for_each_subflow(msk, subflow) {
- ssk = mptcp_subflow_tcp_sock(subflow);
- *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf);
- }
+ mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd)))
return msk->last_snd;
- }
/* pick the subflow with the lower wmem/wspace ratio */
for (i = 0; i < 2; ++i) {
@@ -1401,8 +1403,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
continue;
nr_active += !subflow->backup;
- *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf);
- if (!sk_stream_memory_free(subflow->tcp_sock))
+ if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd)
continue;
pace = READ_ONCE(ssk->sk_pacing_rate);
@@ -1428,9 +1429,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk,
if (send_info[0].ssk) {
msk->last_snd = send_info[0].ssk;
msk->snd_burst = min_t(int, MPTCP_SEND_BURST_SIZE,
- sk_stream_wspace(msk->last_snd));
+ tcp_sk(msk->last_snd)->snd_wnd);
return msk->last_snd;
}
+
return NULL;
}
@@ -1451,7 +1453,6 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags)
};
struct mptcp_data_frag *dfrag;
int len, copied = 0;
- u32 sndbuf;
while ((dfrag = mptcp_send_head(sk))) {
info.sent = dfrag->already_sent;
@@ -1462,12 +1463,7 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags)
prev_ssk = ssk;
__mptcp_flush_join_list(msk);
- ssk = mptcp_subflow_get_send(msk, &sndbuf);
-
- /* do auto tuning */
- if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
- sndbuf > READ_ONCE(sk->sk_sndbuf))
- WRITE_ONCE(sk->sk_sndbuf, sndbuf);
+ ssk = mptcp_subflow_get_send(msk);
/* try to keep the subflow socket lock across
* consecutive xmit on the same socket
@@ -1524,7 +1520,9 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_sendmsg_info info;
struct mptcp_data_frag *dfrag;
+ struct sock *xmit_ssk;
int len, copied = 0;
+ bool first = true;
info.flags = 0;
while ((dfrag = mptcp_send_head(sk))) {
@@ -1534,10 +1532,17 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
while (len > 0) {
int ret = 0;
- /* do auto tuning */
- if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
- ssk->sk_sndbuf > READ_ONCE(sk->sk_sndbuf))
- WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf);
+ /* the caller already invoked the packet scheduler,
+ * check for a different subflow usage only after
+ * spooling the first chunk of data
+ */
+ xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+ if (!xmit_ssk)
+ goto out;
+ if (xmit_ssk != ssk) {
+ mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+ goto out;
+ }
if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) {
__mptcp_update_wmem(sk);
@@ -1557,6 +1562,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
msk->tx_pending_data -= ret;
copied += ret;
len -= ret;
+ first = false;
}
WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
}
@@ -1570,12 +1576,24 @@ out:
mptcp_set_timeout(sk, ssk);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ if (!mptcp_timer_pending(sk))
+ mptcp_reset_timer(sk);
+
if (msk->snd_data_fin_enable &&
msk->snd_nxt + 1 == msk->write_seq)
mptcp_schedule_work(sk);
}
}
+static void mptcp_set_nospace(struct sock *sk)
+{ /* Set the "no space" bit on both the owning struct socket and the msk. */
+ /* enable autotune */
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* NOTE(review): sk->sk_socket is dereferenced unchecked - confirm it cannot be NULL for the callers */
+
+ /* will be cleared on avail space */
+ set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
+}
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1587,7 +1605,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
return -EOPNOTSUPP;
- mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, len));
+ mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len)));
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -1677,7 +1695,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
continue;
wait_for_memory:
- set_bit(MPTCP_NOSPACE, &msk->flags);
+ mptcp_set_nospace(sk);
mptcp_push_pending(sk, msg->msg_flags);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
@@ -1864,7 +1882,7 @@ static void __mptcp_splice_receive_queue(struct sock *sk)
skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
}
-static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
+static bool __mptcp_move_skbs(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
unsigned int moved = 0;
@@ -1884,13 +1902,10 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
slowpath = lock_sock_fast(ssk);
mptcp_data_lock(sk);
+ __mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
- if (moved && rcv) {
- WRITE_ONCE(msk->rmem_pending, min(rcv, moved));
- tcp_cleanup_rbuf(ssk, 1);
- WRITE_ONCE(msk->rmem_pending, 0);
- }
+ tcp_cleanup_rbuf(ssk, moved);
unlock_sock_fast(ssk, slowpath);
} while (!done);
@@ -1903,6 +1918,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
+ mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
@@ -1932,7 +1948,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
while (copied < len) {
- int bytes_read, old_space;
+ int bytes_read;
bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied);
if (unlikely(bytes_read < 0)) {
@@ -1943,14 +1959,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
copied += bytes_read;
- if (skb_queue_empty(&msk->receive_queue) &&
- __mptcp_move_skbs(msk, len - copied))
- continue;
-
/* be sure to advertise window change */
- old_space = READ_ONCE(msk->old_wspace);
- if ((tcp_space(sk) - old_space) >= old_space)
- mptcp_cleanup_rbuf(msk);
+ mptcp_cleanup_rbuf(msk);
+
+ if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
+ continue;
/* only the master socket status is relevant here. The exit
* conditions mirror closely tcp_recvmsg()
@@ -1978,7 +1991,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
/* race breaker: the shutdown could be after the
* previous receive queue check
*/
- if (__mptcp_move_skbs(msk, len - copied))
+ if (__mptcp_move_skbs(msk))
continue;
break;
}
@@ -2011,7 +2024,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
/* .. race-breaker: ssk might have gotten new data
* after last __mptcp_move_skbs() returned false.
*/
- if (unlikely(__mptcp_move_skbs(msk, 0)))
+ if (unlikely(__mptcp_move_skbs(msk)))
set_bit(MPTCP_DATA_READY, &msk->flags);
} else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
/* data to read but mptcp_wait_data() cleared DATA_READY */
@@ -2110,12 +2123,9 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
* so we need to use tcp_close() after detaching them from the mptcp
* parent socket.
*/
-void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
- struct mptcp_subflow_context *subflow)
+static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow)
{
- bool dispose_socket = false;
- struct socket *sock;
-
list_del(&subflow->node);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
@@ -2123,11 +2133,8 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
/* if we are invoked by the msk cleanup code, the subflow is
* already orphaned
*/
- sock = ssk->sk_socket;
- if (sock) {
- dispose_socket = sock != sk->sk_socket;
+ if (ssk->sk_socket)
sock_orphan(ssk);
- }
subflow->disposable = 1;
@@ -2145,59 +2152,40 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
__sock_put(ssk);
}
release_sock(ssk);
- if (dispose_socket)
- iput(SOCK_INODE(sock));
sock_put(ssk);
}
-static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
+void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow)
{
- return 0;
+ if (sk->sk_state == TCP_ESTABLISHED)
+ mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);
+ __mptcp_close_ssk(sk, ssk, subflow);
}
-static void pm_work(struct mptcp_sock *msk)
+static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
{
- struct mptcp_pm_data *pm = &msk->pm;
-
- spin_lock_bh(&msk->pm.lock);
-
- pr_debug("msk=%p status=%x", msk, pm->status);
- if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
- pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
- mptcp_pm_nl_add_addr_received(msk);
- }
- if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
- pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
- mptcp_pm_nl_add_addr_send_ack(msk);
- }
- if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
- pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
- mptcp_pm_nl_rm_addr_received(msk);
- }
- if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
- pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
- mptcp_pm_nl_fully_established(msk);
- }
- if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
- pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
- mptcp_pm_nl_subflow_established(msk);
- }
-
- spin_unlock_bh(&msk->pm.lock);
+ return 0;
}
static void __mptcp_close_subflow(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
+ might_sleep();
+
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
if (inet_sk_state_load(ssk) != TCP_CLOSE)
continue;
- __mptcp_close_ssk((struct sock *)msk, ssk, subflow);
+ /* 'subflow_data_ready' will re-sched once rx queue is empty */
+ if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
+ continue;
+
+ mptcp_close_ssk((struct sock *)msk, ssk, subflow);
}
}
@@ -2269,11 +2257,8 @@ static void mptcp_worker(struct work_struct *work)
mptcp_check_fastclose(msk);
- if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
- __mptcp_close_subflow(msk);
-
if (msk->pm.status)
- pm_work(msk);
+ mptcp_pm_nl_work(msk);
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
mptcp_check_for_eof(msk);
@@ -2281,21 +2266,24 @@ static void mptcp_worker(struct work_struct *work)
__mptcp_check_send_data_fin(sk);
mptcp_check_data_fin(sk);
- /* if the msk data is completely acked, or the socket timedout,
- * there is no point in keeping around an orphaned sk
+ /* There is no point in keeping around an orphaned sk that timed out or
+ * is closed, but we need the msk around to reply to incoming DATA_FIN,
+ * even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD) &&
- (mptcp_check_close_timeout(sk) ||
- (state != sk->sk_state &&
- ((1 << inet_sk_state_load(sk)) & (TCPF_CLOSE | TCPF_FIN_WAIT2))))) {
+ (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) {
inet_sk_state_store(sk, TCP_CLOSE);
__mptcp_destroy_sock(sk);
goto unlock;
}
+ if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ __mptcp_close_subflow(msk);
+
if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
goto unlock;
+ __mptcp_clean_una(sk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag)
goto unlock;
@@ -2533,6 +2521,14 @@ static void __mptcp_destroy_sock(struct sock *sk)
pr_debug("msk=%p", msk);
+ might_sleep();
+
+ /* dispose the ancillary tcp socket, if any */
+ if (msk->subflow) {
+ iput(SOCK_INODE(msk->subflow));
+ msk->subflow = NULL;
+ }
+
/* be sure to always acquire the join list lock, to sync vs
* mptcp_finish_join().
*/
@@ -2583,20 +2579,10 @@ cleanup:
inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- bool slow, dispose_socket;
- struct socket *sock;
+ bool slow = lock_sock_fast(ssk);
- slow = lock_sock_fast(ssk);
- sock = ssk->sk_socket;
- dispose_socket = sock && sock != sk->sk_socket;
sock_orphan(ssk);
unlock_sock_fast(ssk, slow);
-
- /* for the outgoing subflows we additionally need to free
- * the associated socket
- */
- if (dispose_socket)
- iput(SOCK_INODE(sock));
}
sock_orphan(sk);
@@ -2611,6 +2597,10 @@ cleanup:
release_sock(sk);
if (do_cancel_work)
mptcp_cancel_work(sk);
+
+ if (mptcp_sk(sk)->token)
+ mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+
sock_put(sk);
}
@@ -2639,11 +2629,17 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
static int mptcp_disconnect(struct sock *sk, int flags)
{
- /* Should never be called.
- * inet_stream_connect() calls ->disconnect, but that
- * refers to the subflow socket, not the mptcp one.
- */
- WARN_ON_ONCE(1);
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ __mptcp_flush_join_list(msk);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ lock_sock(ssk);
+ tcp_disconnect(ssk, flags);
+ release_sock(ssk);
+ }
return 0;
}
@@ -2919,10 +2915,16 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
if (!mptcp_send_head(sk))
return;
- if (!sock_owned_by_user(sk))
- __mptcp_subflow_push_pending(sk, ssk);
- else
+ if (!sock_owned_by_user(sk)) {
+ struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
+
+ if (xmit_ssk == ssk)
+ __mptcp_subflow_push_pending(sk, ssk);
+ else if (xmit_ssk)
+ mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+ } else {
set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ }
}
#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED)
@@ -2950,6 +2952,8 @@ static void mptcp_release_cb(struct sock *sk)
mptcp_push_pending(sk, 0);
spin_lock_bh(&sk->sk_lock.slock);
}
+ if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
+ __mptcp_error_report(sk);
/* clear any wmem reservation and errors */
__mptcp_update_wmem(sk);
@@ -2970,6 +2974,20 @@ static void mptcp_release_cb(struct sock *sk)
}
}
+void mptcp_subflow_process_delegated(struct sock *ssk)
+{ /* Push pending msk data through @ssk, or only flag the msk when it is owned by a user context. */
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn; /* parent MPTCP socket */
+
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ __mptcp_subflow_push_pending(sk, ssk);
+ else
+ set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); /* presumably handled when the owner releases the msk - verify in mptcp_release_cb() */
+ mptcp_data_unlock(sk);
+ mptcp_subflow_delegated_done(subflow); /* the delegated status bit is cleared in both branches */
+}
+
static int mptcp_hash(struct sock *sk)
{
/* should never be called,
@@ -3027,12 +3045,12 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
- mptcp_pm_new_connection(msk, 0);
+ mptcp_pm_new_connection(msk, ssk, 0);
mptcp_rcv_space_init(msk, ssk);
}
-static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
+void mptcp_sock_graft(struct sock *sk, struct socket *parent)
{
write_lock_bh(&sk->sk_callback_lock);
rcu_assign_pointer(sk->sk_wq, &parent->wq);
@@ -3056,7 +3074,7 @@ bool mptcp_finish_join(struct sock *ssk)
return false;
if (!msk->pm.server_side)
- return true;
+ goto out;
if (!mptcp_pm_allow_new_subflow(msk))
return false;
@@ -3083,9 +3101,19 @@ bool mptcp_finish_join(struct sock *ssk)
if (parent_sock && !ssk->sk_socket)
mptcp_sock_graft(ssk, parent_sock);
subflow->map_seq = READ_ONCE(msk->ack_seq);
+out:
+ mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
return true;
}
+static void mptcp_shutdown(struct sock *sk, int how)
+{ /* proto-level shutdown hook (installed as mptcp_prot.shutdown); only SEND_SHUTDOWN is acted upon. */
+ pr_debug("sk=%p, how=%d", sk, how);
+
+ if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) /* mptcp_close_state() is only evaluated when the send side is being shut down */
+ __mptcp_wr_shutdown(sk);
+}
+
static struct proto mptcp_prot = {
.name = "MPTCP",
.owner = THIS_MODULE,
@@ -3095,7 +3123,7 @@ static struct proto mptcp_prot = {
.accept = mptcp_accept,
.setsockopt = mptcp_setsockopt,
.getsockopt = mptcp_getsockopt,
- .shutdown = tcp_shutdown,
+ .shutdown = mptcp_shutdown,
.destroy = mptcp_destroy,
.sendmsg = mptcp_sendmsg,
.recvmsg = mptcp_recvmsg,
@@ -3251,9 +3279,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct mptcp_sock *msk = mptcp_sk(newsock->sk);
struct mptcp_subflow_context *subflow;
struct sock *newsk = newsock->sk;
- bool slowpath;
- slowpath = lock_sock_fast(newsk);
+ lock_sock(newsk);
/* PM/worker can now acquire the first subflow socket
* lock without racing with listener queue cleanup,
@@ -3263,10 +3290,11 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
list_add(&subflow->node, &msk->conn_list);
sock_hold(msk->first);
if (mptcp_is_fully_established(newsk))
- mptcp_pm_fully_established(msk);
+ mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);
mptcp_copy_inaddrs(newsk, msk->first);
mptcp_rcv_space_init(msk, msk->first);
+ mptcp_propagate_sndbuf(newsk, msk->first);
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack.
@@ -3278,7 +3306,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
- unlock_sock_fast(newsk, slowpath);
+ release_sock(newsk);
}
if (inet_csk_listen_poll(ssock->sk))
@@ -3302,12 +3330,12 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
struct sock *sk = (struct sock *)msk;
if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
- return 0;
+ return EPOLLOUT | EPOLLWRNORM;
if (sk_stream_is_writeable(sk))
return EPOLLOUT | EPOLLWRNORM;
- set_bit(MPTCP_NOSPACE, &msk->flags);
+ mptcp_set_nospace(sk);
smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
if (sk_stream_is_writeable(sk))
return EPOLLOUT | EPOLLWRNORM;
@@ -3335,53 +3363,47 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
mask |= mptcp_check_readable(msk);
mask |= mptcp_check_writeable(msk);
}
+ if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+ /* This barrier is coupled with smp_wmb() in tcp_reset() */
+ smp_rmb();
+ if (sk->sk_err)
+ mask |= EPOLLERR;
+
return mask;
}
-static int mptcp_shutdown(struct socket *sock, int how)
+static int mptcp_release(struct socket *sock)
{
- struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct mptcp_subflow_context *subflow;
struct sock *sk = sock->sk;
- int ret = 0;
+ struct mptcp_sock *msk;
- pr_debug("sk=%p, how=%d", msk, how);
+ if (!sk)
+ return 0;
lock_sock(sk);
- how++;
- if ((how & ~SHUTDOWN_MASK) || !how) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- if (sock->state == SS_CONNECTING) {
- if ((1 << sk->sk_state) &
- (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
- sock->state = SS_DISCONNECTING;
- else
- sock->state = SS_CONNECTED;
- }
+ msk = mptcp_sk(sk);
- sk->sk_shutdown |= how;
- if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk))
- __mptcp_wr_shutdown(sk);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- /* Wake up anyone sleeping in poll. */
- sk->sk_state_change(sk);
+ ip_mc_drop_socket(ssk);
+ }
-out_unlock:
release_sock(sk);
- return ret;
+ return inet_release(sock);
}
static const struct proto_ops mptcp_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
- .release = inet_release,
+ .release = mptcp_release,
.bind = mptcp_bind,
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
@@ -3391,7 +3413,7 @@ static const struct proto_ops mptcp_stream_ops = {
.ioctl = inet_ioctl,
.gettstamp = sock_gettstamp,
.listen = mptcp_listen,
- .shutdown = mptcp_shutdown,
+ .shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
@@ -3408,13 +3430,58 @@ static struct inet_protosw mptcp_protosw = {
.flags = INET_PROTOSW_ICSK,
};
+static int mptcp_napi_poll(struct napi_struct *napi, int budget)
+{ /* NAPI poll callback: dequeue delegated subflows and process them, at most @budget per call. */
+ struct mptcp_delegated_action *delegated;
+ struct mptcp_subflow_context *subflow;
+ int work_done = 0;
+
+ delegated = container_of(napi, struct mptcp_delegated_action, napi); /* @napi is embedded in the delegated-action struct */
+ while ((subflow = mptcp_subflow_delegated_next(delegated)) != NULL) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ bh_lock_sock_nested(ssk);
+ if (!sock_owned_by_user(ssk) &&
+ mptcp_subflow_has_delegated_action(subflow))
+ mptcp_subflow_process_delegated(ssk);
+ /* ... elsewhere tcp_release_cb_override already processed
+ * the action or will do at next release_sock().
+ * In both cases the subflow must be dequeued here - on the same
+ * CPU that scheduled it.
+ */
+ bh_unlock_sock(ssk);
+ sock_put(ssk); /* pairs with sock_hold() in mptcp_subflow_delegate() */
+
+ if (++work_done == budget)
+ return budget; /* budget used up: return without calling napi_complete_done() */
+ }
+
+ /* always provide a 0 'work_done' argument, so that napi_complete_done
+ * will not try accessing the NULL napi->dev ptr
+ */
+ napi_complete_done(napi, 0);
+ return work_done; /* the real count is still returned to the caller */
+}
+
void __init mptcp_proto_init(void)
{
+ struct mptcp_delegated_action *delegated;
+ int cpu;
+
mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;
if (percpu_counter_init(&mptcp_sockets_allocated, 0, GFP_KERNEL))
panic("Failed to allocate MPTCP pcpu counter\n");
+ init_dummy_netdev(&mptcp_napi_dev);
+ for_each_possible_cpu(cpu) {
+ delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu);
+ INIT_LIST_HEAD(&delegated->head);
+ netif_tx_napi_add(&mptcp_napi_dev, &delegated->napi, mptcp_napi_poll,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&delegated->napi);
+ }
+
mptcp_subflow_init();
mptcp_pm_init();
mptcp_token_init();
@@ -3428,10 +3495,35 @@ void __init mptcp_proto_init(void)
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static int mptcp6_release(struct socket *sock)
+{ /* IPv6 ->release handler (see mptcp_v6_stream_ops): per-subflow cleanup, then inet6_release(). */
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk;
+ struct sock *sk = sock->sk;
+
+ if (!sk)
+ return 0; /* no sock attached: nothing to do */
+
+ lock_sock(sk); /* the subflow list is walked under the msk socket lock */
+
+ msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ ip_mc_drop_socket(ssk); /* the IPv4 helper is called as well, for every subflow */
+ ipv6_sock_mc_close(ssk);
+ ipv6_sock_ac_close(ssk);
+ }
+
+ release_sock(sk);
+ return inet6_release(sock); /* the generic release runs after the lock is dropped */
+}
+
static const struct proto_ops mptcp_v6_stream_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
- .release = inet6_release,
+ .release = mptcp6_release,
.bind = mptcp_bind,
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
@@ -3441,7 +3533,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.ioctl = inet6_ioctl,
.gettstamp = sock_gettstamp,
.listen = mptcp_listen,
- .shutdown = mptcp_shutdown,
+ .shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet6_sendmsg,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d67de793d363..91827d949766 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -10,6 +10,7 @@
#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
+#include <uapi/linux/mptcp.h>
#define MPTCP_SUPPORTED_VERSION 1
@@ -24,6 +25,7 @@
#define OPTION_MPTCP_ADD_ADDR6 BIT(7)
#define OPTION_MPTCP_RM_ADDR BIT(8)
#define OPTION_MPTCP_FASTCLOSE BIT(9)
+#define OPTION_MPTCP_PRIO BIT(10)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
@@ -59,6 +61,8 @@
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24
#define TCPOLEN_MPTCP_PORT_LEN 4
#define TCPOLEN_MPTCP_RM_ADDR_BASE 4
+#define TCPOLEN_MPTCP_PRIO 3
+#define TCPOLEN_MPTCP_PRIO_ALIGN 4
#define TCPOLEN_MPTCP_FASTCLOSE 12
/* MPTCP MP_JOIN flags */
@@ -86,6 +90,9 @@
#define MPTCP_ADDR_IPVERSION_4 4
#define MPTCP_ADDR_IPVERSION_6 6
+/* MPTCP MP_PRIO flags */
+#define MPTCP_PRIO_BKUP BIT(0)
+
/* MPTCP socket flags */
#define MPTCP_DATA_READY 0
#define MPTCP_NOSPACE 1
@@ -95,6 +102,7 @@
#define MPTCP_WORK_CLOSE_SUBFLOW 5
#define MPTCP_PUSH_PENDING 6
#define MPTCP_CLEAN_UNA 7
+#define MPTCP_ERROR_REPORT 8
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -116,6 +124,7 @@ struct mptcp_options_received {
dss : 1,
add_addr : 1,
rm_addr : 1,
+ mp_prio : 1,
family : 4,
echo : 1,
backup : 1;
@@ -196,10 +205,6 @@ struct mptcp_pm_data {
u8 add_addr_accepted;
u8 local_addr_used;
u8 subflows;
- u8 add_addr_signal_max;
- u8 add_addr_accept_max;
- u8 local_addr_max;
- u8 subflows_max;
u8 status;
u8 rm_id;
};
@@ -233,7 +238,6 @@ struct mptcp_sock {
u64 wnd_end;
unsigned long timer_ival;
u32 token;
- int rmem_pending;
int rmem_released;
unsigned long flags;
bool can_ack;
@@ -285,6 +289,11 @@ struct mptcp_sock {
#define mptcp_for_each_subflow(__msk, __subflow) \
list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+static inline void msk_owned_by_me(const struct mptcp_sock *msk)
+{ /* Typed convenience wrapper: runs sock_owned_by_me() on the msk viewed as a struct sock. */
+ sock_owned_by_me((const struct sock *)msk);
+}
+
static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
{
return (struct mptcp_sock *)sk;
@@ -292,7 +301,7 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
static inline int __mptcp_space(const struct sock *sk)
{
- return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending);
+ return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
@@ -325,20 +334,13 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}
-static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
+static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una)))
+ if (msk->snd_una == READ_ONCE(msk->snd_nxt))
return NULL;
- return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
-}
-
-static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}
@@ -372,6 +374,15 @@ enum mptcp_data_avail {
MPTCP_SUBFLOW_OOO_DATA
};
+struct mptcp_delegated_action { /* one instance per CPU, see DECLARE_PER_CPU below */
+ struct napi_struct napi; /* scheduled by mptcp_subflow_delegate() */
+ struct list_head head; /* subflows queued through their delegated_node */
+};
+
+DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
+
+#define MPTCP_DELEGATE_SEND 0
+
/* MPTCP subflow context */
struct mptcp_subflow_context {
struct list_head node;/* conn_list of subflows */
@@ -396,6 +407,7 @@ struct mptcp_subflow_context {
map_valid : 1,
mpc_map : 1,
backup : 1,
+ send_mp_prio : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1; /* ctx can be free at ulp release time */
@@ -408,12 +420,16 @@ struct mptcp_subflow_context {
u8 local_id;
u8 remote_id;
+ long delegated_status;
+ struct list_head delegated_node; /* link into delegated_action, protected by local BH */
+
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
const struct inet_connection_sock_af_ops *icsk_af_ops;
void (*tcp_data_ready)(struct sock *sk);
void (*tcp_state_change)(struct sock *sk);
void (*tcp_write_space)(struct sock *sk);
+ void (*tcp_error_report)(struct sock *sk);
struct rcu_head rcu;
};
@@ -456,6 +472,61 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
spin_unlock_bh(&msk->join_list_lock);
}
+void mptcp_subflow_process_delegated(struct sock *ssk);
+
+static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow)
+{
+ struct mptcp_delegated_action *delegated;
+ bool schedule;
+
+ /* The implied barrier pairs with mptcp_subflow_delegated_done(), and
+ * ensures the below list check sees list updates done prior to status
+ * bit changes
+ */
+ if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+ /* still on delegated list from previous scheduling */
+ if (!list_empty(&subflow->delegated_node))
+ return;
+
+ /* the caller held the subflow bh socket lock */
+ lockdep_assert_in_softirq();
+
+ delegated = this_cpu_ptr(&mptcp_delegated_actions);
+ schedule = list_empty(&delegated->head);
+ list_add_tail(&subflow->delegated_node, &delegated->head);
+ sock_hold(mptcp_subflow_tcp_sock(subflow));
+ if (schedule)
+ napi_schedule(&delegated->napi);
+ }
+}
+
+static inline struct mptcp_subflow_context *
+mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
+{
+ struct mptcp_subflow_context *ret;
+
+ if (list_empty(&delegated->head))
+ return NULL;
+
+ ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node);
+ list_del_init(&ret->delegated_node);
+ return ret;
+}
+
+static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
+{
+ return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+}
+
+static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow)
+{
+ /* pairs with mptcp_subflow_delegate, ensures delegated_node is updated before
+ * touching the status bit
+ */
+ smp_wmb();
+ clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+}
+
int mptcp_is_enabled(struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
@@ -463,14 +534,19 @@ void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
-void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
- struct mptcp_subflow_context *subflow);
+void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow);
void mptcp_subflow_reset(struct sock *ssk);
+void mptcp_sock_graft(struct sock *sk, struct socket *parent);
+struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
+void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
+ struct sockaddr_storage *addr,
+ unsigned short family);
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
@@ -478,6 +554,7 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
sk->sk_data_ready = ctx->tcp_data_ready;
sk->sk_state_change = ctx->tcp_state_change;
sk->sk_write_space = ctx->tcp_write_space;
+ sk->sk_error_report = ctx->tcp_error_report;
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
@@ -505,6 +582,7 @@ bool mptcp_finish_join(struct sock *sk);
bool mptcp_schedule_work(struct sock *sk);
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
+void __mptcp_error_report(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
void __mptcp_flush_join_list(struct mptcp_sock *msk);
@@ -514,6 +592,25 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
+static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
+{
+ if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf))
+ return false;
+
+ WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf);
+ return true;
+}
+
+static inline void mptcp_write_space(struct sock *sk)
+{
+ if (sk_stream_is_writeable(sk)) {
+ /* pairs with memory barrier in mptcp_poll */
+ smp_mb();
+ if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
+ sk_stream_write_space(sk);
+ }
+}
+
void mptcp_destroy_common(struct mptcp_sock *msk);
void __init mptcp_token_init(void);
@@ -539,8 +636,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
-void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
-void mptcp_pm_fully_established(struct mptcp_sock *msk);
+void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
+void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
@@ -550,7 +647,12 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id);
+void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ u8 bkup);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
+bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_addr_info *addr);
@@ -561,6 +663,11 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id);
+void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
+ const struct sock *ssk, gfp_t gfp);
+void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info);
+void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
+
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
@@ -608,13 +715,13 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
-void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
-void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
-void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk);
-void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
-void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk);
+void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
+unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
+unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
{
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 278cbe3e539e..e1fbcab257e6 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -18,12 +18,15 @@
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
+#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"
+static void mptcp_subflow_ops_undo_override(struct sock *ssk);
+
static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
enum linux_mptcp_mib_field field)
{
@@ -61,11 +64,23 @@ static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
-static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
- const struct sk_buff *skb)
+static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_req)
{
- struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+ struct mptcp_sock *msk = subflow_req->msk;
u8 hmac[SHA256_DIGEST_SIZE];
+
+ get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
+
+ subflow_generate_hmac(msk->local_key, msk->remote_key,
+ subflow_req->local_nonce,
+ subflow_req->remote_nonce, hmac);
+
+ subflow_req->thmac = get_unaligned_be64(hmac);
+}
+
+static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
+{
+ struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_sock *msk;
int local_id;
@@ -82,17 +97,10 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
}
subflow_req->local_id = local_id;
- get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
-
- subflow_generate_hmac(msk->local_key, msk->remote_key,
- subflow_req->local_nonce,
- subflow_req->remote_nonce, hmac);
-
- subflow_req->thmac = get_unaligned_be64(hmac);
return msk;
}
-static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
+static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
@@ -100,16 +108,11 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li
subflow_req->mp_join = 0;
subflow_req->msk = NULL;
mptcp_token_init_request(req);
+}
-#ifdef CONFIG_TCP_MD5SIG
- /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
- * TCP option space.
- */
- if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
- return -EINVAL;
-#endif
-
- return 0;
+static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
+{
+ return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
}
/* Init mptcp request socket.
@@ -117,20 +120,23 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li
* Returns an error code if a JOIN has failed and a TCP reset
* should be sent.
*/
-static int subflow_init_req(struct request_sock *req,
- const struct sock *sk_listener,
- struct sk_buff *skb)
+static int subflow_check_req(struct request_sock *req,
+ const struct sock *sk_listener,
+ struct sk_buff *skb)
{
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_options_received mp_opt;
- int ret;
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
- ret = __subflow_init_req(req, sk_listener);
- if (ret)
- return 0;
+#ifdef CONFIG_TCP_MD5SIG
+ /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
+ * TCP option space.
+ */
+ if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
+ return -EINVAL;
+#endif
mptcp_get_options(skb, &mp_opt);
@@ -178,12 +184,30 @@ again:
subflow_req->remote_id = mp_opt.join_id;
subflow_req->token = mp_opt.token;
subflow_req->remote_nonce = mp_opt.nonce;
- subflow_req->msk = subflow_token_join_request(req, skb);
+ subflow_req->msk = subflow_token_join_request(req);
/* Can't fall back to TCP in this case. */
if (!subflow_req->msk)
return -EPERM;
+ if (subflow_use_different_sport(subflow_req->msk, sk_listener)) {
+ pr_debug("syn inet_sport=%d %d",
+ ntohs(inet_sk(sk_listener)->inet_sport),
+ ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
+ if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
+ sock_put((struct sock *)subflow_req->msk);
+ mptcp_token_destroy_request(req);
+ tcp_request_sock_ops.destructor(req);
+ subflow_req->msk = NULL;
+ subflow_req->mp_join = 0;
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
+ return -EPERM;
+ }
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX);
+ }
+
+ subflow_req_create_thmac(subflow_req);
+
if (unlikely(req->syncookie)) {
if (mptcp_can_accept_new_subflow(subflow_req->msk))
subflow_init_req_cookie_join_save(subflow_req, skb);
@@ -205,10 +229,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
struct mptcp_options_received mp_opt;
int err;
- err = __subflow_init_req(req, sk_listener);
- if (err)
- return err;
-
+ subflow_init_req(req, sk_listener);
mptcp_get_options(skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join)
@@ -248,12 +269,13 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
int err;
tcp_rsk(req)->is_mptcp = 1;
+ subflow_init_req(req, sk);
dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
if (!dst)
return NULL;
- err = subflow_init_req(req, sk, skb);
+ err = subflow_check_req(req, sk, skb);
if (err == 0)
return dst;
@@ -273,12 +295,13 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
int err;
tcp_rsk(req)->is_mptcp = 1;
+ subflow_init_req(req, sk);
dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
if (!dst)
return NULL;
- err = subflow_init_req(req, sk, skb);
+ err = subflow_check_req(req, sk, skb);
if (err == 0)
return dst;
@@ -326,6 +349,11 @@ void mptcp_subflow_reset(struct sock *ssk)
sock_put(sk);
}
+static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct sock *sk)
+{
+ return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport;
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -343,6 +371,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (subflow->conn_finished)
return;
+ mptcp_propagate_sndbuf(parent, sk);
subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
@@ -391,6 +420,13 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
+
+ if (subflow_use_different_dport(mptcp_sk(parent), sk)) {
+ pr_debug("synack inet_dport=%d %d",
+ ntohs(inet_sk(sk)->inet_dport),
+ ntohs(inet_sk(parent)->inet_dport));
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
+ }
} else if (mptcp_check_fallback(sk)) {
fallback:
mptcp_rcv_space_init(mptcp_sk(parent), sk);
@@ -427,6 +463,7 @@ drop:
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;
+static struct proto tcpv6_prot_override;
static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
@@ -508,6 +545,8 @@ static void subflow_ulp_fallback(struct sock *sk,
icsk->icsk_ulp_ops = NULL;
rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
tcp_sk(sk)->is_mptcp = 0;
+
+ mptcp_subflow_ops_undo_override(sk);
}
static void subflow_drop_ctx(struct sock *ssk)
@@ -628,7 +667,7 @@ create_child:
* created mptcp socket
*/
new_msk->sk_destruct = mptcp_sock_destruct;
- mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
+ mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
new_msk = NULL;
@@ -653,6 +692,17 @@ create_child:
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
tcp_rsk(req)->drop_req = true;
+
+ if (subflow_use_different_sport(owner, sk)) {
+ pr_debug("ack inet_sport=%d %d",
+ ntohs(inet_sk(sk)->inet_sport),
+ ntohs(inet_sk((struct sock *)owner)->inet_sport));
+ if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
+ goto out;
+ }
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
+ }
}
}
@@ -681,6 +731,7 @@ dispose_child:
}
static struct inet_connection_sock_af_ops subflow_specific;
+static struct proto tcp_prot_override;
enum mapping_status {
MAPPING_OK,
@@ -894,6 +945,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
subflow->map_valid = 0;
}
+/* schedule the mptcp worker to remove the subflow if no more data is pending */
+static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct sock *sk = (struct sock *)msk;
+
+ if (likely(ssk->sk_state != TCP_CLOSE))
+ return;
+
+ if (skb_queue_empty(&ssk->sk_receive_queue) &&
+ !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
+ sock_hold(sk);
+ if (!schedule_work(&msk->work))
+ sock_put(sk);
+ }
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -932,11 +999,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
if (status != MAPPING_OK)
- return false;
+ goto no_data;
skb = skb_peek(&ssk->sk_receive_queue);
if (WARN_ON_ONCE(!skb))
- return false;
+ goto no_data;
/* if msk lacks the remote key, this subflow must provide an
* MP_CAPABLE-based mapping
@@ -970,6 +1037,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
return true;
+no_data:
+ subflow_sched_work_if_closed(msk, ssk);
+ return false;
fatal:
/* fatal protocol error, close the socket */
/* This barrier is coupled with smp_rmb() in tcp_poll() */
@@ -1026,6 +1096,12 @@ static void subflow_data_ready(struct sock *sk)
msk = mptcp_sk(parent);
if (state & TCPF_LISTEN) {
+ /* MPJ subflows are removed from the accept queue before reaching here,
+ * avoid stray wakeups
+ */
+ if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+ return;
+
set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
@@ -1040,7 +1116,50 @@ static void subflow_data_ready(struct sock *sk)
static void subflow_write_space(struct sock *ssk)
{
- /* we take action in __mptcp_clean_una() */
+ struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+ mptcp_propagate_sndbuf(sk, ssk);
+ mptcp_write_space(sk);
+}
+
+void __mptcp_error_report(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int err = sock_error(ssk);
+
+ if (!err)
+ continue;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+ continue;
+
+ inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ sk->sk_err = -err;
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk->sk_error_report(sk);
+ break;
+ }
+}
+
+static void subflow_error_report(struct sock *ssk)
+{
+ struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ __mptcp_error_report(sk);
+ else
+ set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags);
+ mptcp_data_unlock(sk);
}
static struct inet_connection_sock_af_ops *
@@ -1073,22 +1192,32 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
}
#endif
-static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
- struct sockaddr_storage *addr)
+void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
+ struct sockaddr_storage *addr,
+ unsigned short family)
{
memset(addr, 0, sizeof(*addr));
- addr->ss_family = info->family;
+ addr->ss_family = family;
if (addr->ss_family == AF_INET) {
struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
- in_addr->sin_addr = info->addr;
+ if (info->family == AF_INET)
+ in_addr->sin_addr = info->addr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (ipv6_addr_v4mapped(&info->addr6))
+ in_addr->sin_addr.s_addr = info->addr6.s6_addr32[3];
+#endif
in_addr->sin_port = info->port;
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
else if (addr->ss_family == AF_INET6) {
struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;
- in6_addr->sin6_addr = info->addr6;
+ if (info->family == AF_INET)
+ ipv6_addr_set_v4mapped(info->addr.s_addr,
+ &in6_addr->sin6_addr);
+ else
+ in6_addr->sin6_addr = info->addr6;
in6_addr->sin6_port = info->port;
}
#endif
@@ -1132,11 +1261,11 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;
- mptcp_info2sockaddr(loc, &addr);
+ mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (loc->family == AF_INET6)
+ if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
ssk->sk_bound_dev_if = loc->ifindex;
@@ -1152,13 +1281,16 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_id = remote_id;
subflow->request_join = 1;
subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
- mptcp_info2sockaddr(remote, &addr);
+ mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
mptcp_add_pending_subflow(msk, subflow);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
goto failed_unlink;
+ /* discard the subflow socket */
+ mptcp_sock_graft(ssk, sk->sk_socket);
+ iput(SOCK_INODE(sf));
return err;
failed_unlink:
@@ -1196,6 +1328,25 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
#endif /* CONFIG_SOCK_CGROUP_DATA */
}
+static void mptcp_subflow_ops_override(struct sock *ssk)
+{
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (ssk->sk_prot == &tcpv6_prot)
+ ssk->sk_prot = &tcpv6_prot_override;
+ else
+#endif
+ ssk->sk_prot = &tcp_prot_override;
+}
+
+static void mptcp_subflow_ops_undo_override(struct sock *ssk)
+{
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (ssk->sk_prot == &tcpv6_prot_override)
+ ssk->sk_prot = &tcpv6_prot;
+ else
+#endif
+ ssk->sk_prot = &tcp_prot;
+}
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
struct mptcp_subflow_context *subflow;
@@ -1251,6 +1402,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
*new_sock = sf;
sock_hold(sk);
subflow->conn = sk;
+ mptcp_subflow_ops_override(sf->sk);
return 0;
}
@@ -1267,6 +1419,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
INIT_LIST_HEAD(&ctx->node);
+ INIT_LIST_HEAD(&ctx->delegated_node);
pr_debug("subflow=%p", ctx);
@@ -1299,6 +1452,7 @@ static void subflow_state_change(struct sock *sk)
__subflow_state_change(sk);
if (subflow_simultaneous_connect(sk)) {
+ mptcp_propagate_sndbuf(parent, sk);
mptcp_do_fallback(sk);
mptcp_rcv_space_init(mptcp_sk(parent), sk);
pr_fallback(mptcp_sk(parent));
@@ -1316,6 +1470,8 @@ static void subflow_state_change(struct sock *sk)
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
+ subflow_sched_work_if_closed(mptcp_sk(parent), sk);
+
if (__mptcp_check_fallback(mptcp_sk(parent)) &&
!subflow->rx_eof && subflow_is_done(sk)) {
subflow->rx_eof = 1;
@@ -1352,9 +1508,11 @@ static int subflow_ulp_init(struct sock *sk)
ctx->tcp_data_ready = sk->sk_data_ready;
ctx->tcp_state_change = sk->sk_state_change;
ctx->tcp_write_space = sk->sk_write_space;
+ ctx->tcp_error_report = sk->sk_error_report;
sk->sk_data_ready = subflow_data_ready;
sk->sk_write_space = subflow_write_space;
sk->sk_state_change = subflow_state_change;
+ sk->sk_error_report = subflow_error_report;
out:
return err;
}
@@ -1378,6 +1536,7 @@ static void subflow_ulp_release(struct sock *ssk)
sock_put(sk);
}
+ mptcp_subflow_ops_undo_override(ssk);
if (release)
kfree_rcu(ctx, rcu);
}
@@ -1407,6 +1566,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
new_ctx->tcp_write_space = old_ctx->tcp_write_space;
+ new_ctx->tcp_error_report = old_ctx->tcp_error_report;
new_ctx->rel_write_seq = 1;
new_ctx->tcp_sock = newsk;
@@ -1431,6 +1591,16 @@ static void subflow_ulp_clone(const struct request_sock *req,
}
}
+static void tcp_release_cb_override(struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+ if (mptcp_subflow_has_delegated_action(subflow))
+ mptcp_subflow_process_delegated(ssk);
+
+ tcp_release_cb(ssk);
+}
+
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
.name = "mptcp",
.owner = THIS_MODULE,
@@ -1471,6 +1641,9 @@ void __init mptcp_subflow_init(void)
subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_specific.sk_rx_dst_set = subflow_finish_connect;
+ tcp_prot_override = tcp_prot;
+ tcp_prot_override.release_cb = tcp_release_cb_override;
+
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
@@ -1486,6 +1659,9 @@ void __init mptcp_subflow_init(void)
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
subflow_v6m_specific.net_frag_header_len = 0;
+
+ tcpv6_prot_override = tcpv6_prot;
+ tcpv6_prot_override.release_cb = tcp_release_cb_override;
#endif
mptcp_diag_subflow_init(&subflow_ulp_ops);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 5b1f4ec66dd9..888ccc2d4e34 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -1120,7 +1120,7 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
int payload, i, ret;
/* Find the NCSI device */
- nd = ncsi_find_dev(dev);
+ nd = ncsi_find_dev(orig_dev);
ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
if (!ndp)
return -ENODEV;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 49fbef0d99be..1a92063c73a4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
menu "Core Netfilter Configuration"
- depends on NET && INET && NETFILTER
+ depends on INET && NETFILTER
config NETFILTER_INGRESS
bool "Netfilter ingress support"
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 5f1208ad049e..6186358eac7c 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -141,20 +141,6 @@ htable_size(u8 hbits)
return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}
-/* Compute htable_bits from the user input parameter hashsize */
-static u8
-htable_bits(u32 hashsize)
-{
- /* Assume that hashsize == 2^htable_bits */
- u8 bits = fls(hashsize - 1);
-
- if (jhash_size(bits) != hashsize)
- /* Round up to the first 2^n value */
- bits = fls(hashsize);
-
- return bits;
-}
-
#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i) (cidr[i])
@@ -640,7 +626,7 @@ mtype_resize(struct ip_set *set, bool retried)
struct htype *h = set->data;
struct htable *t, *orig;
u8 htable_bits;
- size_t dsize = set->dsize;
+ size_t hsize, dsize = set->dsize;
#ifdef IP_SET_HASH_WITH_NETS
u8 flags;
struct mtype_elem *tmp;
@@ -664,14 +650,12 @@ mtype_resize(struct ip_set *set, bool retried)
retry:
ret = 0;
htable_bits++;
- if (!htable_bits) {
- /* In case we have plenty of memory :-) */
- pr_warn("Cannot increase the hashsize of set %s further\n",
- set->name);
- ret = -IPSET_ERR_HASH_FULL;
- goto out;
- }
- t = ip_set_alloc(htable_size(htable_bits));
+ if (!htable_bits)
+ goto hbwarn;
+ hsize = htable_size(htable_bits);
+ if (!hsize)
+ goto hbwarn;
+ t = ip_set_alloc(hsize);
if (!t) {
ret = -ENOMEM;
goto out;
@@ -813,6 +797,12 @@ cleanup:
if (ret == -EAGAIN)
goto retry;
goto out;
+
+hbwarn:
+ /* In case we have plenty of memory :-) */
+ pr_warn("Cannot increase the hashsize of set %s further\n", set->name);
+ ret = -IPSET_ERR_HASH_FULL;
+ goto out;
}
/* Get the current number of elements and ext_size in the set */
@@ -1521,7 +1511,11 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (!h)
return -ENOMEM;
- hbits = htable_bits(hashsize);
+ /* Compute htable_bits from the user input parameter hashsize.
+ * Assume that hashsize == 2^htable_bits,
+ * otherwise round up to the first 2^n value.
+ */
+ hbits = fls(hashsize - 1);
hsize = htable_size(hbits);
if (hsize == 0) {
kfree(h);
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index eb0e329f9b8d..d61886874940 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -4,7 +4,7 @@
#
menuconfig IP_VS
tristate "IP virtual server support"
- depends on NET && INET && NETFILTER
+ depends on INET && NETFILTER
depends on (NF_CONNTRACK || NF_CONNTRACK=n)
help
IP Virtual Server support will let you build a high-performance
@@ -271,6 +271,17 @@ config IP_VS_NQ
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_TWOS
+ tristate "weighted random twos choice least-connection scheduling"
+ help
+ The weighted random twos choice least-connection scheduling
+ algorithm picks two random real servers and directs network
+ connections to the server with the least active connections
+ normalized by the server weight.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
comment 'IPVS SH scheduler'
config IP_VS_SH_TAB_BITS
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index bfce2677fda2..bb5d8125c82a 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
obj-$(CONFIG_IP_VS_MH) += ip_vs_mh.o
obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
+obj-$(CONFIG_IP_VS_TWOS) += ip_vs_twos.o
# IPVS application helpers
obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 54e086c65721..0c132ff9b446 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -68,18 +68,6 @@ EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
EXPORT_SYMBOL(ip_vs_new_conn_out);
-#ifdef CONFIG_IP_VS_PROTO_TCP
-INDIRECT_CALLABLE_DECLARE(int
- tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
- struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
-#endif
-
-#ifdef CONFIG_IP_VS_PROTO_UDP
-INDIRECT_CALLABLE_DECLARE(int
- udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
- struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
-#endif
-
#if defined(CONFIG_IP_VS_PROTO_TCP) && defined(CONFIG_IP_VS_PROTO_UDP)
#define SNAT_CALL(f, ...) \
INDIRECT_CALL_2(f, tcp_snat_handler, udp_snat_handler, __VA_ARGS__)
diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c
new file mode 100644
index 000000000000..acb55d8393ef
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_twos.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* IPVS: Power of Twos Choice Scheduling module
+ *
+ * Authors: Darby Payne <darby.payne@applovin.com>
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/random.h>
+
+#include <net/ip_vs.h>
+
+/* Power of Twos Choice scheduling, algorithm originally described by
+ * Michael Mitzenmacher.
+ *
+ * Randomly picks two destinations and picks the one with the least
+ * amount of connections
+ *
+ * The algorithm calculates a few variables
+ * - total_weight = sum of all weights
+ * - rweight1 = random number between [0,total_weight]
+ * - rweight2 = random number between [0,total_weight]
+ *
+ * For each destination
+ * decrement rweight1 and rweight2 by the destination weight
+ * pick choice1 when rweight1 is <= 0
+ * pick choice2 when rweight2 is <= 0
+ *
+ * Return choice2 if choice2 has less connections than choice 1 normalized
+ * by weight
+ *
+ * References
+ * ----------
+ *
+ * [Mitzenmacher 2016]
+ * The Power of Two Random Choices: A Survey of Techniques and Results
+ * Michael Mitzenmacher, Andrea W. Richa, Ramesh Sitaraman
+ * http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/twosurvey.pdf
+ *
+ */
+static struct ip_vs_dest *ip_vs_twos_schedule(struct ip_vs_service *svc,
+					      const struct sk_buff *skb,
+					      struct ip_vs_iphdr *iph)
+{
+	struct ip_vs_dest *dest, *choice1 = NULL, *choice2 = NULL;
+	int rweight1, rweight2, weight1 = -1, weight2 = -1, overhead1 = 0;
+	int overhead2 = 0, total_weight = 0, weight;
+
+	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
+
+	/* First pass: sum the weights of all destinations that are not
+	 * flagged overloaded and have a positive weight. choice1 is left
+	 * pointing at the last such destination, so it serves both as the
+	 * "at least one usable destination" flag and as the fallback result
+	 * if the second pass does not select anything.
+	 */
+	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD)) {
+			weight = atomic_read(&dest->weight);
+			if (weight > 0) {
+				total_weight += weight;
+				choice1 = dest;
+			}
+		}
+	}
+
+	/* No usable destination: report it and return NULL */
+	if (!choice1) {
+		ip_vs_scheduler_err(svc, "no destination available");
+		return NULL;
+	}
+
+	/* Add 1 to total_weight so that the random weights are inclusive
+	 * from 0 to total_weight
+	 */
+	total_weight += 1;
+	rweight1 = prandom_u32() % total_weight;
+	rweight2 = prandom_u32() % total_weight;
+
+	/* Second pass: pick two weighted servers. Each usable destination's
+	 * weight is subtracted from both random values; the first destination
+	 * that brings a value to <= 0 becomes that value's choice.
+	 * weightN == -1 means "choiceN has not been picked yet".
+	 */
+	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+			continue;
+
+		weight = atomic_read(&dest->weight);
+		if (weight <= 0)
+			continue;
+
+		rweight1 -= weight;
+		rweight2 -= weight;
+
+		if (rweight1 <= 0 && weight1 == -1) {
+			choice1 = dest;
+			weight1 = weight;
+			overhead1 = ip_vs_dest_conn_overhead(dest);
+		}
+
+		if (rweight2 <= 0 && weight2 == -1) {
+			choice2 = dest;
+			weight2 = weight;
+			overhead2 = ip_vs_dest_conn_overhead(dest);
+		}
+
+		/* Both picks are made, the rest of the list is irrelevant */
+		if (weight1 != -1 && weight2 != -1)
+			goto nextstage;
+	}
+
+nextstage:
+	/* Prefer choice2 when overhead2 / weight2 < overhead1 / weight1.
+	 * The test is cross-multiplied to avoid division; the products are
+	 * computed in 64 bits because weight * overhead does not fit in an
+	 * int for large weights or connection counts.
+	 */
+	if (choice2 &&
+	    (__s64)weight2 * overhead1 > (__s64)weight1 * overhead2)
+		choice1 = choice2;
+
+	IP_VS_DBG_BUF(6, "twos: server %s:%u conns %d refcnt %d weight %d\n",
+		      IP_VS_DBG_ADDR(choice1->af, &choice1->addr),
+		      ntohs(choice1->port), atomic_read(&choice1->activeconns),
+		      refcount_read(&choice1->refcnt),
+		      atomic_read(&choice1->weight));
+
+	return choice1;
+}
+
+/* Descriptor for the "twos" scheduler; passed to
+ * register_ip_vs_scheduler() / unregister_ip_vs_scheduler() below.
+ */
+static struct ip_vs_scheduler ip_vs_twos_scheduler = {
+	.name		= "twos",
+	.module		= THIS_MODULE,
+	.refcnt		= ATOMIC_INIT(0),
+	.schedule	= ip_vs_twos_schedule,
+	.n_list		= LIST_HEAD_INIT(ip_vs_twos_scheduler.n_list),
+};
+
+/* Module init: register ip_vs_twos_scheduler and pass the result of
+ * register_ip_vs_scheduler() back unchanged.
+ */
+static int __init ip_vs_twos_init(void)
+{
+	int ret = register_ip_vs_scheduler(&ip_vs_twos_scheduler);
+
+	return ret;
+}
+
+static void __exit ip_vs_twos_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_twos_scheduler); /* module exit: undo ip_vs_twos_init() */
+ synchronize_rcu(); /* NOTE(review): presumably lets RCU readers still using the scheduler finish before unload - confirm */
+}
+
+module_init(ip_vs_twos_init);
+module_exit(ip_vs_twos_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 234b7cab37c3..ff0168736f6e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1229,7 +1229,8 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
* Let nf_ct_resolve_clash() deal with this later.
*/
if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL))
continue;
NF_CT_STAT_INC_ATOMIC(net, found);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 84caf3316946..1469365bac7e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2686,12 +2686,6 @@ ctnetlink_glue_build_size(const struct nf_conn *ct)
;
}
-static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo)
-{
- return nf_ct_get(skb, ctinfo);
-}
-
static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
{
const struct nf_conntrack_zone *zone;
@@ -2925,7 +2919,6 @@ static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
}
static struct nfnl_ct_hook ctnetlink_glue_hook = {
- .get_ct = ctnetlink_glue_get_ct,
.build_size = ctnetlink_glue_build_size,
.build = ctnetlink_glue_build,
.parse = ctnetlink_glue_parse,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 46c5557c1fec..0ee702d374b0 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -523,6 +523,9 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
{
int ret;
+ /* module_param hashsize could have changed value */
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret < 0 || !write)
return ret;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 513f78db3cb2..5fa657b8e03d 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -191,14 +191,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple *tuple = data;
- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}
static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple_rhash *tuplehash = data;
- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
}
static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -207,7 +207,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
const struct flow_offload_tuple *tuple = arg->key;
const struct flow_offload_tuple_rhash *x = ptr;
- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
return 1;
return 0;
@@ -399,7 +399,7 @@ static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
return -1;
tcph = (void *)(skb_network_header(skb) + thoff);
- inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
return 0;
}
@@ -415,7 +415,7 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace2(&udph->check, skb, port,
- new_port, true);
+ new_port, false);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index ea923f8cf9c4..b7c3c902290f 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1174,6 +1174,7 @@ static int __init nf_nat_init(void)
ret = register_pernet_subsys(&nat_net_ops);
if (ret < 0) {
nf_ct_extend_unregister(&nat_extend);
+ kvfree(nf_nat_bysource);
return ret;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8d5aa0ac45f4..c1eb5cdb3033 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -508,7 +508,7 @@ static int nft_delflowtable(struct nft_ctx *ctx,
static struct nft_table *nft_table_lookup(const struct net *net,
const struct nlattr *nla,
- u8 family, u8 genmask)
+ u8 family, u8 genmask, u32 nlpid)
{
struct nft_table *table;
@@ -519,8 +519,13 @@ static struct nft_table *nft_table_lookup(const struct net *net,
lockdep_is_held(&net->nft.commit_mutex)) {
if (!nla_strcmp(nla, table->name) &&
table->family == family &&
- nft_active_genmask(table, genmask))
+ nft_active_genmask(table, genmask)) {
+ if (nft_table_has_owner(table) &&
+ table->nlpid != nlpid)
+ return ERR_PTR(-EPERM);
+
return table;
+ }
}
return ERR_PTR(-ENOENT);
@@ -679,6 +684,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
NFTA_TABLE_PAD))
goto nla_put_failure;
+ if (nft_table_has_owner(table) &&
+ nla_put_be32(skb, NFTA_TABLE_OWNER, htonl(table->nlpid)))
+ goto nla_put_failure;
if (table->udata) {
if (nla_put(skb, NFTA_TABLE_USERDATA, table->udlen, table->udata))
@@ -821,7 +829,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
- table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_TABLE_NAME]);
return PTR_ERR(table);
@@ -902,8 +910,8 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return 0;
flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
- if (flags & ~NFT_TABLE_F_DORMANT)
- return -EINVAL;
+ if (flags & ~NFT_TABLE_F_MASK)
+ return -EOPNOTSUPP;
if (flags == ctx->table->flags)
return 0;
@@ -1003,7 +1011,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
lockdep_assert_held(&net->nft.commit_mutex);
attr = nla[NFTA_TABLE_NAME];
- table = nft_table_lookup(net, attr, family, genmask);
+ table = nft_table_lookup(net, attr, family, genmask,
+ NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
if (PTR_ERR(table) != -ENOENT)
return PTR_ERR(table);
@@ -1021,8 +1030,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (nla[NFTA_TABLE_FLAGS]) {
flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
- if (flags & ~NFT_TABLE_F_DORMANT)
- return -EINVAL;
+ if (flags & ~NFT_TABLE_F_MASK)
+ return -EOPNOTSUPP;
}
err = -ENOMEM;
@@ -1053,6 +1062,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
table->family = family;
table->flags = flags;
table->handle = ++table_handle;
+ if (table->flags & NFT_TABLE_F_OWNER)
+ table->nlpid = NETLINK_CB(skb).portid;
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
@@ -1160,6 +1171,9 @@ static int nft_flush(struct nft_ctx *ctx, int family)
if (!nft_is_active_next(ctx->net, table))
continue;
+ if (nft_table_has_owner(table) && table->nlpid != ctx->portid)
+ continue;
+
if (nla[NFTA_TABLE_NAME] &&
nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
continue;
@@ -1196,7 +1210,8 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
table = nft_table_lookup_byhandle(net, attr, genmask);
} else {
attr = nla[NFTA_TABLE_NAME];
- table = nft_table_lookup(net, attr, family, genmask);
+ table = nft_table_lookup(net, attr, family, genmask,
+ NETLINK_CB(skb).portid);
}
if (IS_ERR(table)) {
@@ -1579,7 +1594,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
- table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
return PTR_ERR(table);
@@ -2299,7 +2314,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
lockdep_assert_held(&net->nft.commit_mutex);
- table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
return PTR_ERR(table);
@@ -2395,7 +2411,8 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
u32 use;
int err;
- table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
return PTR_ERR(table);
@@ -3041,7 +3058,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
- table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
return PTR_ERR(table);
@@ -3179,7 +3196,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
lockdep_assert_held(&net->nft.commit_mutex);
- table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
return PTR_ERR(table);
@@ -3403,7 +3421,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
int family = nfmsg->nfgen_family, err = 0;
struct nft_ctx ctx;
- table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
return PTR_ERR(table);
@@ -3584,7 +3603,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
struct netlink_ext_ack *extack,
- u8 genmask)
+ u8 genmask, u32 nlpid)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
int family = nfmsg->nfgen_family;
@@ -3592,7 +3611,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
if (nla[NFTA_SET_TABLE] != NULL) {
table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
- genmask);
+ genmask, nlpid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
return PTR_ERR(table);
@@ -4007,7 +4026,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
/* Verify existence before starting dump */
err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
- genmask);
+ genmask, 0);
if (err < 0)
return err;
@@ -4162,7 +4181,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
NFT_SET_MAP | NFT_SET_EVAL |
- NFT_SET_OBJECT | NFT_SET_CONCAT))
+ NFT_SET_OBJECT | NFT_SET_CONCAT | NFT_SET_EXPR))
return -EOPNOTSUPP;
/* Only one of these operations is supported */
if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) ==
@@ -4236,7 +4255,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS])
desc.expr = true;
- table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
return PTR_ERR(table);
@@ -4304,6 +4324,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct nlattr *tmp;
int left;
+ if (!(flags & NFT_SET_EXPR)) {
+ err = -EINVAL;
+ goto err_set_alloc_name;
+ }
i = 0;
nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
if (i == NFT_SET_EXPR_MAX) {
@@ -4409,7 +4433,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
return -EINVAL;
err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
- genmask);
+ genmask, NETLINK_CB(skb).portid);
if (err < 0)
return err;
@@ -4434,6 +4458,12 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
return nft_delset(&ctx, set);
}
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len);
+
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
@@ -4598,14 +4628,14 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
struct netlink_ext_ack *extack,
- u8 genmask)
+ u8 genmask, u32 nlpid)
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
int family = nfmsg->nfgen_family;
struct nft_table *table;
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
- genmask);
+ genmask, nlpid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
return PTR_ERR(table);
@@ -5022,7 +5052,7 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
int rem, err = 0;
err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
- genmask);
+ genmask, NETLINK_CB(skb).portid);
if (err < 0)
return err;
@@ -5231,9 +5261,8 @@ static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
kfree(elem);
}
-static int nft_set_elem_expr_clone(const struct nft_ctx *ctx,
- struct nft_set *set,
- struct nft_expr *expr_array[])
+int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_expr *expr_array[])
{
struct nft_expr *expr;
int err, i, k;
@@ -5254,8 +5283,8 @@ static int nft_set_elem_expr_clone(const struct nft_ctx *ctx,
return 0;
err_expr:
- for (k = i - 1; k >= 0; k++)
- nft_expr_destroy(ctx, expr_array[i]);
+ for (k = i - 1; k >= 0; k--)
+ nft_expr_destroy(ctx, expr_array[k]);
return -ENOMEM;
}
@@ -5278,6 +5307,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {};
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
u8 genmask = nft_genmask_next(ctx->net);
+ u32 flags = 0, size = 0, num_exprs = 0;
struct nft_set_ext_tmpl tmpl;
struct nft_set_ext *ext, *ext2;
struct nft_set_elem elem;
@@ -5287,7 +5317,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
- u32 flags = 0, size = 0;
u64 timeout;
u64 expiration;
int err, i;
@@ -5353,7 +5382,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_EXPR]) {
struct nft_expr *expr;
- if (set->num_exprs != 1)
+ if (set->num_exprs && set->num_exprs != 1)
return -EOPNOTSUPP;
expr = nft_set_elem_expr_alloc(ctx, set,
@@ -5362,8 +5391,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return PTR_ERR(expr);
expr_array[0] = expr;
+ num_exprs = 1;
- if (set->exprs[0] && set->exprs[0]->ops != expr->ops) {
+ if (set->num_exprs && set->exprs[0]->ops != expr->ops) {
err = -EOPNOTSUPP;
goto err_set_elem_expr;
}
@@ -5372,12 +5402,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nlattr *tmp;
int left;
- if (set->num_exprs == 0)
- return -EOPNOTSUPP;
-
i = 0;
nla_for_each_nested(tmp, nla[NFTA_SET_ELEM_EXPRESSIONS], left) {
- if (i == set->num_exprs) {
+ if (i == NFT_SET_EXPR_MAX ||
+ (set->num_exprs && set->num_exprs == i)) {
err = -E2BIG;
goto err_set_elem_expr;
}
@@ -5391,14 +5419,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_set_elem_expr;
}
expr_array[i] = expr;
+ num_exprs++;
- if (expr->ops != set->exprs[i]->ops) {
+ if (set->num_exprs && expr->ops != set->exprs[i]->ops) {
err = -EOPNOTSUPP;
goto err_set_elem_expr;
}
i++;
}
- if (set->num_exprs != i) {
+ if (set->num_exprs && set->num_exprs != i) {
err = -EOPNOTSUPP;
goto err_set_elem_expr;
}
@@ -5406,6 +5435,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = nft_set_elem_expr_clone(ctx, set, expr_array);
if (err < 0)
goto err_set_elem_expr_clone;
+
+ num_exprs = set->num_exprs;
}
err = nft_setelem_parse_key(ctx, set, &elem.key.val,
@@ -5430,8 +5461,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
}
- if (set->num_exprs) {
- for (i = 0; i < set->num_exprs; i++)
+ if (num_exprs) {
+ for (i = 0; i < num_exprs; i++)
size += expr_array[i]->ops->size;
nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
@@ -5519,7 +5550,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
*nft_set_ext_obj(ext) = obj;
obj->use++;
}
- for (i = 0; i < set->num_exprs; i++)
+ for (i = 0; i < num_exprs; i++)
nft_set_elem_expr_setup(ext, i, expr_array);
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
@@ -5581,7 +5612,7 @@ err_parse_key_end:
err_parse_key:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
err_set_elem_expr:
- for (i = 0; i < set->num_exprs && expr_array[i]; i++)
+ for (i = 0; i < num_exprs && expr_array[i]; i++)
nft_expr_destroy(ctx, expr_array[i]);
err_set_elem_expr_clone:
return err;
@@ -5602,7 +5633,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
return -EINVAL;
err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
- genmask);
+ genmask, NETLINK_CB(skb).portid);
if (err < 0)
return err;
@@ -5810,7 +5841,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
int rem, err = 0;
err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
- genmask);
+ genmask, NETLINK_CB(skb).portid);
if (err < 0)
return err;
@@ -6113,7 +6144,8 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
!nla[NFTA_OBJ_DATA])
return -EINVAL;
- table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
return PTR_ERR(table);
@@ -6383,7 +6415,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
!nla[NFTA_OBJ_TYPE])
return -EINVAL;
- table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
return PTR_ERR(table);
@@ -6457,7 +6489,8 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
(!nla[NFTA_OBJ_NAME] && !nla[NFTA_OBJ_HANDLE]))
return -EINVAL;
- table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask);
+ table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask,
+ NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
return PTR_ERR(table);
@@ -6874,7 +6907,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
return -EINVAL;
table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
- genmask);
+ genmask, NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
return PTR_ERR(table);
@@ -7058,7 +7091,7 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
return -EINVAL;
table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
- genmask);
+ genmask, NETLINK_CB(skb).portid);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]);
return PTR_ERR(table);
@@ -7266,7 +7299,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
return -EINVAL;
table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family,
- genmask);
+ genmask, 0);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -8585,7 +8618,7 @@ EXPORT_SYMBOL_GPL(nft_parse_u32_check);
* Registers used to be 128 bit wide, these register numbers will be
* mapped to the corresponding 32 bit register numbers.
*/
-unsigned int nft_parse_register(const struct nlattr *attr)
+static unsigned int nft_parse_register(const struct nlattr *attr)
{
unsigned int reg;
@@ -8597,7 +8630,6 @@ unsigned int nft_parse_register(const struct nlattr *attr)
return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
}
}
-EXPORT_SYMBOL_GPL(nft_parse_register);
/**
* nft_dump_register - dump a register value to a netlink attribute
@@ -8630,7 +8662,7 @@ EXPORT_SYMBOL_GPL(nft_dump_register);
* Validate that the input register is one of the general purpose
* registers and that the length of the load is within the bounds.
*/
-int nft_validate_register_load(enum nft_registers reg, unsigned int len)
+static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
{
if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
return -EINVAL;
@@ -8641,7 +8673,21 @@ int nft_validate_register_load(enum nft_registers reg, unsigned int len)
return 0;
}
-EXPORT_SYMBOL_GPL(nft_validate_register_load);
+
+/* Parse the register number carried in @attr, validate a load of length
+ * @len from it and, on success, store the register in *@sreg.
+ *
+ * Returns 0 on success or the negative error reported by
+ * nft_validate_register_load(); *@sreg is left untouched on error.
+ */
+int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
+{
+	u32 reg = nft_parse_register(attr);
+	int err = nft_validate_register_load(reg, len);
+
+	if (err < 0)
+		return err;
+
+	*sreg = reg;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_load);
/**
* nft_validate_register_store - validate an expressions' register store
@@ -8657,10 +8703,11 @@ EXPORT_SYMBOL_GPL(nft_validate_register_load);
* A value of NULL for the data means that its runtime gathered
* data.
*/
-int nft_validate_register_store(const struct nft_ctx *ctx,
- enum nft_registers reg,
- const struct nft_data *data,
- enum nft_data_types type, unsigned int len)
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len)
{
int err;
@@ -8692,7 +8739,24 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
return 0;
}
}
-EXPORT_SYMBOL_GPL(nft_validate_register_store);
+
+/* Parse the register number carried in @attr, validate a store of
+ * @data / @type / @len into it and, on success, store the register in
+ * *@dreg.
+ *
+ * Returns 0 on success or the negative error reported by
+ * nft_validate_register_store(); *@dreg is left untouched on error.
+ */
+int nft_parse_register_store(const struct nft_ctx *ctx,
+			     const struct nlattr *attr, u8 *dreg,
+			     const struct nft_data *data,
+			     enum nft_data_types type, unsigned int len)
+{
+	u32 reg = nft_parse_register(attr);
+	int err = nft_validate_register_store(ctx, reg, data, type, len);
+
+	if (err < 0)
+		return err;
+
+	*dreg = reg;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_store);
static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
[NFTA_VERDICT_CODE] = { .type = NLA_U32 },
@@ -8946,10 +9010,25 @@ int __nft_release_basechain(struct nft_ctx *ctx)
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);
-static void __nft_release_tables(struct net *net)
+/* Call nf_tables_unregister_hook() for every chain on @table's chain list. */
+static void __nft_release_hook(struct net *net, struct nft_table *table)
+{
+	struct nft_chain *ch;
+
+	list_for_each_entry(ch, &table->chains, list) {
+		nf_tables_unregister_hook(net, table, ch);
+	}
+}
+
+/* Run __nft_release_hook() on every table listed in @net->nft.tables. */
+static void __nft_release_hooks(struct net *net)
+{
+	struct nft_table *tbl;
+
+	list_for_each_entry(tbl, &net->nft.tables, list) {
+		__nft_release_hook(net, tbl);
+	}
+}
+
+static void __nft_release_table(struct net *net, struct nft_table *table)
{
struct nft_flowtable *flowtable, *nf;
- struct nft_table *table, *nt;
struct nft_chain *chain, *nc;
struct nft_object *obj, *ne;
struct nft_rule *rule, *nr;
@@ -8959,47 +9038,90 @@ static void __nft_release_tables(struct net *net)
.family = NFPROTO_NETDEV,
};
+ ctx.family = table->family;
+ ctx.table = table;
+ list_for_each_entry(chain, &table->chains, list) {
+ ctx.chain = chain;
+ list_for_each_entry_safe(rule, nr, &chain->rules, list) {
+ list_del(&rule->list);
+ chain->use--;
+ nf_tables_rule_release(&ctx, rule);
+ }
+ }
+ list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
+ list_del(&flowtable->list);
+ table->use--;
+ nf_tables_flowtable_destroy(flowtable);
+ }
+ list_for_each_entry_safe(set, ns, &table->sets, list) {
+ list_del(&set->list);
+ table->use--;
+ nft_set_destroy(&ctx, set);
+ }
+ list_for_each_entry_safe(obj, ne, &table->objects, list) {
+ nft_obj_del(obj);
+ table->use--;
+ nft_obj_destroy(&ctx, obj);
+ }
+ list_for_each_entry_safe(chain, nc, &table->chains, list) {
+ ctx.chain = chain;
+ nft_chain_del(chain);
+ table->use--;
+ nf_tables_chain_destroy(&ctx);
+ }
+ list_del(&table->list);
+ nf_tables_table_destroy(&ctx);
+}
+
+static void __nft_release_tables(struct net *net, u32 nlpid)
+{
+ struct nft_table *table, *nt;
+
list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
- ctx.family = table->family;
+ if (nft_table_has_owner(table) &&
+ nlpid != table->nlpid)
+ continue;
- list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hook(net, table, chain);
- /* No packets are walking on these chains anymore. */
- ctx.table = table;
- list_for_each_entry(chain, &table->chains, list) {
- ctx.chain = chain;
- list_for_each_entry_safe(rule, nr, &chain->rules, list) {
- list_del(&rule->list);
- chain->use--;
- nf_tables_rule_release(&ctx, rule);
- }
- }
- list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
- list_del(&flowtable->list);
- table->use--;
- nf_tables_flowtable_destroy(flowtable);
- }
- list_for_each_entry_safe(set, ns, &table->sets, list) {
- list_del(&set->list);
- table->use--;
- nft_set_destroy(&ctx, set);
- }
- list_for_each_entry_safe(obj, ne, &table->objects, list) {
- nft_obj_del(obj);
- table->use--;
- nft_obj_destroy(&ctx, obj);
+ __nft_release_table(net, table);
+ }
+}
+
+static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct netlink_notify *n = ptr;
+ struct nft_table *table, *nt;
+ struct net *net = n->net;
+ bool release = false;
+
+ if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
+ return NOTIFY_DONE;
+
+ mutex_lock(&net->nft.commit_mutex);
+ list_for_each_entry(table, &net->nft.tables, list) {
+ if (nft_table_has_owner(table) &&
+ n->portid == table->nlpid) {
+ __nft_release_hook(net, table);
+ release = true;
}
- list_for_each_entry_safe(chain, nc, &table->chains, list) {
- ctx.chain = chain;
- nft_chain_del(chain);
- table->use--;
- nf_tables_chain_destroy(&ctx);
+ }
+ if (release) {
+ synchronize_rcu();
+ list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
+ if (nft_table_has_owner(table) &&
+ n->portid == table->nlpid)
+ __nft_release_table(net, table);
}
- list_del(&table->list);
- nf_tables_table_destroy(&ctx);
}
+ mutex_unlock(&net->nft.commit_mutex);
+
+ return NOTIFY_DONE;
}
+static struct notifier_block nft_nl_notifier = {
+ .notifier_call = nft_rcv_nl_event, /* callback; only acts on NETLINK_URELEASE for NETLINK_NETFILTER */
+};
+
static int __net_init nf_tables_init_net(struct net *net)
{
INIT_LIST_HEAD(&net->nft.tables);
@@ -9013,12 +9135,17 @@ static int __net_init nf_tables_init_net(struct net *net)
return 0;
}
+static void __net_exit nf_tables_pre_exit_net(struct net *net)
+{
+ __nft_release_hooks(net); /* pernet .pre_exit: unregister hooks for every chain of every table in @net */
+}
+
static void __net_exit nf_tables_exit_net(struct net *net)
{
mutex_lock(&net->nft.commit_mutex);
if (!list_empty(&net->nft.commit_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
- __nft_release_tables(net);
+ __nft_release_tables(net, 0);
mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables));
WARN_ON_ONCE(!list_empty(&net->nft.module_list));
@@ -9026,8 +9153,9 @@ static void __net_exit nf_tables_exit_net(struct net *net)
}
static struct pernet_operations nf_tables_net_ops = {
- .init = nf_tables_init_net,
- .exit = nf_tables_exit_net,
+ .init = nf_tables_init_net,
+ .pre_exit = nf_tables_pre_exit_net,
+ .exit = nf_tables_exit_net,
};
static int __init nf_tables_module_init(void)
@@ -9041,43 +9169,50 @@ static int __init nf_tables_module_init(void)
err = nft_chain_filter_init();
if (err < 0)
- goto err1;
+ goto err_chain_filter;
err = nf_tables_core_module_init();
if (err < 0)
- goto err2;
+ goto err_core_module;
err = register_netdevice_notifier(&nf_tables_flowtable_notifier);
if (err < 0)
- goto err3;
+ goto err_netdev_notifier;
err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params);
if (err < 0)
- goto err4;
+ goto err_rht_objname;
err = nft_offload_init();
if (err < 0)
- goto err5;
+ goto err_offload;
+
+ err = netlink_register_notifier(&nft_nl_notifier);
+ if (err < 0)
+ goto err_netlink_notifier;
/* must be last */
err = nfnetlink_subsys_register(&nf_tables_subsys);
if (err < 0)
- goto err6;
+ goto err_nfnl_subsys;
nft_chain_route_init();
return err;
-err6:
+
+err_nfnl_subsys:
+ netlink_unregister_notifier(&nft_nl_notifier);
+err_netlink_notifier:
nft_offload_exit();
-err5:
+err_offload:
rhltable_destroy(&nft_objname_ht);
-err4:
+err_rht_objname:
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
-err3:
+err_netdev_notifier:
nf_tables_core_module_exit();
-err2:
+err_core_module:
nft_chain_filter_fini();
-err1:
+err_chain_filter:
unregister_pernet_subsys(&nf_tables_net_ops);
return err;
}
@@ -9085,6 +9220,7 @@ err1:
static void __exit nf_tables_module_exit(void)
{
nfnetlink_subsys_unregister(&nf_tables_subsys);
+ netlink_unregister_notifier(&nft_nl_notifier);
nft_offload_exit();
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b35e8d9a5b37..26776b88a539 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -43,6 +43,10 @@
#include "../bridge/br_private.h"
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
#define NFULNL_COPY_DISABLED 0xff
#define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE
#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */
@@ -733,14 +737,16 @@ nfulnl_log_packet(struct net *net,
size += nla_total_size(sizeof(u_int32_t));
if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
size += nla_total_size(sizeof(u_int32_t));
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (inst->flags & NFULNL_CFG_F_CONNTRACK) {
nfnl_ct = rcu_dereference(nfnl_ct_hook);
if (nfnl_ct != NULL) {
- ct = nfnl_ct->get_ct(skb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL)
size += nfnl_ct->build_size(ct);
}
}
+#endif
if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE)
size += nfulnl_get_bridge_size(skb);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d1d8bca03b4f..48a07914fd94 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -444,13 +444,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
nfnl_ct = rcu_dereference(nfnl_ct_hook);
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
if (queue->flags & NFQA_CFG_F_CONNTRACK) {
if (nfnl_ct != NULL) {
- ct = nfnl_ct->get_ct(entskb, &ctinfo);
+ ct = nf_ct_get(entskb, &ctinfo);
if (ct != NULL)
size += nfnl_ct->build_size(ct);
}
}
+#endif
if (queue->flags & NFQA_CFG_F_UID_GID) {
size += (nla_total_size(sizeof(u_int32_t)) /* uid */
@@ -1104,9 +1106,10 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
struct nf_queue_entry *entry,
enum ip_conntrack_info *ctinfo)
{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct nf_conn *ct;
- ct = nfnl_ct->get_ct(entry->skb, ctinfo);
+ ct = nf_ct_get(entry->skb, ctinfo);
if (ct == NULL)
return NULL;
@@ -1118,6 +1121,9 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
NETLINK_CB(entry->skb).portid,
nlmsg_report(nlh));
return ct;
+#else
+ return NULL;
+#endif
}
static int nfqa_parse_bridge(struct nf_queue_entry *entry,
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index bbd773d74377..47b0dba95054 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -16,8 +16,8 @@
#include <net/netfilter/nf_tables_offload.h>
struct nft_bitwise {
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
enum nft_bitwise_ops op:8;
u8 len;
struct nft_data mask;
@@ -169,14 +169,14 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
priv->len = len;
- priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
- err = nft_validate_register_load(priv->sreg, priv->len);
+ err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ priv->len);
if (err < 0)
return err;
- priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
if (err < 0)
return err;
@@ -315,14 +315,13 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
int err;
- priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
- err = nft_validate_register_load(priv->sreg, sizeof(u32));
+ err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ sizeof(u32));
if (err < 0)
return err;
- priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 12bed3f7bbc6..9d5947ab8d4e 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -16,8 +16,8 @@
#include <net/netfilter/nf_tables.h>
struct nft_byteorder {
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
enum nft_byteorder_ops op:8;
u8 len;
u8 size;
@@ -131,20 +131,20 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]);
err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len);
if (err < 0)
return err;
priv->len = len;
- err = nft_validate_register_load(priv->sreg, priv->len);
+ err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg,
+ priv->len);
if (err < 0)
return err;
- priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
}
static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 00e563a72d3d..eb6a43a180bb 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -18,7 +18,7 @@
struct nft_cmp_expr {
struct nft_data data;
- enum nft_registers sreg:8;
+ u8 sreg;
u8 len;
enum nft_cmp_ops op:8;
};
@@ -87,8 +87,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return err;
}
- priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
- err = nft_validate_register_load(priv->sreg, desc.len);
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -174,8 +173,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
- err = nft_validate_register_load(priv->sreg, desc.len);
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -268,10 +266,8 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
if (err < 0)
return ERR_PTR(err);
- if (desc.type != NFT_DATA_VALUE) {
- err = -EINVAL;
+ if (desc.type != NFT_DATA_VALUE)
goto err1;
- }
if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ))
return &nft_cmp_fast_ops;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 8bcd49f14797..882fe8648653 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -27,8 +27,8 @@ struct nft_ct {
enum nft_ct_keys key:8;
enum ip_conntrack_dir dir:8;
union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
+ u8 dreg;
+ u8 sreg;
};
};
@@ -498,9 +498,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
}
}
- priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
if (err < 0)
return err;
@@ -600,8 +599,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
}
}
- priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
- err = nft_validate_register_load(priv->sreg, len);
+ err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len);
if (err < 0)
goto err1;
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index 40788b3f1071..bbf3fcba3df4 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -14,7 +14,7 @@
#include <net/netfilter/nf_dup_netdev.h>
struct nft_dup_netdev {
- enum nft_registers sreg_dev:8;
+ u8 sreg_dev;
};
static void nft_dup_netdev_eval(const struct nft_expr *expr,
@@ -40,8 +40,8 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_DEV] == NULL)
return -EINVAL;
- priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
}
static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 983a1d5ca3ab..d44a70c11b3f 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -16,9 +16,10 @@ struct nft_dynset {
struct nft_set *set;
struct nft_set_ext_tmpl tmpl;
enum nft_dynset_ops op:8;
- enum nft_registers sreg_key:8;
- enum nft_registers sreg_data:8;
+ u8 sreg_key;
+ u8 sreg_data;
bool invert;
+ bool expr;
u8 num_exprs;
u64 timeout;
struct nft_expr *expr_array[NFT_SET_EXPR_MAX];
@@ -175,11 +176,12 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (tb[NFTA_DYNSET_FLAGS]) {
u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
-
- if (flags & ~NFT_DYNSET_F_INV)
- return -EINVAL;
+ if (flags & ~(NFT_DYNSET_F_INV | NFT_DYNSET_F_EXPR))
+ return -EOPNOTSUPP;
if (flags & NFT_DYNSET_F_INV)
priv->invert = true;
+ if (flags & NFT_DYNSET_F_EXPR)
+ priv->expr = true;
}
set = nft_set_lookup_global(ctx->net, ctx->table,
@@ -210,26 +212,26 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
timeout = 0;
if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
- return -EINVAL;
+ return -EOPNOTSUPP;
err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout);
if (err)
return err;
}
- priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
- err = nft_validate_register_load(priv->sreg_key, set->klen);
+ err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
+ set->klen);
if (err < 0)
return err;
if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
if (!(set->flags & NFT_SET_MAP))
- return -EINVAL;
+ return -EOPNOTSUPP;
if (set->dtype == NFT_DATA_VERDICT)
return -EOPNOTSUPP;
- priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]);
- err = nft_validate_register_load(priv->sreg_data, set->dlen);
+ err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA],
+ &priv->sreg_data, set->dlen);
if (err < 0)
return err;
} else if (set->flags & NFT_SET_MAP)
@@ -261,6 +263,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
struct nlattr *tmp;
int left;
+ if (!priv->expr)
+ return -EINVAL;
+
i = 0;
nla_for_each_nested(tmp, tb[NFTA_DYNSET_EXPRESSIONS], left) {
if (i == NFT_SET_EXPR_MAX) {
@@ -290,6 +295,12 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
err = -EOPNOTSUPP;
goto err_expr_free;
}
+ } else if (set->num_exprs > 0) {
+ err = nft_set_elem_expr_clone(ctx, set, priv->expr_array);
+ if (err < 0)
+ return err;
+
+ priv->num_exprs = set->num_exprs;
}
nft_set_ext_prepare(&priv->tmpl);
@@ -301,8 +312,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
nft_dynset_ext_add_expr(priv);
if (set->flags & NFT_SET_TIMEOUT) {
- if (timeout || set->timeout)
+ if (timeout || set->timeout) {
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+ }
}
priv->timeout = timeout;
@@ -371,22 +384,25 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
nf_jiffies64_to_msecs(priv->timeout),
NFTA_DYNSET_PAD))
goto nla_put_failure;
- if (priv->num_exprs == 1) {
- if (nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr_array[0]))
- goto nla_put_failure;
- } else if (priv->num_exprs > 1) {
- struct nlattr *nest;
-
- nest = nla_nest_start_noflag(skb, NFTA_DYNSET_EXPRESSIONS);
- if (!nest)
- goto nla_put_failure;
-
- for (i = 0; i < priv->num_exprs; i++) {
- if (nft_expr_dump(skb, NFTA_LIST_ELEM,
- priv->expr_array[i]))
+ if (priv->set->num_exprs == 0) {
+ if (priv->num_exprs == 1) {
+ if (nft_expr_dump(skb, NFTA_DYNSET_EXPR,
+ priv->expr_array[0]))
goto nla_put_failure;
+ } else if (priv->num_exprs > 1) {
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, NFTA_DYNSET_EXPRESSIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ for (i = 0; i < priv->num_exprs; i++) {
+ if (nft_expr_dump(skb, NFTA_LIST_ELEM,
+ priv->expr_array[i]))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
}
- nla_nest_end(skb, nest);
}
if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 3c48cdc8935d..f64f0017e9a5 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -19,8 +19,8 @@ struct nft_exthdr {
u8 offset;
u8 len;
u8 op;
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
+ u8 dreg;
+ u8 sreg;
u8 flags;
};
@@ -350,12 +350,12 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
- priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
priv->flags = flags;
priv->op = op;
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
}
static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
@@ -400,11 +400,11 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
- priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
priv->flags = flags;
priv->op = op;
- return nft_validate_register_load(priv->sreg, priv->len);
+ return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
+ priv->len);
}
static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 4dfdaeaf09a5..b10ce732b337 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -86,7 +86,6 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
- priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
@@ -106,8 +105,8 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
}
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index b77985986b24..cd59afde5b2f 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -18,7 +18,7 @@
#include <net/ip.h>
struct nft_fwd_netdev {
- enum nft_registers sreg_dev:8;
+ u8 sreg_dev;
};
static void nft_fwd_netdev_eval(const struct nft_expr *expr,
@@ -50,8 +50,8 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_FWD_SREG_DEV] == NULL)
return -EINVAL;
- priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
}
static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -78,8 +78,8 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
}
struct nft_fwd_neigh {
- enum nft_registers sreg_dev:8;
- enum nft_registers sreg_addr:8;
+ u8 sreg_dev;
+ u8 sreg_addr;
u8 nfproto;
};
@@ -157,8 +157,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
!tb[NFTA_FWD_NFPROTO])
return -EINVAL;
- priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
- priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]);
priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO]));
switch (priv->nfproto) {
@@ -172,11 +170,13 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- err = nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
if (err < 0)
return err;
- return nft_validate_register_load(priv->sreg_addr, addr_len);
+ return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
+ addr_len);
}
static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 96371d878e7e..f829f5289e16 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -14,8 +14,8 @@
#include <linux/jhash.h>
struct nft_jhash {
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
u8 len;
bool autogen_seed:1;
u32 modulus;
@@ -38,7 +38,7 @@ static void nft_jhash_eval(const struct nft_expr *expr,
}
struct nft_symhash {
- enum nft_registers dreg:8;
+ u8 dreg;
u32 modulus;
u32 offset;
};
@@ -83,9 +83,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
if (tb[NFTA_HASH_OFFSET])
priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
- priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
- priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
-
err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len);
if (err < 0)
return err;
@@ -94,6 +91,10 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
priv->len = len;
+ err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len);
+ if (err < 0)
+ return err;
+
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
if (priv->modulus < 1)
return -ERANGE;
@@ -108,9 +109,8 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
get_random_bytes(&priv->seed, sizeof(priv->seed));
}
- return nft_validate_register_load(priv->sreg, len) &&
- nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
}
static int nft_symhash_init(const struct nft_ctx *ctx,
@@ -126,8 +126,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx,
if (tb[NFTA_HASH_OFFSET])
priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
- priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
-
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
if (priv->modulus < 1)
return -ERANGE;
@@ -135,8 +133,9 @@ static int nft_symhash_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ sizeof(u32));
}
static int nft_jhash_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index c63eb3b17178..90c64d27ae53 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -48,9 +48,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
priv->dlen = desc.len;
- priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, &priv->data,
- desc.type, desc.len);
+ err = nft_parse_register_store(ctx, tb[NFTA_IMMEDIATE_DREG],
+ &priv->dreg, &priv->data, desc.type,
+ desc.len);
if (err < 0)
goto err1;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index f1363b8aabba..b0f558b4fea5 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -17,8 +17,8 @@
struct nft_lookup {
struct nft_set *set;
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
bool invert;
struct nft_set_binding binding;
};
@@ -76,8 +76,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
- priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
- err = nft_validate_register_load(priv->sreg, set->klen);
+ err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg,
+ set->klen);
if (err < 0)
return err;
@@ -100,9 +100,9 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_MAP))
return -EINVAL;
- priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- set->dtype, set->dlen);
+ err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG],
+ &priv->dreg, NULL, set->dtype,
+ set->dlen);
if (err < 0)
return err;
} else if (set->flags & NFT_SET_MAP)
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 71390b727040..9953e8053753 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -15,8 +15,8 @@
struct nft_masq {
u32 flags;
- enum nft_registers sreg_proto_min:8;
- enum nft_registers sreg_proto_max:8;
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
};
static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
@@ -54,19 +54,15 @@ static int nft_masq_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
- priv->sreg_proto_min =
- nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]);
-
- err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
- priv->sreg_proto_max =
- nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]);
-
- err = nft_validate_register_load(priv->sreg_proto_max,
- plen);
+ err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index bf4b3ad5314c..a7e01e9952f1 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -535,9 +535,8 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
@@ -661,8 +660,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
- err = nft_validate_register_load(priv->sreg, len);
+ err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 4bcf33b049c4..0840c635b752 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -21,10 +21,10 @@
#include <net/ip.h>
struct nft_nat {
- enum nft_registers sreg_addr_min:8;
- enum nft_registers sreg_addr_max:8;
- enum nft_registers sreg_proto_min:8;
- enum nft_registers sreg_proto_max:8;
+ u8 sreg_addr_min;
+ u8 sreg_addr_max;
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
enum nf_nat_manip_type type:8;
u8 family;
u16 flags;
@@ -206,18 +206,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
- priv->sreg_addr_min =
- nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]);
- err = nft_validate_register_load(priv->sreg_addr_min, alen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN],
+ &priv->sreg_addr_min, alen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_ADDR_MAX]) {
- priv->sreg_addr_max =
- nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]);
-
- err = nft_validate_register_load(priv->sreg_addr_max,
- alen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX],
+ &priv->sreg_addr_max,
+ alen);
if (err < 0)
return err;
} else {
@@ -229,19 +226,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
plen = sizeof_field(struct nf_nat_range, min_addr.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
- priv->sreg_proto_min =
- nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]);
-
- err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_PROTO_MAX]) {
- priv->sreg_proto_max =
- nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]);
-
- err = nft_validate_register_load(priv->sreg_proto_max,
- plen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index f1fc824f9737..722cac1e90e0 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -16,7 +16,7 @@
static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
struct nft_ng_inc {
- enum nft_registers dreg:8;
+ u8 dreg;
u32 modulus;
atomic_t counter;
u32 offset;
@@ -66,11 +66,10 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
atomic_set(&priv->counter, priv->modulus - 1);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
}
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
@@ -100,7 +99,7 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
}
struct nft_ng_random {
- enum nft_registers dreg:8;
+ u8 dreg;
u32 modulus;
u32 offset;
};
@@ -140,10 +139,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
prandom_init_once(&nft_numgen_prandom_state);
- priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
-
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
}
static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 5f9207a9f485..bc104d36d3bb 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -95,7 +95,7 @@ static const struct nft_expr_ops nft_objref_ops = {
struct nft_objref_map {
struct nft_set *set;
- enum nft_registers sreg:8;
+ u8 sreg;
struct nft_set_binding binding;
};
@@ -137,8 +137,8 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
- priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]);
- err = nft_validate_register_load(priv->sreg, set->klen);
+ err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
+ set->klen);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index c261d57a666a..ac61f708b82d 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -6,7 +6,7 @@
#include <linux/netfilter/nfnetlink_osf.h>
struct nft_osf {
- enum nft_registers dreg:8;
+ u8 dreg;
u8 ttl;
u32 flags;
};
@@ -78,9 +78,9 @@ static int nft_osf_init(const struct nft_ctx *ctx,
priv->flags = flags;
}
- priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN);
+ err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE,
+ NFT_OSF_MAXGENRELEN);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 47d4e0e21651..cb1c8c231880 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -144,10 +144,10 @@ static int nft_payload_init(const struct nft_ctx *ctx,
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
- priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
}
static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -658,7 +658,6 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
- priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]);
if (tb[NFTA_PAYLOAD_CSUM_TYPE])
priv->csum_type =
@@ -691,7 +690,8 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- return nft_validate_register_load(priv->sreg, priv->len);
+ return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
+ priv->len);
}
static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 23265d757acb..9ba1de51ac07 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -19,10 +19,10 @@
static u32 jhash_initval __read_mostly;
struct nft_queue {
- enum nft_registers sreg_qnum:8;
- u16 queuenum;
- u16 queues_total;
- u16 flags;
+ u8 sreg_qnum;
+ u16 queuenum;
+ u16 queues_total;
+ u16 flags;
};
static void nft_queue_eval(const struct nft_expr *expr,
@@ -111,8 +111,8 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx,
struct nft_queue *priv = nft_expr_priv(expr);
int err;
- priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]);
- err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32));
+ err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM],
+ &priv->sreg_qnum, sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 89efcc5a533d..e4a1c44d7f51 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -15,7 +15,7 @@
struct nft_range_expr {
struct nft_data data_from;
struct nft_data data_to;
- enum nft_registers sreg:8;
+ u8 sreg;
u8 len;
enum nft_range_ops op:8;
};
@@ -86,8 +86,8 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
goto err2;
}
- priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]);
- err = nft_validate_register_load(priv->sreg, desc_from.len);
+ err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg,
+ desc_from.len);
if (err < 0)
goto err2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 2056051c0af0..ba09890dddb5 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -14,8 +14,8 @@
#include <net/netfilter/nf_tables.h>
struct nft_redir {
- enum nft_registers sreg_proto_min:8;
- enum nft_registers sreg_proto_max:8;
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
u16 flags;
};
@@ -50,19 +50,15 @@ static int nft_redir_init(const struct nft_ctx *ctx,
plen = sizeof_field(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
- priv->sreg_proto_min =
- nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]);
-
- err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
- priv->sreg_proto_max =
- nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]);
-
- err = nft_validate_register_load(priv->sreg_proto_max,
- plen);
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 7cfcb0e2f7ee..bcd01a63e38f 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -15,7 +15,7 @@
struct nft_rt {
enum nft_rt_keys key:8;
- enum nft_registers dreg:8;
+ u8 dreg;
};
static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
@@ -141,9 +141,8 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_RT_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_rt_get_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index a28aca5124ce..c9b8a2b03b71 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -10,7 +10,7 @@
struct nft_socket {
enum nft_socket_keys key:8;
union {
- enum nft_registers dreg:8;
+ u8 dreg;
};
};
@@ -133,9 +133,8 @@ static int nft_socket_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_socket_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index d67f83a0958d..43a5a780a6d3 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -13,9 +13,9 @@
#endif
struct nft_tproxy {
- enum nft_registers sreg_addr:8;
- enum nft_registers sreg_port:8;
- u8 family;
+ u8 sreg_addr;
+ u8 sreg_port;
+ u8 family;
};
static void nft_tproxy_eval_v4(const struct nft_expr *expr,
@@ -247,15 +247,15 @@ static int nft_tproxy_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_TPROXY_REG_ADDR]) {
- priv->sreg_addr = nft_parse_register(tb[NFTA_TPROXY_REG_ADDR]);
- err = nft_validate_register_load(priv->sreg_addr, alen);
+ err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR],
+ &priv->sreg_addr, alen);
if (err < 0)
return err;
}
if (tb[NFTA_TPROXY_REG_PORT]) {
- priv->sreg_port = nft_parse_register(tb[NFTA_TPROXY_REG_PORT]);
- err = nft_validate_register_load(priv->sreg_port, sizeof(u16));
+ err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT],
+ &priv->sreg_port, sizeof(u16));
if (err < 0)
return err;
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index d3eb953d0333..3b27926d5382 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -15,7 +15,7 @@
struct nft_tunnel {
enum nft_tunnel_keys key:8;
- enum nft_registers dreg:8;
+ u8 dreg;
enum nft_tunnel_mode mode:8;
};
@@ -93,8 +93,6 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]);
-
if (tb[NFTA_TUNNEL_MODE]) {
priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE]));
if (priv->mode > NFT_TUNNEL_MODE_MAX)
@@ -103,8 +101,8 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx,
priv->mode = NFT_TUNNEL_MODE_NONE;
}
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_tunnel_get_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 06d5cabf1d7c..cbbbc4ecad3a 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -24,7 +24,7 @@ static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = {
struct nft_xfrm {
enum nft_xfrm_keys key:8;
- enum nft_registers dreg:8;
+ u8 dreg;
u8 dir;
u8 spnum;
};
@@ -86,9 +86,8 @@ static int nft_xfrm_get_init(const struct nft_ctx *ctx,
priv->spnum = spnum;
- priv->dreg = nft_parse_register(tb[NFTA_XFRM_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_XFRM_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
/* Return true if key asks for daddr/saddr and current
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 37253d399c6b..0d5c422f8745 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -115,6 +115,9 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
} cfg;
int ret;
+ if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name))
+ return -ENAMETOOLONG;
+
net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
mutex_lock(&xn->hash_lock);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 606411869698..0446307516cd 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -152,7 +152,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
/*
* Drop entries with timestamps older then 'time'.
*/
-static void recent_entry_reap(struct recent_table *t, unsigned long time)
+static void recent_entry_reap(struct recent_table *t, unsigned long time,
+ struct recent_entry *working, bool update)
{
struct recent_entry *e;
@@ -162,6 +163,12 @@ static void recent_entry_reap(struct recent_table *t, unsigned long time)
e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
/*
+ * Do not reap the entry which are going to be updated.
+ */
+ if (e == working && update)
+ return;
+
+ /*
* The last time stamp is the most recent.
*/
if (time_after(time, e->stamps[e->index-1]))
@@ -303,7 +310,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
/* info->seconds must be non-zero */
if (info->check_set & XT_RECENT_REAP)
- recent_entry_reap(t, time);
+ recent_entry_reap(t, time, e,
+ info->check_set & XT_RECENT_UPDATE && ret);
}
if (info->check_set & XT_RECENT_SET ||
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index daca50d6bb12..dd488938447f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -67,6 +67,8 @@
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/netlink.h>
#include "af_netlink.h"
@@ -147,6 +149,12 @@ static BLOCKING_NOTIFIER_HEAD(netlink_chain);
static const struct rhashtable_params netlink_rhashtable_params;
+void do_trace_netlink_extack(const char *msg)
+{
+ trace_netlink_extack(msg);
+}
+EXPORT_SYMBOL(do_trace_netlink_extack);
+
static inline u32 netlink_group_mask(u32 group)
{
return group ? 1 << (group - 1) : 0;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index c992424e4d63..2d6fdf40df66 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1360,11 +1360,43 @@ static struct genl_family genl_ctrl __ro_after_init = {
.netnsok = true,
};
+static int genl_bind(struct net *net, int group)
+{
+ const struct genl_family *family;
+ unsigned int id;
+ int ret = 0;
+
+ genl_lock_all();
+
+ idr_for_each_entry(&genl_fam_idr, family, id) {
+ const struct genl_multicast_group *grp;
+ int i;
+
+ if (family->n_mcgrps == 0)
+ continue;
+
+ i = group - family->mcgrp_offset;
+ if (i < 0 || i >= family->n_mcgrps)
+ continue;
+
+ grp = &family->mcgrps[i];
+ if ((grp->flags & GENL_UNS_ADMIN_PERM) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ ret = -EPERM;
+
+ break;
+ }
+
+ genl_unlock_all();
+ return ret;
+}
+
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
+ .bind = genl_bind,
};
/* we'll bump the group number right afterwards */
diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig
index 96b91674dd37..466a0279b93e 100644
--- a/net/nfc/Kconfig
+++ b/net/nfc/Kconfig
@@ -4,7 +4,6 @@
#
menuconfig NFC
- depends on NET
depends on RFKILL || !RFKILL
tristate "NFC subsystem support"
default n
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 0eb4ddc056e7..c0c8fea3a186 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -236,7 +236,7 @@ static void llc_shdlc_rcv_i_frame(struct llc_shdlc *shdlc,
goto exit;
}
- if (shdlc->t1_active == false) {
+ if (!shdlc->t1_active) {
shdlc->t1_active = true;
mod_timer(&shdlc->t1_timer, jiffies +
msecs_to_jiffies(SHDLC_T1_VALUE_MS(shdlc->w)));
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index e64727e1a72f..59257400697d 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -508,7 +508,7 @@ static int nci_open_device(struct nci_dev *ndev)
};
unsigned long opt = 0;
- if (!(ndev->nci_ver & NCI_VER_2_MASK))
+ if (ndev->nci_ver & NCI_VER_2_MASK)
opt = (unsigned long)&nci_init_v2_cmd;
rc = __nci_request(ndev, nci_init_req, opt,
@@ -579,11 +579,11 @@ static int nci_close_device(struct nci_dev *ndev)
clear_bit(NCI_INIT, &ndev->flags);
- del_timer_sync(&ndev->cmd_timer);
-
/* Flush cmd wq */
flush_workqueue(ndev->cmd_wq);
+ del_timer_sync(&ndev->cmd_timer);
+
/* Clear flags */
ndev->flags = 0;
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 11b554ce07ff..1204c438e87d 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -292,7 +292,8 @@ static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file,
/* We don't provide read/write/poll interface for user space. */
static ssize_t nci_uart_tty_read(struct tty_struct *tty, struct file *file,
- unsigned char __user *buf, size_t nr)
+ unsigned char *buf, size_t nr,
+ void **cookie, unsigned long offset)
{
return 0;
}
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 573b38ad2f8e..722f7ef891e1 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -852,6 +852,7 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info)
if (!dev->polling) {
device_unlock(&dev->dev);
+ nfc_put_device(dev);
return -EINVAL;
}
@@ -1819,9 +1820,9 @@ static int nfc_genl_rcv_nl_event(struct notifier_block *this,
w = kmalloc(sizeof(*w), GFP_ATOMIC);
if (w) {
- INIT_WORK((struct work_struct *) w, nfc_urelease_event_work);
+ INIT_WORK(&w->w, nfc_urelease_event_work);
w->portid = n->portid;
- schedule_work((struct work_struct *) w);
+ schedule_work(&w->w);
}
out:
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 955c195ae14b..9c7eb8455ba8 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -105,7 +105,7 @@ static int rawsock_connect(struct socket *sock, struct sockaddr *_addr,
if (addr->target_idx > dev->target_next_idx - 1 ||
addr->target_idx < dev->target_next_idx - dev->n_targets) {
rc = -EINVAL;
- goto error;
+ goto put_dev;
}
rc = nfc_activate_target(dev, addr->target_idx, addr->nfc_protocol);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e8902a7e60f2..92a0b67b2728 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -957,14 +957,14 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key,
- const struct nlattr *attr, bool last)
+ const struct nlattr *attr)
{
/* The first attribute is always 'OVS_DEC_TTL_ATTR_ACTION'. */
struct nlattr *actions = nla_data(attr);
if (nla_len(actions))
return clone_execute(dp, skb, key, 0, nla_data(actions),
- nla_len(actions), last, false);
+ nla_len(actions), true, false);
consume_skb(skb);
return 0;
@@ -1418,11 +1418,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_DEC_TTL:
err = execute_dec_ttl(skb, key);
- if (err == -EHOSTUNREACH) {
- err = dec_ttl_exception_handler(dp, skb, key,
- a, true);
- return err;
- }
+ if (err == -EHOSTUNREACH)
+ return dec_ttl_exception_handler(dp, skb,
+ key, a);
break;
}
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 4c5c2331e764..fd1f809e9bc1 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2515,15 +2515,25 @@ static int validate_and_copy_dec_ttl(struct net *net,
if (type > OVS_DEC_TTL_ATTR_MAX)
continue;
- if (!type || attrs[type])
+ if (!type || attrs[type]) {
+ OVS_NLERR(log, "Duplicate or invalid key (type %d).",
+ type);
return -EINVAL;
+ }
attrs[type] = a;
}
+ if (rem) {
+ OVS_NLERR(log, "Message has %d unknown bytes.", rem);
+ return -EINVAL;
+ }
+
actions = attrs[OVS_DEC_TTL_ATTR_ACTION];
- if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+ if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) {
+ OVS_NLERR(log, "Missing valid actions attribute.");
return -EINVAL;
+ }
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
if (start < 0)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index de8e8dbbdeb8..e24b2841c643 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -135,11 +135,11 @@ Resume
On transmit:
------------
-dev->header_ops != NULL
+dev_has_header(dev) == true
mac_header -> ll header
data -> ll header
-dev->header_ops == NULL (ll header is invisible to us)
+dev_has_header(dev) == false (ll header is invisible to us)
mac_header -> data
data -> data
@@ -4595,7 +4595,9 @@ static void packet_seq_stop(struct seq_file *seq, void *v)
static int packet_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
+ seq_printf(seq,
+ "%*sRefCnt Type Proto Iface R Rmem User Inode\n",
+ IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk");
else {
struct sock *s = sk_entry(v);
const struct packet_sock *po = pkt_sk(s);
diff --git a/net/packet/internal.h b/net/packet/internal.h
index baafc3f3fa25..5f61e59ebbff 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -139,7 +139,7 @@ struct packet_sock {
atomic_t tp_drops ____cacheline_aligned_in_smp;
};
-static struct packet_sock *pkt_sk(struct sock *sk)
+static inline struct packet_sock *pkt_sk(struct sock *sk)
{
return (struct packet_sock *)sk;
}
diff --git a/net/psample/Kconfig b/net/psample/Kconfig
index 028f514a9c60..be0b839209ba 100644
--- a/net/psample/Kconfig
+++ b/net/psample/Kconfig
@@ -4,7 +4,6 @@
#
menuconfig PSAMPLE
- depends on NET
tristate "Packet-sampling netlink channel"
default n
help
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 33e238c965bd..482c07f2766b 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -309,10 +309,10 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
unsigned short tun_proto = ip_tunnel_info_af(tun_info);
const struct ip_tunnel_key *tun_key = &tun_info->key;
int tun_opts_len = tun_info->options_len;
- int sum = 0;
+ int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */
if (tun_key->tun_flags & TUNNEL_KEY)
- sum += nla_total_size(sizeof(u64));
+ sum += nla_total_size_64bit(sizeof(u64));
if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
sum += nla_total_size(0);
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 56aaf8cb6527..8d00dfe8139e 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -755,7 +755,7 @@ static void qrtr_ns_data_ready(struct sock *sk)
queue_work(qrtr_ns.workqueue, &qrtr_ns.work);
}
-void qrtr_ns_init(void)
+int qrtr_ns_init(void)
{
struct sockaddr_qrtr sq;
int ret;
@@ -766,7 +766,7 @@ void qrtr_ns_init(void)
ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM,
PF_QIPCRTR, &qrtr_ns.sock);
if (ret < 0)
- return;
+ return ret;
ret = kernel_getsockname(qrtr_ns.sock, (struct sockaddr *)&sq);
if (ret < 0) {
@@ -797,12 +797,13 @@ void qrtr_ns_init(void)
if (ret < 0)
goto err_wq;
- return;
+ return 0;
err_wq:
destroy_workqueue(qrtr_ns.workqueue);
err_sock:
sock_release(qrtr_ns.sock);
+ return ret;
}
EXPORT_SYMBOL_GPL(qrtr_ns_init);
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index f4ab3ca6d73b..b34358282f37 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1287,13 +1287,19 @@ static int __init qrtr_proto_init(void)
return rc;
rc = sock_register(&qrtr_family);
- if (rc) {
- proto_unregister(&qrtr_proto);
- return rc;
- }
+ if (rc)
+ goto err_proto;
- qrtr_ns_init();
+ rc = qrtr_ns_init();
+ if (rc)
+ goto err_sock;
+ return 0;
+
+err_sock:
+ sock_unregister(qrtr_family.family);
+err_proto:
+ proto_unregister(&qrtr_proto);
return rc;
}
postcore_initcall(qrtr_proto_init);
diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h
index dc2b67f17927..3f2d28696062 100644
--- a/net/qrtr/qrtr.h
+++ b/net/qrtr/qrtr.h
@@ -29,7 +29,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep);
int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len);
-void qrtr_ns_init(void);
+int qrtr_ns_init(void);
void qrtr_ns_remove(void);
diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c
index 15ce9b642b25..304b41fea5ab 100644
--- a/net/qrtr/tun.c
+++ b/net/qrtr/tun.c
@@ -31,6 +31,7 @@ static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
static int qrtr_tun_open(struct inode *inode, struct file *filp)
{
struct qrtr_tun *tun;
+ int ret;
tun = kzalloc(sizeof(*tun), GFP_KERNEL);
if (!tun)
@@ -43,7 +44,16 @@ static int qrtr_tun_open(struct inode *inode, struct file *filp)
filp->private_data = tun;
- return qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+ ret = qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+ if (ret)
+ goto out;
+
+ return 0;
+
+out:
+ filp->private_data = NULL;
+ kfree(tun);
+ return ret;
}
static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -80,6 +90,12 @@ static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t ret;
void *kbuf;
+ if (!len)
+ return -EINVAL;
+
+ if (len > KMALLOC_MAX_SIZE)
+ return -ENOMEM;
+
kbuf = kzalloc(len, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 1d0afb1dd77b..6f1a50d50d06 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -565,6 +565,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args,
if (args->nr_local == 0)
return -EINVAL;
+ if (args->nr_local > UIO_MAXIOV)
+ return -EMSGSIZE;
+
iov->iov = kcalloc(args->nr_local,
sizeof(struct rds_iovec),
GFP_KERNEL);
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index d706bb408365..0885b22e5c0e 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -8,6 +8,7 @@ config AF_RXRPC
depends on INET
select CRYPTO
select KEYS
+ select NET_UDP_TUNNEL
help
Say Y or M here to include support for RxRPC session sockets (just
the transport part, not the presentation part: (un)marshalling is
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0a2f4817ec6c..41671af6b33f 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -990,7 +990,7 @@ static int __init af_rxrpc_init(void)
goto error_security;
}
- ret = register_pernet_subsys(&rxrpc_net_ops);
+ ret = register_pernet_device(&rxrpc_net_ops);
if (ret)
goto error_pernet;
@@ -1035,7 +1035,7 @@ error_key_type:
error_sock:
proto_unregister(&rxrpc_proto);
error_proto:
- unregister_pernet_subsys(&rxrpc_net_ops);
+ unregister_pernet_device(&rxrpc_net_ops);
error_pernet:
rxrpc_exit_security();
error_security:
@@ -1057,7 +1057,7 @@ static void __exit af_rxrpc_exit(void)
unregister_key_type(&key_type_rxrpc);
sock_unregister(PF_RXRPC);
proto_unregister(&rxrpc_proto);
- unregister_pernet_subsys(&rxrpc_net_ops);
+ unregister_pernet_device(&rxrpc_net_ops);
ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0);
ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 382add72c66f..1ae90fb97936 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -197,6 +197,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->peer_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_peer *peer = b->peer_backlog[tail];
+ rxrpc_put_local(peer->local);
kfree(peer);
tail = (tail + 1) & (size - 1);
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index c845594b663f..4eb91d958a48 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -548,8 +548,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
rxrpc_disconnect_call(call);
if (call->security)
call->security->free_call_crypto(call);
-
- rxrpc_cleanup_ring(call);
_leave("");
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 667c44aa5a63..dc201363f2c4 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -430,7 +430,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
return;
}
- if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ if (state == RXRPC_CALL_SERVER_RECV_REQUEST) {
unsigned long timo = READ_ONCE(call->next_req_timo);
unsigned long now, expect_req_by;
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 9631aa8543b5..8d2073e0e3da 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -598,7 +598,7 @@ static long rxrpc_read(const struct key *key,
default: /* we have a ticket we can't encode */
pr_err("Unsupported key token type (%u)\n",
token->security_index);
- continue;
+ return -ENOPKG;
}
_debug("token[%u]: toksize=%u", ntoks, toksize);
@@ -674,7 +674,9 @@ static long rxrpc_read(const struct key *key,
break;
default:
- break;
+ pr_err("Unsupported key token type (%u)\n",
+ token->security_index);
+ return -ENOPKG;
}
ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==,
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 8c2881054266..a4111408ffd0 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -16,6 +16,7 @@
#include <linux/hashtable.h>
#include <net/sock.h>
#include <net/udp.h>
+#include <net/udp_tunnel.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
@@ -106,58 +107,44 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
*/
static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
{
+ struct udp_tunnel_sock_cfg tuncfg = {NULL};
+ struct sockaddr_rxrpc *srx = &local->srx;
+ struct udp_port_cfg udp_conf = {0};
struct sock *usk;
int ret;
_enter("%p{%d,%d}",
- local, local->srx.transport_type, local->srx.transport.family);
+ local, srx->transport_type, srx->transport.family);
- /* create a socket to represent the local endpoint */
- ret = sock_create_kern(net, local->srx.transport.family,
- local->srx.transport_type, 0, &local->socket);
+ udp_conf.family = srx->transport.family;
+ if (udp_conf.family == AF_INET) {
+ udp_conf.local_ip = srx->transport.sin.sin_addr;
+ udp_conf.local_udp_port = srx->transport.sin.sin_port;
+#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6)
+ } else {
+ udp_conf.local_ip6 = srx->transport.sin6.sin6_addr;
+ udp_conf.local_udp_port = srx->transport.sin6.sin6_port;
+#endif
+ }
+ ret = udp_sock_create(net, &udp_conf, &local->socket);
if (ret < 0) {
_leave(" = %d [socket]", ret);
return ret;
}
+ tuncfg.encap_type = UDP_ENCAP_RXRPC;
+ tuncfg.encap_rcv = rxrpc_input_packet;
+ tuncfg.sk_user_data = local;
+ setup_udp_tunnel_sock(net, local->socket, &tuncfg);
+
/* set the socket up */
usk = local->socket->sk;
- inet_sk(usk)->mc_loop = 0;
-
- /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
- inet_inc_convert_csum(usk);
-
- rcu_assign_sk_user_data(usk, local);
-
- udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC;
- udp_sk(usk)->encap_rcv = rxrpc_input_packet;
- udp_sk(usk)->encap_destroy = NULL;
- udp_sk(usk)->gro_receive = NULL;
- udp_sk(usk)->gro_complete = NULL;
-
- udp_encap_enable();
-#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6)
- if (local->srx.transport.family == AF_INET6)
- udpv6_encap_enable();
-#endif
usk->sk_error_report = rxrpc_error_report;
- /* if a local address was supplied then bind it */
- if (local->srx.transport_len > sizeof(sa_family_t)) {
- _debug("bind");
- ret = kernel_bind(local->socket,
- (struct sockaddr *)&local->srx.transport,
- local->srx.transport_len);
- if (ret < 0) {
- _debug("bind failed %d", ret);
- goto error;
- }
- }
-
- switch (local->srx.transport.family) {
+ switch (srx->transport.family) {
case AF_INET6:
/* we want to receive ICMPv6 errors */
- ip6_sock_set_recverr(local->socket->sk);
+ ip6_sock_set_recverr(usk);
/* Fall through and set IPv4 options too otherwise we don't get
* errors from IPv4 packets sent through the IPv6 socket.
@@ -165,13 +152,13 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
fallthrough;
case AF_INET:
/* we want to receive ICMP errors */
- ip_sock_set_recverr(local->socket->sk);
+ ip_sock_set_recverr(usk);
/* we want to set the don't fragment bit */
- ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
+ ip_sock_set_mtu_discover(usk, IP_PMTUDISC_DO);
/* We want receive timestamps. */
- sock_enable_timestamps(local->socket->sk);
+ sock_enable_timestamps(usk);
break;
default:
@@ -180,15 +167,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
_leave(" = 0");
return 0;
-
-error:
- kernel_sock_shutdown(local->socket, SHUT_RDWR);
- local->socket->sk->sk_user_data = NULL;
- sock_release(local->socket);
- local->socket = NULL;
-
- _leave(" = %d", ret);
- return ret;
}
/*
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2e85b636b27b..b919826939e0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -908,7 +908,7 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
[TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
};
-static void tcf_idr_insert_many(struct tc_action *actions[])
+void tcf_idr_insert_many(struct tc_action *actions[])
{
int i;
@@ -928,19 +928,13 @@ static void tcf_idr_insert_many(struct tc_action *actions[])
}
}
-struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
- struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind,
- bool rtnl_held,
- struct netlink_ext_ack *extack)
+struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+ bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
- struct nla_bitfield32 flags = { 0, 0 };
- u8 hw_stats = TCA_ACT_HW_STATS_ANY;
- struct tc_action *a;
+ struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_action_ops *a_o;
- struct tc_cookie *cookie = NULL;
char act_name[IFNAMSIZ];
- struct nlattr *tb[TCA_ACT_MAX + 1];
struct nlattr *kind;
int err;
@@ -948,33 +942,21 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
tcf_action_policy, extack);
if (err < 0)
- goto err_out;
+ return ERR_PTR(err);
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
if (!kind) {
NL_SET_ERR_MSG(extack, "TC action kind must be specified");
- goto err_out;
+ return ERR_PTR(err);
}
if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) {
NL_SET_ERR_MSG(extack, "TC action name too long");
- goto err_out;
- }
- if (tb[TCA_ACT_COOKIE]) {
- cookie = nla_memdup_cookie(tb);
- if (!cookie) {
- NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
- err = -ENOMEM;
- goto err_out;
- }
+ return ERR_PTR(err);
}
- hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
- if (tb[TCA_ACT_FLAGS])
- flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
} else {
if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
NL_SET_ERR_MSG(extack, "TC action name too long");
- err = -EINVAL;
- goto err_out;
+ return ERR_PTR(-EINVAL);
}
}
@@ -996,24 +978,56 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
* indicate this using -EAGAIN.
*/
if (a_o != NULL) {
- err = -EAGAIN;
- goto err_mod;
+ module_put(a_o->owner);
+ return ERR_PTR(-EAGAIN);
}
#endif
NL_SET_ERR_MSG(extack, "Failed to load TC action module");
- err = -ENOENT;
- goto err_free;
+ return ERR_PTR(-ENOENT);
}
+ return a_o;
+}
+
+struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
+ struct nlattr *nla, struct nlattr *est,
+ char *name, int ovr, int bind,
+ struct tc_action_ops *a_o, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ struct nla_bitfield32 flags = { 0, 0 };
+ u8 hw_stats = TCA_ACT_HW_STATS_ANY;
+ struct nlattr *tb[TCA_ACT_MAX + 1];
+ struct tc_cookie *cookie = NULL;
+ struct tc_action *a;
+ int err;
+
/* backward compatibility for policer */
- if (name == NULL)
+ if (name == NULL) {
+ err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
+ tcf_action_policy, extack);
+ if (err < 0)
+ return ERR_PTR(err);
+ if (tb[TCA_ACT_COOKIE]) {
+ cookie = nla_memdup_cookie(tb);
+ if (!cookie) {
+ NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
+ err = -ENOMEM;
+ goto err_out;
+ }
+ }
+ hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
+ if (tb[TCA_ACT_FLAGS])
+ flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
+
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
rtnl_held, tp, flags.value, extack);
- else
+ } else {
err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
tp, flags.value, extack);
+ }
if (err < 0)
- goto err_mod;
+ goto err_out;
if (!name && tb[TCA_ACT_COOKIE])
tcf_set_action_cookie(&a->act_cookie, cookie);
@@ -1030,14 +1044,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
return a;
-err_mod:
- module_put(a_o->owner);
-err_free:
+err_out:
if (cookie) {
kfree(cookie->data);
kfree(cookie);
}
-err_out:
return ERR_PTR(err);
}
@@ -1048,6 +1059,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct tc_action *actions[], size_t *attr_size,
bool rtnl_held, struct netlink_ext_ack *extack)
{
+ struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
size_t sz = 0;
@@ -1060,8 +1072,19 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
+ struct tc_action_ops *a_o;
+
+ a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
+ if (IS_ERR(a_o)) {
+ err = PTR_ERR(a_o);
+ goto err_mod;
+ }
+ ops[i - 1] = a_o;
+ }
+
+ for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
- rtnl_held, extack);
+ ops[i - 1], rtnl_held, extack);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
@@ -1081,6 +1104,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
err:
tcf_action_destroy(actions, bind);
+err_mod:
+ for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
+ if (ops[i])
+ module_put(ops[i]->owner);
+ }
return err;
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 83a5c6722a06..f0a0aa125b00 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -183,6 +183,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
IP_CT_ESTABLISHED_REPLY;
/* aligns with the CT reference on the SKB nf_ct_set */
entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
+ entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
act_ct_labels = entry->ct_metadata.labels;
ct_labels = nf_ct_labels_find(ct);
@@ -1030,6 +1031,7 @@ out_push:
out:
tcf_action_update_bstats(&c->common, skb);
+ qdisc_skb_cb(skb)->post_ct = true;
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
return retval;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 37b77bd30974..e37556cc37ab 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3043,16 +3043,24 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
size_t attr_size = 0;
if (exts->police && tb[exts->police]) {
+ struct tc_action_ops *a_o;
+
+ a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
+ if (IS_ERR(a_o))
+ return PTR_ERR(a_o);
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, "police", ovr,
- TCA_ACT_BIND, rtnl_held,
+ TCA_ACT_BIND, a_o, rtnl_held,
extack);
- if (IS_ERR(act))
+ if (IS_ERR(act)) {
+ module_put(a_o->owner);
return PTR_ERR(act);
+ }
act->type = exts->type = TCA_OLD_COMPAT;
exts->actions[0] = act;
exts->nr_actions = 1;
+ tcf_idr_insert_many(exts->actions);
} else if (exts->action && tb[exts->action]) {
int err;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1319986693fc..d097b5c15faa 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -30,6 +30,11 @@
#include <uapi/linux/netfilter/nf_conntrack_common.h>
+#define TCA_FLOWER_KEY_CT_FLAGS_MAX \
+ ((__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) << 1)
+#define TCA_FLOWER_KEY_CT_FLAGS_MASK \
+ (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1)
+
struct fl_flow_key {
struct flow_dissector_key_meta meta;
struct flow_dissector_key_control control;
@@ -291,9 +296,11 @@ static u16 fl_ct_info_to_flower_map[] = {
[IP_CT_RELATED] = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
TCA_FLOWER_KEY_CT_FLAGS_RELATED,
[IP_CT_ESTABLISHED_REPLY] = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
- TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED,
+ TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
+ TCA_FLOWER_KEY_CT_FLAGS_REPLY,
[IP_CT_RELATED_REPLY] = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
- TCA_FLOWER_KEY_CT_FLAGS_RELATED,
+ TCA_FLOWER_KEY_CT_FLAGS_RELATED |
+ TCA_FLOWER_KEY_CT_FLAGS_REPLY,
[IP_CT_NEW] = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
TCA_FLOWER_KEY_CT_FLAGS_NEW,
};
@@ -302,6 +309,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+ bool post_ct = qdisc_skb_cb(skb)->post_ct;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
@@ -318,7 +326,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
fl_ct_info_to_flower_map,
- ARRAY_SIZE(fl_ct_info_to_flower_map));
+ ARRAY_SIZE(fl_ct_info_to_flower_map),
+ post_ct);
skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
@@ -686,8 +695,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED },
- [TCA_FLOWER_KEY_CT_STATE] = { .type = NLA_U16 },
- [TCA_FLOWER_KEY_CT_STATE_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_CT_STATE] =
+ NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK),
+ [TCA_FLOWER_KEY_CT_STATE_MASK] =
+ NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK),
[TCA_FLOWER_KEY_CT_ZONE] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_CT_ZONE_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_CT_MARK] = { .type = NLA_U32 },
@@ -1272,6 +1283,10 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+ if (!nla_ok(nla_opt_msk, msk_depth)) {
+ NL_SET_ERR_MSG(extack, "Invalid nested attribute for masks");
+ return -EINVAL;
+ }
}
nla_for_each_attr(nla_opt_key, nla_enc_key,
@@ -1307,9 +1322,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
return -EINVAL;
}
-
- if (msk_depth)
- nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
break;
case TCA_FLOWER_KEY_ENC_OPTS_VXLAN:
if (key->enc_opts.dst_opt_type) {
@@ -1340,9 +1352,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
return -EINVAL;
}
-
- if (msk_depth)
- nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
break;
case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN:
if (key->enc_opts.dst_opt_type) {
@@ -1373,14 +1382,54 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
return -EINVAL;
}
-
- if (msk_depth)
- nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
break;
default:
NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
return -EINVAL;
}
+
+ if (!msk_depth)
+ continue;
+
+ if (!nla_ok(nla_opt_msk, msk_depth)) {
+ NL_SET_ERR_MSG(extack, "A mask attribute is invalid");
+ return -EINVAL;
+ }
+ nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+ }
+
+ return 0;
+}
+
+static int fl_validate_ct_state(u16 state, struct nlattr *tb,
+ struct netlink_ext_ack *extack)
+{
+ if (state && !(state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "no trk, so no other flag can be set");
+ return -EINVAL;
+ }
+
+ if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW &&
+ state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) {
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "new and est are mutually exclusive");
+ return -EINVAL;
+ }
+
+ if (state & TCA_FLOWER_KEY_CT_FLAGS_INVALID &&
+ state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+ TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "when inv is set, only trk may be set");
+ return -EINVAL;
+ }
+
+ if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW &&
+ state & TCA_FLOWER_KEY_CT_FLAGS_REPLY) {
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "new and rpl are mutually exclusive");
+ return -EINVAL;
}
return 0;
@@ -1392,6 +1441,8 @@ static int fl_set_key_ct(struct nlattr **tb,
struct netlink_ext_ack *extack)
{
if (tb[TCA_FLOWER_KEY_CT_STATE]) {
+ int err;
+
if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) {
NL_SET_ERR_MSG(extack, "Conntrack isn't enabled");
return -EOPNOTSUPP;
@@ -1399,6 +1450,13 @@ static int fl_set_key_ct(struct nlattr **tb,
fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE,
&mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
sizeof(key->ct_state));
+
+ err = fl_validate_ct_state(mask->ct_state,
+ tb[TCA_FLOWER_KEY_CT_STATE_MASK],
+ extack);
+ if (err)
+ return err;
+
}
if (tb[TCA_FLOWER_KEY_CT_ZONE]) {
if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 78bec347b8b6..c4007b9cd16d 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -366,9 +366,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
if (tb[TCA_TCINDEX_MASK])
cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
- if (tb[TCA_TCINDEX_SHIFT])
+ if (tb[TCA_TCINDEX_SHIFT]) {
cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-
+ if (cp->shift > 16) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
if (!cp->hash) {
/* Hash not specified, use perfect hash if the upper limit
* of the hashing index is below the threshold.
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 2c1192a2ee5e..a83b237cbeb0 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -31,7 +31,7 @@ static int em_nbyte_change(struct net *net, void *data, int data_len,
em->datalen = sizeof(*nbyte) + nbyte->len;
em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL);
if (em->data == 0UL)
- return -ENOBUFS;
+ return -ENOMEM;
return 0;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 51cb553e4317..e2e4353db8a7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -412,7 +412,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
{
struct qdisc_rate_table *rtab;
- if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
+ if (tab == NULL || r->rate == 0 ||
+ r->cell_log == 0 || r->cell_log >= 32 ||
nla_len(tab) != TC_RTAB_SIZE) {
NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
return NULL;
@@ -1865,7 +1866,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
static int tclass_del_notify(struct net *net,
const struct Qdisc_class_ops *cops,
struct sk_buff *oskb, struct nlmsghdr *n,
- struct Qdisc *q, unsigned long cl)
+ struct Qdisc *q, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
struct sk_buff *skb;
@@ -1884,7 +1886,7 @@ static int tclass_del_notify(struct net *net,
return -EINVAL;
}
- err = cops->delete(q, cl);
+ err = cops->delete(q, cl, extack);
if (err) {
kfree_skb(skb);
return err;
@@ -2087,7 +2089,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
goto out;
break;
case RTM_DELTCLASS:
- err = tclass_del_notify(net, cops, skb, n, q, cl);
+ err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
/* Unbind the class with flilters with 0 */
tc_bind_tclass(q, portid, clid, 0);
goto out;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 007bd2d9f1ff..d0c9a57398fc 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -320,7 +320,8 @@ err_out:
return error;
}
-static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
+static int atm_tc_delete(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 53d45e029c36..320b3d31fa97 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1675,7 +1675,8 @@ failure:
return err;
}
-static int cbq_delete(struct Qdisc *sch, unsigned long arg)
+static int cbq_delete(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index bd618b00d319..50f680f03a54 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -362,7 +362,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log))
return -EINVAL;
if (ctl->limit > CHOKE_MAX_QUEUE)
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index dde564670ad8..fc1e47069593 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -146,7 +146,8 @@ static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
kfree(cl);
}
-static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
+static int drr_delete_class(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl = (struct drr_class *)arg;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 2b88710994d7..cd2748e2d4a2 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -150,7 +150,8 @@ errout:
return err;
}
-static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
+static int dsmark_delete(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 8599c6f31b05..e0bc77533acc 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -480,7 +480,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
struct gred_sched *table = qdisc_priv(sch);
struct gred_sched_data *q = table->tab[dp];
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) {
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) {
NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
return -EINVAL;
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d1902fca9844..bf0034c66e35 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1090,7 +1090,8 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
}
static int
-hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
+hfsc_delete_class(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl = (struct hfsc_class *)arg;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index cd70dbcbd72f..dff3adf5a915 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -114,6 +114,7 @@ struct htb_class {
* Written often fields
*/
struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_packed bstats_bias;
struct tc_htb_xstats xstats; /* our special stats */
/* token bucket parameters */
@@ -174,6 +175,11 @@ struct htb_sched {
int row_mask[TC_HTB_MAXDEPTH];
struct htb_level hlevel[TC_HTB_MAXDEPTH];
+
+ struct Qdisc **direct_qdiscs;
+ unsigned int num_direct_qdiscs;
+
+ bool offload;
};
/* find class in global hash table using given handle */
@@ -957,7 +963,7 @@ static void htb_reset(struct Qdisc *sch)
if (cl->level)
memset(&cl->inner, 0, sizeof(cl->inner));
else {
- if (cl->leaf.q)
+ if (cl->leaf.q && !q->offload)
qdisc_reset(cl->leaf.q);
}
cl->prio_activity = 0;
@@ -980,6 +986,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
[TCA_HTB_RATE64] = { .type = NLA_U64 },
[TCA_HTB_CEIL64] = { .type = NLA_U64 },
+ [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
};
static void htb_work_func(struct work_struct *work)
@@ -992,12 +999,27 @@ static void htb_work_func(struct work_struct *work)
rcu_read_unlock();
}
+static void htb_set_lockdep_class_child(struct Qdisc *q)
+{
+ static struct lock_class_key child_key;
+
+ lockdep_set_class(qdisc_lock(q), &child_key);
+}
+
+static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
+{
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
+}
+
static int htb_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_htb_qopt_offload offload_opt;
struct htb_sched *q = qdisc_priv(sch);
struct nlattr *tb[TCA_HTB_MAX + 1];
struct tc_htb_glob *gopt;
+ unsigned int ntx;
int err;
qdisc_watchdog_init(&q->watchdog, sch);
@@ -1022,9 +1044,26 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
if (gopt->version != HTB_VER >> 16)
return -EINVAL;
+ q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
+
+ if (q->offload) {
+ if (sch->parent != TC_H_ROOT)
+ return -EOPNOTSUPP;
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ q->num_direct_qdiscs = dev->real_num_tx_queues;
+ q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
+ sizeof(*q->direct_qdiscs),
+ GFP_KERNEL);
+ if (!q->direct_qdiscs)
+ return -ENOMEM;
+ }
+
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
- return err;
+ goto err_free_direct_qdiscs;
qdisc_skb_head_init(&q->direct_queue);
@@ -1037,7 +1076,107 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
q->rate2quantum = 1;
q->defcls = gopt->defcls;
+ if (!q->offload)
+ return 0;
+
+ for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+ struct Qdisc *qdisc;
+
+ qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, 0), extack);
+ if (!qdisc) {
+ err = -ENOMEM;
+ goto err_free_qdiscs;
+ }
+
+ htb_set_lockdep_class_child(qdisc);
+ q->direct_qdiscs[ntx] = qdisc;
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+ }
+
+ sch->flags |= TCQ_F_MQROOT;
+
+ offload_opt = (struct tc_htb_qopt_offload) {
+ .command = TC_HTB_CREATE,
+ .parent_classid = TC_H_MAJ(sch->handle) >> 16,
+ .classid = TC_H_MIN(q->defcls),
+ .extack = extack,
+ };
+ err = htb_offload(dev, &offload_opt);
+ if (err)
+ goto err_free_qdiscs;
+
return 0;
+
+err_free_qdiscs:
+ /* TC_HTB_CREATE call failed, avoid any further calls to the driver. */
+ q->offload = false;
+
+ for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
+ ntx++)
+ qdisc_put(q->direct_qdiscs[ntx]);
+
+ qdisc_class_hash_destroy(&q->clhash);
+ /* Prevent use-after-free and double-free when htb_destroy gets called.
+ */
+ q->clhash.hash = NULL;
+ q->clhash.hashsize = 0;
+
+err_free_direct_qdiscs:
+ kfree(q->direct_qdiscs);
+ q->direct_qdiscs = NULL;
+ return err;
+}
+
+static void htb_attach_offload(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct htb_sched *q = qdisc_priv(sch);
+ unsigned int ntx;
+
+ for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
+ struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
+
+ old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+ qdisc_put(old);
+ qdisc_hash_add(qdisc, false);
+ }
+ for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+ struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
+
+ qdisc_put(old);
+ }
+
+ kfree(q->direct_qdiscs);
+ q->direct_qdiscs = NULL;
+}
+
+static void htb_attach_software(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned int ntx;
+
+ /* Resemble qdisc_graft behavior. */
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
+ struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
+
+ qdisc_refcount_inc(sch);
+
+ qdisc_put(old);
+ }
+}
+
+static void htb_attach(struct Qdisc *sch)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+
+ if (q->offload)
+ htb_attach_offload(sch);
+ else
+ htb_attach_software(sch);
}
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -1046,6 +1185,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *nest;
struct tc_htb_glob gopt;
+ if (q->offload)
+ sch->flags |= TCQ_F_OFFLOADED;
+ else
+ sch->flags &= ~TCQ_F_OFFLOADED;
+
sch->qstats.overlimits = q->overlimits;
/* Its safe to not acquire qdisc lock. As we hold RTNL,
* no change can happen on the qdisc parameters.
@@ -1063,6 +1207,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
goto nla_put_failure;
+ if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
+ goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -1075,6 +1221,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct htb_class *cl = (struct htb_class *)arg;
+ struct htb_sched *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_htb_opt opt;
@@ -1101,6 +1248,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
opt.level = cl->level;
if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
+ goto nla_put_failure;
if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
TCA_HTB_PAD))
@@ -1117,10 +1266,39 @@ nla_put_failure:
return -1;
}
+static void htb_offload_aggregate_stats(struct htb_sched *q,
+ struct htb_class *cl)
+{
+ struct htb_class *c;
+ unsigned int i;
+
+ memset(&cl->bstats, 0, sizeof(cl->bstats));
+
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
+ struct htb_class *p = c;
+
+ while (p && p->level < cl->level)
+ p = p->parent;
+
+ if (p != cl)
+ continue;
+
+ cl->bstats.bytes += c->bstats_bias.bytes;
+ cl->bstats.packets += c->bstats_bias.packets;
+ if (c->level == 0) {
+ cl->bstats.bytes += c->leaf.q->bstats.bytes;
+ cl->bstats.packets += c->leaf.q->bstats.packets;
+ }
+ }
+ }
+}
+
static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
struct htb_class *cl = (struct htb_class *)arg;
+ struct htb_sched *q = qdisc_priv(sch);
struct gnet_stats_queue qs = {
.drops = cl->drops,
.overlimits = cl->overlimits,
@@ -1135,6 +1313,19 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
INT_MIN, INT_MAX);
+ if (q->offload) {
+ if (!cl->level) {
+ if (cl->leaf.q)
+ cl->bstats = cl->leaf.q->bstats;
+ else
+ memset(&cl->bstats, 0, sizeof(cl->bstats));
+ cl->bstats.bytes += cl->bstats_bias.bytes;
+ cl->bstats.packets += cl->bstats_bias.packets;
+ } else {
+ htb_offload_aggregate_stats(q, cl);
+ }
+ }
+
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
@@ -1144,19 +1335,97 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}
+static struct netdev_queue *
+htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_htb_qopt_offload offload_opt;
+ int err;
+
+ offload_opt = (struct tc_htb_qopt_offload) {
+ .command = TC_HTB_LEAF_QUERY_QUEUE,
+ .classid = TC_H_MIN(tcm->tcm_parent),
+ };
+ err = htb_offload(dev, &offload_opt);
+ if (err || offload_opt.qid >= dev->num_tx_queues)
+ return NULL;
+ return netdev_get_tx_queue(dev, offload_opt.qid);
+}
+
+static struct Qdisc *
+htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
+{
+ struct net_device *dev = dev_queue->dev;
+ struct Qdisc *old_q;
+
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+ old_q = dev_graft_qdisc(dev_queue, new_q);
+ if (new_q)
+ new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+
+ return old_q;
+}
+
+static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
+{
+ struct netdev_queue *queue_old, *queue_new;
+ struct net_device *dev = qdisc_dev(sch);
+ struct Qdisc *qdisc;
+
+ queue_old = netdev_get_tx_queue(dev, qid_old);
+ queue_new = netdev_get_tx_queue(dev, qid_new);
+
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+ qdisc = dev_graft_qdisc(queue_old, NULL);
+ qdisc->dev_queue = queue_new;
+ qdisc = dev_graft_qdisc(queue_new, qdisc);
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+
+ WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
+}
+
static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
+ struct netdev_queue *dev_queue = sch->dev_queue;
struct htb_class *cl = (struct htb_class *)arg;
+ struct htb_sched *q = qdisc_priv(sch);
+ struct Qdisc *old_q;
if (cl->level)
return -EINVAL;
- if (new == NULL &&
- (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- cl->common.classid, extack)) == NULL)
- return -ENOBUFS;
+
+ if (q->offload) {
+ dev_queue = new->dev_queue;
+ WARN_ON(dev_queue != cl->leaf.q->dev_queue);
+ }
+
+ if (!new) {
+ new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
+ cl->common.classid, extack);
+ if (!new)
+ return -ENOBUFS;
+ }
+
+ if (q->offload) {
+ htb_set_lockdep_class_child(new);
+ /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
+ qdisc_refcount_inc(new);
+ old_q = htb_graft_helper(dev_queue, new);
+ }
*old = qdisc_replace(sch, new, &cl->leaf.q);
+
+ if (q->offload) {
+ WARN_ON(old_q != *old);
+ qdisc_put(old_q);
+ }
+
return 0;
}
@@ -1184,9 +1453,10 @@ static inline int htb_parent_last_child(struct htb_class *cl)
return 1;
}
-static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
+static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
struct Qdisc *new_q)
{
+ struct htb_sched *q = qdisc_priv(sch);
struct htb_class *parent = cl->parent;
WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
@@ -1204,6 +1474,76 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
parent->cmode = HTB_CAN_SEND;
}
+static void htb_parent_to_leaf_offload(struct Qdisc *sch,
+ struct netdev_queue *dev_queue,
+ struct Qdisc *new_q)
+{
+ struct Qdisc *old_q;
+
+ /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
+ qdisc_refcount_inc(new_q);
+ old_q = htb_graft_helper(dev_queue, new_q);
+ WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
+}
+
+static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
+ bool last_child, bool destroying,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_htb_qopt_offload offload_opt;
+ struct Qdisc *q = cl->leaf.q;
+ struct Qdisc *old = NULL;
+ int err;
+
+ if (cl->level)
+ return -EINVAL;
+
+ WARN_ON(!q);
+ if (!destroying) {
+ /* On destroy of HTB, two cases are possible:
+ * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
+ * 2. q is a noop qdisc (for nodes that were inner),
+ * q->dev_queue is noop_netdev_queue.
+ */
+ old = htb_graft_helper(q->dev_queue, NULL);
+ WARN_ON(!old);
+ WARN_ON(old != q);
+ }
+
+ if (cl->parent) {
+ cl->parent->bstats_bias.bytes += q->bstats.bytes;
+ cl->parent->bstats_bias.packets += q->bstats.packets;
+ }
+
+ offload_opt = (struct tc_htb_qopt_offload) {
+ .command = !last_child ? TC_HTB_LEAF_DEL :
+ destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
+ TC_HTB_LEAF_DEL_LAST,
+ .classid = cl->common.classid,
+ .extack = extack,
+ };
+ err = htb_offload(qdisc_dev(sch), &offload_opt);
+
+ if (!err || destroying)
+ qdisc_put(old);
+ else
+ htb_graft_helper(q->dev_queue, old);
+
+ if (last_child)
+ return err;
+
+ if (!err && offload_opt.moved_qid != 0) {
+ if (destroying)
+ q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
+ offload_opt.qid);
+ else
+ htb_offload_move_qdisc(sch, offload_opt.moved_qid,
+ offload_opt.qid);
+ }
+
+ return err;
+}
+
static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
if (!cl->level) {
@@ -1217,8 +1557,11 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
static void htb_destroy(struct Qdisc *sch)
{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_htb_qopt_offload offload_opt;
struct htb_sched *q = qdisc_priv(sch);
struct hlist_node *next;
+ bool nonempty, changed;
struct htb_class *cl;
unsigned int i;
@@ -1237,21 +1580,68 @@ static void htb_destroy(struct Qdisc *sch)
cl->block = NULL;
}
}
- for (i = 0; i < q->clhash.hashsize; i++) {
- hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
- common.hnode)
- htb_destroy_class(sch, cl);
- }
+
+ do {
+ nonempty = false;
+ changed = false;
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
+ common.hnode) {
+ bool last_child;
+
+ if (!q->offload) {
+ htb_destroy_class(sch, cl);
+ continue;
+ }
+
+ nonempty = true;
+
+ if (cl->level)
+ continue;
+
+ changed = true;
+
+ last_child = htb_parent_last_child(cl);
+ htb_destroy_class_offload(sch, cl, last_child,
+ true, NULL);
+ qdisc_class_hash_remove(&q->clhash,
+ &cl->common);
+ if (cl->parent)
+ cl->parent->children--;
+ if (last_child)
+ htb_parent_to_leaf(sch, cl, NULL);
+ htb_destroy_class(sch, cl);
+ }
+ }
+ } while (changed);
+ WARN_ON(nonempty);
+
qdisc_class_hash_destroy(&q->clhash);
__qdisc_reset_queue(&q->direct_queue);
+
+ if (!q->offload)
+ return;
+
+ offload_opt = (struct tc_htb_qopt_offload) {
+ .command = TC_HTB_DESTROY,
+ };
+ htb_offload(dev, &offload_opt);
+
+ if (!q->direct_qdiscs)
+ return;
+ for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
+ qdisc_put(q->direct_qdiscs[i]);
+ kfree(q->direct_qdiscs);
}
-static int htb_delete(struct Qdisc *sch, unsigned long arg)
+static int htb_delete(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
struct Qdisc *new_q = NULL;
int last_child = 0;
+ int err;
/* TODO: why don't allow to delete subtree ? references ? does
* tc subsys guarantee us that in htb_destroy it holds no class
@@ -1260,11 +1650,28 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
if (cl->children || cl->filter_cnt)
return -EBUSY;
- if (!cl->level && htb_parent_last_child(cl)) {
- new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ if (!cl->level && htb_parent_last_child(cl))
+ last_child = 1;
+
+ if (q->offload) {
+ err = htb_destroy_class_offload(sch, cl, last_child, false,
+ extack);
+ if (err)
+ return err;
+ }
+
+ if (last_child) {
+ struct netdev_queue *dev_queue;
+
+ dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
+ new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
cl->parent->common.classid,
NULL);
- last_child = 1;
+ if (q->offload) {
+ if (new_q)
+ htb_set_lockdep_class_child(new_q);
+ htb_parent_to_leaf_offload(sch, dev_queue, new_q);
+ }
}
sch_tree_lock(sch);
@@ -1285,7 +1692,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
&q->hlevel[cl->level].wait_pq);
if (last_child)
- htb_parent_to_leaf(q, cl, new_q);
+ htb_parent_to_leaf(sch, cl, new_q);
sch_tree_unlock(sch);
@@ -1300,9 +1707,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
int err = -EINVAL;
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)*arg, *parent;
+ struct tc_htb_qopt_offload offload_opt;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_HTB_MAX + 1];
struct Qdisc *parent_qdisc = NULL;
+ struct netdev_queue *dev_queue;
struct tc_htb_opt *hopt;
u64 rate64, ceil64;
int warn = 0;
@@ -1335,8 +1744,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
NULL));
+ rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+ ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+
if (!cl) { /* new class */
- struct Qdisc *new_q;
+ struct net_device *dev = qdisc_dev(sch);
+ struct Qdisc *new_q, *old_q;
int prio;
struct {
struct nlattr nla;
@@ -1379,11 +1792,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE] ? : &est.nla);
- if (err) {
- tcf_block_put(cl->block);
- kfree(cl);
- goto failure;
- }
+ if (err)
+ goto err_block_put;
}
cl->children = 0;
@@ -1392,12 +1802,76 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
RB_CLEAR_NODE(&cl->node[prio]);
+ cl->common.classid = classid;
+
+ /* Make sure nothing interrupts us in between of two
+ * ndo_setup_tc calls.
+ */
+ ASSERT_RTNL();
+
/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
* so that can't be used inside of sch_tree_lock
* -- thanks to Karlis Peisenieks
*/
- new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ if (!q->offload) {
+ dev_queue = sch->dev_queue;
+ } else if (!(parent && !parent->level)) {
+ /* Assign a dev_queue to this classid. */
+ offload_opt = (struct tc_htb_qopt_offload) {
+ .command = TC_HTB_LEAF_ALLOC_QUEUE,
+ .classid = cl->common.classid,
+ .parent_classid = parent ?
+ TC_H_MIN(parent->common.classid) :
+ TC_HTB_CLASSID_ROOT,
+ .rate = max_t(u64, hopt->rate.rate, rate64),
+ .ceil = max_t(u64, hopt->ceil.rate, ceil64),
+ .extack = extack,
+ };
+ err = htb_offload(dev, &offload_opt);
+ if (err) {
+ pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
+ err);
+ goto err_kill_estimator;
+ }
+ dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
+ } else { /* First child. */
+ dev_queue = parent->leaf.q->dev_queue;
+ old_q = htb_graft_helper(dev_queue, NULL);
+ WARN_ON(old_q != parent->leaf.q);
+ offload_opt = (struct tc_htb_qopt_offload) {
+ .command = TC_HTB_LEAF_TO_INNER,
+ .classid = cl->common.classid,
+ .parent_classid =
+ TC_H_MIN(parent->common.classid),
+ .rate = max_t(u64, hopt->rate.rate, rate64),
+ .ceil = max_t(u64, hopt->ceil.rate, ceil64),
+ .extack = extack,
+ };
+ err = htb_offload(dev, &offload_opt);
+ if (err) {
+ pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
+ err);
+ htb_graft_helper(dev_queue, old_q);
+ goto err_kill_estimator;
+ }
+ parent->bstats_bias.bytes += old_q->bstats.bytes;
+ parent->bstats_bias.packets += old_q->bstats.packets;
+ qdisc_put(old_q);
+ }
+ new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
classid, NULL);
+ if (q->offload) {
+ if (new_q) {
+ htb_set_lockdep_class_child(new_q);
+ /* One ref for cl->leaf.q, the other for
+ * dev_queue->qdisc.
+ */
+ qdisc_refcount_inc(new_q);
+ }
+ old_q = htb_graft_helper(dev_queue, new_q);
+ /* No qdisc_put needed. */
+ WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
+ }
sch_tree_lock(sch);
if (parent && !parent->level) {
/* turn parent into inner node */
@@ -1415,10 +1889,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
: TC_HTB_MAXDEPTH) - 1;
memset(&parent->inner, 0, sizeof(parent->inner));
}
+
/* leaf (we) needs elementary qdisc */
cl->leaf.q = new_q ? new_q : &noop_qdisc;
- cl->common.classid = classid;
cl->parent = parent;
/* set class to be in HTB_CAN_SEND state */
@@ -1444,12 +1918,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (err)
return err;
}
- sch_tree_lock(sch);
- }
- rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
+ if (q->offload) {
+ struct net_device *dev = qdisc_dev(sch);
+
+ offload_opt = (struct tc_htb_qopt_offload) {
+ .command = TC_HTB_NODE_MODIFY,
+ .classid = cl->common.classid,
+ .rate = max_t(u64, hopt->rate.rate, rate64),
+ .ceil = max_t(u64, hopt->ceil.rate, ceil64),
+ .extack = extack,
+ };
+ err = htb_offload(dev, &offload_opt);
+ if (err)
+ /* Estimator was replaced, and rollback may fail
+ * as well, so we don't try to recover it, and
+ * the estimator won't work properly with the
+ * offload anyway, because bstats are updated
+ * only when the stats are queried.
+ */
+ return err;
+ }
- ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
+ sch_tree_lock(sch);
+ }
psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
@@ -1492,6 +1984,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
*arg = (unsigned long)cl;
return 0;
+err_kill_estimator:
+ gen_kill_estimator(&cl->rate_est);
+err_block_put:
+ tcf_block_put(cl->block);
+ kfree(cl);
failure:
return err;
}
@@ -1557,6 +2054,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
static const struct Qdisc_class_ops htb_class_ops = {
+ .select_queue = htb_select_queue,
.graft = htb_graft,
.leaf = htb_leaf,
.qlen_notify = htb_qlen_notify,
@@ -1579,6 +2077,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
.dequeue = htb_dequeue,
.peek = qdisc_peek_dequeued,
.init = htb_init,
+ .attach = htb_attach,
.reset = htb_reset,
.destroy = htb_destroy,
.dump = htb_dump,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6335230a971e..1db9d4a2ef5e 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -529,7 +529,8 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
kfree(cl);
}
-static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
+static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
+ struct netlink_ext_ack *extack)
{
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index e89fab6ccb34..b4ae34d7aa96 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -250,7 +250,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
ctl = nla_data(tb[TCA_RED_PARMS]);
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log))
return -EINVAL;
err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index da047a37a3bf..dde829d4b9f8 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -649,7 +649,8 @@ static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return -ENOSYS;
}
-static int sfb_delete(struct Qdisc *sch, unsigned long cl)
+static int sfb_delete(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
{
return -ENOSYS;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index bca2be57d9fc..b25e51440623 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -647,7 +647,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
}
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
- ctl_v1->Wlog))
+ ctl_v1->Wlog, ctl_v1->Scell_log))
return -EINVAL;
if (ctl_v1 && ctl_v1->qth_min) {
p = kmalloc(sizeof(*p), GFP_KERNEL);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c74817ec9964..8287894541e3 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -241,7 +241,7 @@ static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
/* Here, we are just trying to find out the
* first available interval in the next cycle.
*/
- entry_available = 1;
+ entry_available = true;
entry_found = entry;
*interval_start = ktime_add_ns(curr_intv_start, cycle);
*interval_end = ktime_add_ns(curr_intv_end, cycle);
@@ -372,7 +372,7 @@ static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
packet_transmit_time = length_to_duration(q, len);
do {
- sched_changed = 0;
+ sched_changed = false;
entry = find_entry_to_transmit(skb, sch, sched, admin,
minimum_time,
@@ -390,7 +390,7 @@ static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
if (admin && admin != sched &&
ktime_after(txtime, admin->base_time)) {
sched = admin;
- sched_changed = 1;
+ sched_changed = true;
continue;
}
@@ -1605,8 +1605,9 @@ static void taprio_reset(struct Qdisc *sch)
hrtimer_cancel(&q->advance_timer);
if (q->qdiscs) {
- for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
- qdisc_reset(q->qdiscs[i]);
+ for (i = 0; i < dev->num_tx_queues; i++)
+ if (q->qdiscs[i])
+ qdisc_reset(q->qdiscs[i]);
}
sch->qstats.backlog = 0;
sch->q.qlen = 0;
@@ -1626,7 +1627,7 @@ static void taprio_destroy(struct Qdisc *sch)
taprio_disable_offload(dev, q, NULL);
if (q->qdiscs) {
- for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
+ for (i = 0; i < dev->num_tx_queues; i++)
qdisc_put(q->qdiscs[i]);
kfree(q->qdiscs);
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index ce281a9a2875..eb874e3c399a 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -68,7 +68,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
goto out;
}
- segs = skb_segment(skb, features | NETIF_F_HW_CSUM | NETIF_F_SG);
+ segs = skb_segment(skb, (features | NETIF_F_HW_CSUM) & ~NETIF_F_SG);
if (IS_ERR(segs))
goto out;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index f7da88ae20a5..982a87b3e11f 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -215,6 +215,12 @@ static void sctp_transport_seq_stop(struct seq_file *seq, void *v)
{
struct sctp_ht_iter *iter = seq->private;
+ if (v && v != SEQ_START_TOKEN) {
+ struct sctp_transport *transport = v;
+
+ sctp_transport_put(transport);
+ }
+
sctp_transport_walk_stop(&iter->hti);
}
@@ -222,6 +228,12 @@ static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sctp_ht_iter *iter = seq->private;
+ if (v && v != SEQ_START_TOKEN) {
+ struct sctp_transport *transport = v;
+
+ sctp_transport_put(transport);
+ }
+
++*pos;
return sctp_transport_get_next(seq_file_net(seq), &iter->hti);
@@ -277,8 +289,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
sk->sk_rcvbuf);
seq_printf(seq, "\n");
- sctp_transport_put(transport);
-
return 0;
}
@@ -354,8 +364,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "\n");
}
- sctp_transport_put(transport);
-
return 0;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 59342b519e34..0df85a12651e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -246,7 +246,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
goto errattr;
smc_clc_get_hostname(&host);
if (host) {
- snprintf(hostname, sizeof(hostname), "%s", host);
+ memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
+ hostname[SMC_MAX_HOSTNAME_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
goto errattr;
}
@@ -257,7 +258,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
smc_ism_get_system_eid(smcd_dev, &seid);
mutex_unlock(&smcd_dev_list.mutex);
if (seid && smc_ism_is_v2_capable()) {
- snprintf(smc_seid, sizeof(smc_seid), "%s", seid);
+ memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
+ smc_seid[SMC_MAX_EID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
goto errattr;
}
@@ -295,7 +297,8 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
goto errattr;
- snprintf(smc_target, sizeof(smc_target), "%s", lgr->pnet_id);
+ memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
+ smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
goto errattr;
@@ -312,7 +315,7 @@ static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
struct sk_buff *skb,
struct netlink_callback *cb)
{
- char smc_ibname[IB_DEVICE_NAME_MAX + 1];
+ char smc_ibname[IB_DEVICE_NAME_MAX];
u8 smc_gid_target[41];
struct nlattr *attrs;
u32 link_uid = 0;
@@ -461,7 +464,8 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
goto errattr;
- snprintf(smc_pnet, sizeof(smc_pnet), "%s", lgr->smcd->pnetid);
+ memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
+ smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
goto errattr;
@@ -474,10 +478,12 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
goto errv2attr;
if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
goto errv2attr;
- snprintf(smc_host, sizeof(smc_host), "%s", lgr->peer_hostname);
+ memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
+ smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
goto errv2attr;
- snprintf(smc_eid, sizeof(smc_eid), "%s", lgr->negotiated_eid);
+ memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
+ smc_eid[SMC_MAX_EID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
goto errv2attr;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index ddd7fac98b1d..7d7ba0320d5a 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -371,8 +371,8 @@ static int smc_nl_handle_dev_port(struct sk_buff *skb,
if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR,
smcibdev->pnetid_by_user[port]))
goto errattr;
- snprintf(smc_pnet, sizeof(smc_pnet), "%s",
- (char *)&smcibdev->pnetid[port]);
+ memcpy(smc_pnet, &smcibdev->pnetid[port], SMC_MAX_PNETID_LEN);
+ smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet))
goto errattr;
if (nla_put_u32(skb, SMC_NLA_DEV_PORT_NETDEV,
@@ -414,7 +414,7 @@ static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev,
struct sk_buff *skb,
struct netlink_callback *cb)
{
- char smc_ibname[IB_DEVICE_NAME_MAX + 1];
+ char smc_ibname[IB_DEVICE_NAME_MAX];
struct smc_pci_dev smc_pci_dev;
struct pci_dev *pci_dev;
unsigned char is_crit;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 524ef64a191a..9c6e95882553 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -250,7 +250,8 @@ static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
goto errattr;
if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcd->pnetid_by_user))
goto errportattr;
- snprintf(smc_pnet, sizeof(smc_pnet), "%s", smcd->pnetid);
+ memcpy(smc_pnet, smcd->pnetid, SMC_MAX_PNETID_LEN);
+ smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet))
goto errportattr;
diff --git a/net/socket.c b/net/socket.c
index 33e8b6c4e1d3..23c7842389de 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -334,6 +334,7 @@ static const struct xattr_handler sockfs_xattr_handler = {
};
static int sockfs_security_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *suffix, const void *value,
size_t size, int flags)
@@ -537,9 +538,10 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
return used;
}
-static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
+static int sockfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *iattr)
{
- int err = simple_setattr(dentry, iattr);
+ int err = simple_setattr(&init_user_ns, dentry, iattr);
if (!err && (iattr->ia_valid & ATTR_UID)) {
struct socket *sock = SOCKET_I(d_inode(dentry));
@@ -2126,6 +2128,9 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
return __sys_setsockopt(fd, level, optname, optval, optlen);
}
+INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
+ int optname));
+
/*
* Get a socket option. Because we don't know the option lengths we have
* to pass a user mode parameter for the protocols to sort out.
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 010dcb876f9d..6e4dbd577a39 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -185,7 +185,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
scope_id = dev->ifindex;
dev_put(dev);
} else {
- if (kstrtou32(p, 10, &scope_id) == 0) {
+ if (kstrtou32(p, 10, &scope_id) != 0) {
kfree(p);
return 0;
}
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4ecc2a959567..5f42aa5fc612 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -29,6 +29,7 @@
#include <linux/uaccess.h>
#include <linux/hashtable.h>
+#include "auth_gss_internal.h"
#include "../netns.h"
#include <trace/events/rpcgss.h>
@@ -125,35 +126,6 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
}
-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, size_t len)
-{
- const void *q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- memcpy(res, p, len);
- return q;
-}
-
-static inline const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
-{
- const void *q;
- unsigned int len;
-
- p = simple_get_bytes(p, end, &len, sizeof(len));
- if (IS_ERR(p))
- return p;
- q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- dest->data = kmemdup(p, len, GFP_NOFS);
- if (unlikely(dest->data == NULL))
- return ERR_PTR(-ENOMEM);
- dest->len = len;
- return q;
-}
-
static struct gss_cl_ctx *
gss_cred_get_ctx(struct rpc_cred *cred)
{
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
new file mode 100644
index 000000000000..f6d9631bd9d0
--- /dev/null
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * linux/net/sunrpc/auth_gss/auth_gss_internal.h
+ *
+ * Internal definitions for RPCSEC_GSS client authentication
+ *
+ * Copyright (c) 2000 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ */
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/sunrpc/xdr.h>
+
+static inline const void *
+simple_get_bytes(const void *p, const void *end, void *res, size_t len)
+{
+ const void *q = (const void *)((const char *)p + len);
+ if (unlikely(q > end || q < p))
+ return ERR_PTR(-EFAULT);
+ memcpy(res, p, len);
+ return q;
+}
+
+static inline const void *
+simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+{
+ const void *q;
+ unsigned int len;
+
+ p = simple_get_bytes(p, end, &len, sizeof(len));
+ if (IS_ERR(p))
+ return p;
+ q = (const void *)((const char *)p + len);
+ if (unlikely(q > end || q < p))
+ return ERR_PTR(-EFAULT);
+ if (len) {
+ dest->data = kmemdup(p, len, GFP_NOFS);
+ if (unlikely(dest->data == NULL))
+ return ERR_PTR(-ENOMEM);
+ } else
+ dest->data = NULL;
+ dest->len = len;
+ return q;
+}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index ae9acf3a7389..1c092b05c2bb 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -21,6 +21,8 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/gss_krb5_enctypes.h>
+#include "auth_gss_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
@@ -143,35 +145,6 @@ get_gss_krb5_enctype(int etype)
return NULL;
}
-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, int len)
-{
- const void *q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- memcpy(res, p, len);
- return q;
-}
-
-static const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
-{
- const void *q;
- unsigned int len;
-
- p = simple_get_bytes(p, end, &len, sizeof(len));
- if (IS_ERR(p))
- return p;
- q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- res->data = kmemdup(p, len, GFP_NOFS);
- if (unlikely(res->data == NULL))
- return ERR_PTR(-ENOMEM);
- res->len = len;
- return q;
-}
-
static inline const void *
get_key(const void *p, const void *end,
struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 8241f5a4a01c..09c000d490a1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -478,6 +478,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
inode->i_fop = &simple_dir_operations;
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
+ break;
default:
break;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4187745887f0..61fb8a18552c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -559,7 +559,7 @@ EXPORT_SYMBOL_GPL(svc_destroy);
/*
* Allocate an RPC server's buffer space.
- * We allocate pages and place them in rq_argpages.
+ * We allocate pages and place them in rq_pages.
*/
static int
svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 5fb9164aa690..dcc50ae54550 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -857,6 +857,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
err = -EAGAIN;
if (len <= 0)
goto out_release;
+ trace_svc_xdr_recvfrom(&rqstp->rq_arg);
clear_bit(XPT_OLD, &xprt->xpt_flags);
@@ -866,7 +867,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (serv->sv_stats)
serv->sv_stats->netcnt++;
- trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg);
return len;
out_release:
rqstp->rq_res.len = 0;
@@ -904,7 +904,7 @@ int svc_send(struct svc_rqst *rqstp)
xb->len = xb->head[0].iov_len +
xb->page_len +
xb->tail[0].iov_len;
- trace_svc_xdr_sendto(rqstp, xb);
+ trace_svc_xdr_sendto(rqstp->rq_xid, xb);
trace_svc_stats_latency(rqstp);
len = xprt->xpt_ops->xpo_sendto(rqstp);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b248f2349437..2e2f007dfc9f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1062,6 +1062,87 @@ err_noclose:
return 0; /* record not complete */
}
+static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
+ int flags)
+{
+ return kernel_sendpage(sock, virt_to_page(vec->iov_base),
+ offset_in_page(vec->iov_base),
+ vec->iov_len, flags);
+}
+
+/*
+ * kernel_sendpage() is used exclusively to reduce the number of
+ * copy operations in this path. Therefore the caller must ensure
+ * that the pages backing @xdr are unchanging.
+ *
+ * In addition, the logic assumes that .bv_len is never larger
+ * than PAGE_SIZE.
+ */
+static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
+ rpc_fraghdr marker, unsigned int *sentp)
+{
+ const struct kvec *head = xdr->head;
+ const struct kvec *tail = xdr->tail;
+ struct kvec rm = {
+ .iov_base = &marker,
+ .iov_len = sizeof(marker),
+ };
+ struct msghdr msg = {
+ .msg_flags = 0,
+ };
+ int ret;
+
+ *sentp = 0;
+ xdr_alloc_bvec(xdr, GFP_KERNEL);
+
+ ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
+ if (ret != rm.iov_len)
+ return -EAGAIN;
+
+ ret = svc_tcp_send_kvec(sock, head, 0);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
+ if (ret != head->iov_len)
+ goto out;
+
+ if (xdr->page_len) {
+ unsigned int offset, len, remaining;
+ struct bio_vec *bvec;
+
+ bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT);
+ offset = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining > 0) {
+ len = min(remaining, bvec->bv_len - offset);
+ ret = kernel_sendpage(sock, bvec->bv_page,
+ bvec->bv_offset + offset,
+ len, 0);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
+ if (ret != len)
+ goto out;
+ remaining -= len;
+ offset = 0;
+ bvec++;
+ }
+ }
+
+ if (tail->iov_len) {
+ ret = svc_tcp_send_kvec(sock, tail, 0);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
+ }
+
+out:
+ return 0;
+}
+
/**
* svc_tcp_sendto - Send out a reply on a TCP socket
* @rqstp: completed svc_rqst
@@ -1078,26 +1159,28 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
struct xdr_buf *xdr = &rqstp->rq_res;
rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
(u32)xdr->len);
- struct msghdr msg = {
- .msg_flags = 0,
- };
unsigned int sent;
int err;
svc_tcp_release_rqst(rqstp);
+ atomic_inc(&svsk->sk_sendqlen);
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
+ tcp_sock_set_cork(svsk->sk_sk, true);
+ err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
xdr_free_bvec(xdr);
trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
+ if (atomic_dec_and_test(&svsk->sk_sendqlen))
+ tcp_sock_set_cork(svsk->sk_sk, false);
mutex_unlock(&xprt->xpt_mutex);
return sent;
out_notconn:
+ atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -ENOTCONN;
out_close:
@@ -1107,6 +1190,7 @@ out_close:
(err < 0) ? err : sent, xdr->len);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
+ atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
@@ -1176,7 +1260,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svsk->sk_datalen = 0;
memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
- tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
+ tcp_sock_set_nodelay(sk);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
switch (sk->sk_state) {
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 946edf2db646..a249837d6a55 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2015-2020, Oracle and/or its affiliates.
*
- * Support for backward direction RPCs on RPC/RDMA.
+ * Support for reverse-direction RPCs on RPC/RDMA.
*/
#include <linux/sunrpc/xprt.h>
@@ -208,7 +208,7 @@ create_req:
}
/**
- * rpcrdma_bc_receive_call - Handle a backward direction call
+ * rpcrdma_bc_receive_call - Handle a reverse-direction Call
* @r_xprt: transport receiving the call
* @rep: receive buffer containing the call
*
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index baca49fe83af..766a1048a48a 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -306,20 +306,14 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
if (nsegs > ep->re_max_fr_depth)
nsegs = ep->re_max_fr_depth;
for (i = 0; i < nsegs;) {
- if (seg->mr_page)
- sg_set_page(&mr->mr_sg[i],
- seg->mr_page,
- seg->mr_len,
- offset_in_page(seg->mr_offset));
- else
- sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
- seg->mr_len);
+ sg_set_page(&mr->mr_sg[i], seg->mr_page,
+ seg->mr_len, seg->mr_offset);
++seg;
++i;
if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)
continue;
- if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+ if ((i < nsegs && seg->mr_offset) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 8f5d0cb68360..292f066d006e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -204,9 +204,7 @@ rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
return 0;
}
-/* Split @vec on page boundaries into SGEs. FMR registers pages, not
- * a byte range. Other modes coalesce these SGEs into a single MR
- * when they can.
+/* Convert @vec to a single SGL element.
*
* Returns pointer to next available SGE, and bumps the total number
* of SGEs consumed.
@@ -215,22 +213,11 @@ static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
unsigned int *n)
{
- u32 remaining, page_offset;
- char *base;
-
- base = vec->iov_base;
- page_offset = offset_in_page(base);
- remaining = vec->iov_len;
- while (remaining) {
- seg->mr_page = NULL;
- seg->mr_offset = base;
- seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
- remaining -= seg->mr_len;
- base += seg->mr_len;
- ++seg;
- ++(*n);
- page_offset = 0;
- }
+ seg->mr_page = virt_to_page(vec->iov_base);
+ seg->mr_offset = offset_in_page(vec->iov_base);
+ seg->mr_len = vec->iov_len;
+ ++seg;
+ ++(*n);
return seg;
}
@@ -259,7 +246,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
page_base = offset_in_page(xdrbuf->page_base);
while (len) {
seg->mr_page = *ppages;
- seg->mr_offset = (char *)page_base;
+ seg->mr_offset = page_base;
seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
len -= seg->mr_len;
++ppages;
@@ -268,10 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
page_base = 0;
}
- /* When encoding a Read chunk, the tail iovec contains an
- * XDR pad and may be omitted.
- */
- if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
+ if (type == rpcrdma_readch)
goto out;
/* When encoding a Write chunk, some servers need to see an
@@ -283,7 +267,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
goto out;
if (xdrbuf->tail[0].iov_len)
- seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
+ rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
out:
if (unlikely(n > RPCRDMA_MAX_SEGS))
@@ -644,9 +628,8 @@ out_mapping_err:
return false;
}
-/* The tail iovec may include an XDR pad for the page list,
- * as well as additional content, and may not reside in the
- * same page as the head iovec.
+/* The tail iovec might not reside in the same page as the
+ * head iovec.
*/
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
struct xdr_buf *xdr,
@@ -764,27 +747,19 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
+ struct kvec *tail = &xdr->tail[0];
+
if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
return false;
- /* If there is a Read chunk, the page list is being handled
+ /* If there is a Read chunk, the page list is handled
* via explicit RDMA, and thus is skipped here.
*/
- /* Do not include the tail if it is only an XDR pad */
- if (xdr->tail[0].iov_len > 3) {
- unsigned int page_base, len;
-
- /* If the content in the page list is an odd length,
- * xdr_write_pages() adds a pad at the beginning of
- * the tail iovec. Force the tail's non-pad content to
- * land at the next XDR position in the Send message.
- */
- page_base = offset_in_page(xdr->tail[0].iov_base);
- len = xdr->tail[0].iov_len;
- page_base += len & 3;
- len -= len & 3;
- if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
+ if (tail->iov_len) {
+ if (!rpcrdma_prepare_tail_iov(req, xdr,
+ offset_in_page(tail->iov_base),
+ tail->iov_len))
return false;
kref_get(&req->rl_kref);
}
@@ -1164,14 +1139,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
*/
p = xdr_inline_decode(xdr, 3 * sizeof(*p));
if (unlikely(!p))
- goto out_short;
+ return true;
rpcrdma_bc_receive_call(r_xprt, rep);
return true;
-
-out_short:
- pr_warn("RPC/RDMA short backward direction call\n");
- return true;
}
#else /* CONFIG_SUNRPC_BACKCHANNEL */
{
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 526da5d4710b..5bc20e9d09cd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -62,51 +62,47 @@ static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_DEF_INLINE_THRESH;
static unsigned int min_max_inline = RPCRDMA_DEF_INLINE_THRESH;
static unsigned int max_max_inline = RPCRDMA_MAX_INLINE_THRESH;
+static unsigned int svcrdma_stat_unused;
+static unsigned int zero;
-atomic_t rdma_stat_recv;
-atomic_t rdma_stat_read;
-atomic_t rdma_stat_write;
-atomic_t rdma_stat_sq_starve;
-atomic_t rdma_stat_rq_starve;
-atomic_t rdma_stat_rq_poll;
-atomic_t rdma_stat_rq_prod;
-atomic_t rdma_stat_sq_poll;
-atomic_t rdma_stat_sq_prod;
+struct percpu_counter svcrdma_stat_read;
+struct percpu_counter svcrdma_stat_recv;
+struct percpu_counter svcrdma_stat_sq_starve;
+struct percpu_counter svcrdma_stat_write;
-/*
- * This function implements reading and resetting an atomic_t stat
- * variable through read/write to a proc file. Any write to the file
- * resets the associated statistic to zero. Any read returns it's
- * current value.
- */
-static int read_reset_stat(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+enum {
+ SVCRDMA_COUNTER_BUFSIZ = sizeof(unsigned long long),
+};
+
+static int svcrdma_counter_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
- atomic_t *stat = (atomic_t *)table->data;
-
- if (!stat)
- return -EINVAL;
-
- if (write)
- atomic_set(stat, 0);
- else {
- char str_buf[32];
- int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat));
- if (len >= 32)
- return -EFAULT;
- len = strlen(str_buf);
- if (*ppos > len) {
- *lenp = 0;
- return 0;
- }
- len -= *ppos;
- if (len > *lenp)
- len = *lenp;
- if (len)
- memcpy(buffer, str_buf, len);
- *lenp = len;
- *ppos += len;
+ struct percpu_counter *stat = (struct percpu_counter *)table->data;
+ char tmp[SVCRDMA_COUNTER_BUFSIZ + 1];
+ int len;
+
+ if (write) {
+ percpu_counter_set(stat, 0);
+ return 0;
}
+
+ len = snprintf(tmp, SVCRDMA_COUNTER_BUFSIZ, "%lld\n",
+ percpu_counter_sum_positive(stat));
+ if (len >= SVCRDMA_COUNTER_BUFSIZ)
+ return -EFAULT;
+ len = strlen(tmp);
+ if (*ppos > len) {
+ *lenp = 0;
+ return 0;
+ }
+ len -= *ppos;
+ if (len > *lenp)
+ len = *lenp;
+ if (len)
+ memcpy(buffer, tmp, len);
+ *lenp = len;
+ *ppos += len;
+
return 0;
}
@@ -142,66 +138,76 @@ static struct ctl_table svcrdma_parm_table[] = {
{
.procname = "rdma_stat_read",
- .data = &rdma_stat_read,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_read,
+ .maxlen = SVCRDMA_COUNTER_BUFSIZ,
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = svcrdma_counter_handler,
},
{
.procname = "rdma_stat_recv",
- .data = &rdma_stat_recv,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_recv,
+ .maxlen = SVCRDMA_COUNTER_BUFSIZ,
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = svcrdma_counter_handler,
},
{
.procname = "rdma_stat_write",
- .data = &rdma_stat_write,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_write,
+ .maxlen = SVCRDMA_COUNTER_BUFSIZ,
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = svcrdma_counter_handler,
},
{
.procname = "rdma_stat_sq_starve",
- .data = &rdma_stat_sq_starve,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_sq_starve,
+ .maxlen = SVCRDMA_COUNTER_BUFSIZ,
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = svcrdma_counter_handler,
},
{
.procname = "rdma_stat_rq_starve",
- .data = &rdma_stat_rq_starve,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{
.procname = "rdma_stat_rq_poll",
- .data = &rdma_stat_rq_poll,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{
.procname = "rdma_stat_rq_prod",
- .data = &rdma_stat_rq_prod,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{
.procname = "rdma_stat_sq_poll",
- .data = &rdma_stat_sq_poll,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{
.procname = "rdma_stat_sq_prod",
- .data = &rdma_stat_sq_prod,
- .maxlen = sizeof(atomic_t),
+ .data = &svcrdma_stat_unused,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = read_reset_stat,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &zero,
},
{ },
};
@@ -224,27 +230,69 @@ static struct ctl_table svcrdma_root_table[] = {
{ },
};
+static void svc_rdma_proc_cleanup(void)
+{
+ if (!svcrdma_table_header)
+ return;
+ unregister_sysctl_table(svcrdma_table_header);
+ svcrdma_table_header = NULL;
+
+ percpu_counter_destroy(&svcrdma_stat_write);
+ percpu_counter_destroy(&svcrdma_stat_sq_starve);
+ percpu_counter_destroy(&svcrdma_stat_recv);
+ percpu_counter_destroy(&svcrdma_stat_read);
+}
+
+static int svc_rdma_proc_init(void)
+{
+ int rc;
+
+ if (svcrdma_table_header)
+ return 0;
+
+ rc = percpu_counter_init(&svcrdma_stat_read, 0, GFP_KERNEL);
+ if (rc)
+ goto out_err;
+ rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL);
+ if (rc)
+ goto out_err;
+ rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL);
+ if (rc)
+ goto out_err;
+ rc = percpu_counter_init(&svcrdma_stat_write, 0, GFP_KERNEL);
+ if (rc)
+ goto out_err;
+
+ svcrdma_table_header = register_sysctl_table(svcrdma_root_table);
+ return 0;
+
+out_err:
+ percpu_counter_destroy(&svcrdma_stat_sq_starve);
+ percpu_counter_destroy(&svcrdma_stat_recv);
+ percpu_counter_destroy(&svcrdma_stat_read);
+ return rc;
+}
+
void svc_rdma_cleanup(void)
{
dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
- if (svcrdma_table_header) {
- unregister_sysctl_table(svcrdma_table_header);
- svcrdma_table_header = NULL;
- }
svc_unreg_xprt_class(&svc_rdma_class);
+ svc_rdma_proc_cleanup();
}
int svc_rdma_init(void)
{
+ int rc;
+
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
- if (!svcrdma_table_header)
- svcrdma_table_header =
- register_sysctl_table(svcrdma_root_table);
+ rc = svc_rdma_proc_init();
+ if (rc)
+ return rc;
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 63f8be974df2..4a1edbb4028e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2015-2018 Oracle. All rights reserved.
*
- * Support for backward direction RPCs on RPC/RDMA (server-side).
+ * Support for reverse-direction RPCs on RPC/RDMA (server-side).
*/
#include <linux/sunrpc/svc_rdma.h>
@@ -59,7 +59,7 @@ out_unlock:
spin_unlock(&xprt->queue_lock);
}
-/* Send a backwards direction RPC call.
+/* Send a reverse-direction RPC Call.
*
* Caller holds the connection's mutex and has already marshaled
* the RPC/RDMA request.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index cbdb71247755..6d28f23ceb35 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -266,33 +266,46 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp)
svc_rdma_recv_ctxt_put(rdma, ctxt);
}
-static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
- struct svc_rdma_recv_ctxt *ctxt)
+static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
+ unsigned int wanted, bool temp)
{
+ const struct ib_recv_wr *bad_wr = NULL;
+ struct svc_rdma_recv_ctxt *ctxt;
+ struct ib_recv_wr *recv_chain;
int ret;
- trace_svcrdma_post_recv(ctxt);
- ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
+ recv_chain = NULL;
+ while (wanted--) {
+ ctxt = svc_rdma_recv_ctxt_get(rdma);
+ if (!ctxt)
+ break;
+
+ trace_svcrdma_post_recv(ctxt);
+ ctxt->rc_temp = temp;
+ ctxt->rc_recv_wr.next = recv_chain;
+ recv_chain = &ctxt->rc_recv_wr;
+ rdma->sc_pending_recvs++;
+ }
+ if (!recv_chain)
+ return false;
+
+ ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
if (ret)
goto err_post;
- return 0;
+ return true;
err_post:
- trace_svcrdma_rq_post_err(rdma, ret);
- svc_rdma_recv_ctxt_put(rdma, ctxt);
- return ret;
-}
-
-static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
-{
- struct svc_rdma_recv_ctxt *ctxt;
+ while (bad_wr) {
+ ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
+ rc_recv_wr);
+ bad_wr = bad_wr->next;
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ }
- if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
- return 0;
- ctxt = svc_rdma_recv_ctxt_get(rdma);
- if (!ctxt)
- return -ENOMEM;
- return __svc_rdma_post_recv(rdma, ctxt);
+ trace_svcrdma_rq_post_err(rdma, ret);
+ /* Since we're destroying the xprt, no need to reset
+ * sc_pending_recvs. */
+ return false;
}
/**
@@ -303,20 +316,7 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
*/
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
- struct svc_rdma_recv_ctxt *ctxt;
- unsigned int i;
- int ret;
-
- for (i = 0; i < rdma->sc_max_requests; i++) {
- ctxt = svc_rdma_recv_ctxt_get(rdma);
- if (!ctxt)
- return false;
- ctxt->rc_temp = true;
- ret = __svc_rdma_post_recv(rdma, ctxt);
- if (ret)
- return false;
- }
- return true;
+ return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
}
/**
@@ -324,8 +324,6 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
* @cq: Completion Queue context
* @wc: Work Completion object
*
- * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
- * the Receive completion handler could be running.
*/
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
@@ -333,6 +331,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_recv_ctxt *ctxt;
+ rdma->sc_pending_recvs--;
+
/* WARNING: Only wc->wr_cqe and wc->status are reliable */
ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
@@ -340,14 +340,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS)
goto flushed;
- if (svc_rdma_post_recv(rdma))
- goto post_err;
-
/* All wc fields are now known to be valid */
ctxt->rc_byte_len = wc->byte_len;
- ib_dma_sync_single_for_cpu(rdma->sc_pd->device,
- ctxt->rc_recv_sge.addr,
- wc->byte_len, DMA_FROM_DEVICE);
spin_lock(&rdma->sc_rq_dto_lock);
list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
@@ -356,11 +350,18 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
spin_unlock(&rdma->sc_rq_dto_lock);
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
svc_xprt_enqueue(&rdma->sc_xprt);
+
+ if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) &&
+ rdma->sc_pending_recvs < rdma->sc_max_requests)
+ if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH,
+ false))
+ goto post_err;
+
return;
flushed:
-post_err:
svc_rdma_recv_ctxt_put(rdma, ctxt);
+post_err:
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
}
@@ -845,9 +846,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
}
list_del(&ctxt->rc_list);
spin_unlock(&rdma_xprt->sc_rq_dto_lock);
+ percpu_counter_inc(&svcrdma_stat_recv);
- atomic_inc(&rdma_stat_recv);
-
+ ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
+ ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
+ DMA_FROM_DEVICE);
svc_rdma_build_arg_xdr(rqstp, ctxt);
/* Prevent svc_xprt_release from releasing pages in rq_pages
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 0b63e1321d74..693d139a8633 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -364,6 +364,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
return 0;
}
+ percpu_counter_inc(&svcrdma_stat_sq_starve);
trace_svcrdma_sq_full(rdma);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
@@ -468,6 +469,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
DMA_TO_DEVICE);
if (ret < 0)
return -EIO;
+ percpu_counter_inc(&svcrdma_stat_write);
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
@@ -718,6 +720,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
segment->rs_handle, DMA_FROM_DEVICE);
if (ret < 0)
return -EIO;
+ percpu_counter_inc(&svcrdma_stat_read);
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 68af79d4f04f..52c759a8543e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -317,7 +317,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
/* If the SQ is full, wait until an SQ entry is available */
while (1) {
if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
- atomic_inc(&rdma_stat_sq_starve);
+ percpu_counter_inc(&svcrdma_stat_sq_starve);
trace_svcrdma_sq_full(rdma);
atomic_inc(&rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index afba4e9d5425..c895f80df659 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -475,9 +475,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!svc_rdma_post_recvs(newxprt))
goto errout;
- /* Swap out the handler */
- newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler;
-
/* Construct RDMA-CM private message */
pmsg.cp_magic = rpcrdma_cmp_magic;
pmsg.cp_version = RPCRDMA_CMP_VERSION;
@@ -498,7 +495,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
}
conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg);
+ rdma_lock_handler(newxprt->sc_cm_id);
+ newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler;
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
+ rdma_unlock_handler(newxprt->sc_cm_id);
if (ret) {
trace_svcrdma_accept_err(newxprt, ret);
goto errout;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 94b28657aeeb..fe3be985e239 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -98,9 +98,9 @@ struct rpcrdma_ep {
atomic_t re_completion_ids;
};
-/* Pre-allocate extra Work Requests for handling backward receives
- * and sends. This is a fixed value because the Work Queues are
- * allocated when the forward channel is set up, long before the
+/* Pre-allocate extra Work Requests for handling reverse-direction
+ * Receives and Sends. This is a fixed value because the Work Queues
+ * are allocated when the forward channel is set up, long before the
* backchannel is provisioned. This value is two times
* NFS4_DEF_CB_SLOT_TABLE_SIZE.
*/
@@ -283,10 +283,11 @@ enum {
RPCRDMA_MAX_IOV_SEGS,
};
-struct rpcrdma_mr_seg { /* chunk descriptors */
- u32 mr_len; /* length of chunk or segment */
- struct page *mr_page; /* owning page, if any */
- char *mr_offset; /* kva if no page, else offset */
+/* Arguments for DMA mapping and registration */
+struct rpcrdma_mr_seg {
+ u32 mr_len; /* length of segment */
+ struct page *mr_page; /* underlying struct page */
+ u64 mr_offset; /* IN: page offset, OUT: iova */
};
/* The Send SGE array is provisioned to send a maximum size
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c56a66cdf4ac..e35760f238a4 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -829,7 +829,7 @@ xs_stream_record_marker(struct xdr_buf *xdr)
* EAGAIN: The socket was blocked, please call again later to
* complete the request
* ENOTCONN: Caller needs to invoke connect logic then call again
- * other: Some other error occured, the request was not sent
+ * other: Some other error occurred, the request was not sent
*/
static int xs_local_send_request(struct rpc_rqst *req)
{
@@ -1665,7 +1665,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
* This ensures that we can continue to establish TCP
* connections even when all local ephemeral ports are already
* a part of some TCP connection. This makes no difference
- * for UDP sockets, but also doens't harm them.
+ * for UDP sockets, but also doesn't harm them.
*
* If we're asking for any reserved port (i.e. port == 0 &&
* transport->xprt.resvport == 1) xs_get_srcport above will
@@ -1875,6 +1875,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
xprt->stat.connect_time += (long)jiffies -
xprt->stat.connect_start;
xprt_set_connected(xprt);
+ break;
case -ENOBUFS:
break;
case -ENOENT:
@@ -2276,10 +2277,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
- /*
- * xs_tcp_force_close() wakes tasks with -EIO.
- * We need to wake them first to ensure the
- * correct error code.
+ /* xs_tcp_force_close() wakes tasks with a fixed error code.
+ * We need to wake them first to ensure the correct error code.
*/
xprt_wake_pending_tasks(xprt, status);
xs_tcp_force_close(xprt);
@@ -2380,7 +2379,7 @@ static void xs_error_handle(struct work_struct *work)
}
/**
- * xs_local_print_stats - display AF_LOCAL socket-specifc stats
+ * xs_local_print_stats - display AF_LOCAL socket-specific stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
*
@@ -2409,7 +2408,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
}
/**
- * xs_udp_print_stats - display UDP socket-specifc stats
+ * xs_udp_print_stats - display UDP socket-specific stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
*
@@ -2433,7 +2432,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
}
/**
- * xs_tcp_print_stats - display TCP socket-specifc stats
+ * xs_tcp_print_stats - display TCP socket-specific stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
*
diff --git a/net/switchdev/Makefile b/net/switchdev/Makefile
index bd69a3136e76..c5561d7f3a7c 100644
--- a/net/switchdev/Makefile
+++ b/net/switchdev/Makefile
@@ -3,4 +3,4 @@
# Makefile for the Switch device API
#
-obj-$(CONFIG_NET_SWITCHDEV) += switchdev.o
+obj-y += switchdev.o
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 23d868545362..89a36db47ab4 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -101,19 +101,18 @@ static int switchdev_deferred_enqueue(struct net_device *dev,
static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
struct net_device *dev,
const struct switchdev_attr *attr,
- struct switchdev_trans *trans)
+ struct netlink_ext_ack *extack)
{
int err;
int rc;
struct switchdev_notifier_port_attr_info attr_info = {
.attr = attr,
- .trans = trans,
.handled = false,
};
rc = call_switchdev_blocking_notifiers(nt, dev,
- &attr_info.info, NULL);
+ &attr_info.info, extack);
err = notifier_to_errno(rc);
if (err) {
WARN_ON(!attr_info.handled);
@@ -127,36 +126,11 @@ static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
}
static int switchdev_port_attr_set_now(struct net_device *dev,
- const struct switchdev_attr *attr)
+ const struct switchdev_attr *attr,
+ struct netlink_ext_ack *extack)
{
- struct switchdev_trans trans;
- int err;
-
- /* Phase I: prepare for attr set. Driver/device should fail
- * here if there are going to be issues in the commit phase,
- * such as lack of resources or support. The driver/device
- * should reserve resources needed for the commit phase here,
- * but should not commit the attr.
- */
-
- trans.ph_prepare = true;
- err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
- &trans);
- if (err)
- return err;
-
- /* Phase II: commit attr set. This cannot fail as a fault
- * of driver/device. If it does, it's a bug in the driver/device
- * because the driver said everythings was OK in phase I.
- */
-
- trans.ph_prepare = false;
- err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
- &trans);
- WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
- dev->name, attr->id);
-
- return err;
+ return switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
+ extack);
}
static void switchdev_port_attr_set_deferred(struct net_device *dev,
@@ -165,7 +139,7 @@ static void switchdev_port_attr_set_deferred(struct net_device *dev,
const struct switchdev_attr *attr = data;
int err;
- err = switchdev_port_attr_set_now(dev, attr);
+ err = switchdev_port_attr_set_now(dev, attr, NULL);
if (err && err != -EOPNOTSUPP)
netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
err, attr->id);
@@ -185,21 +159,19 @@ static int switchdev_port_attr_set_defer(struct net_device *dev,
*
* @dev: port device
* @attr: attribute to set
- *
- * Use a 2-phase prepare-commit transaction model to ensure
- * system is not left in a partially updated state due to
- * failure from driver/device.
+ * @extack: netlink extended ack, for error message propagation
*
* rtnl_lock must be held and must not be in atomic section,
* in case SWITCHDEV_F_DEFER flag is not set.
*/
int switchdev_port_attr_set(struct net_device *dev,
- const struct switchdev_attr *attr)
+ const struct switchdev_attr *attr,
+ struct netlink_ext_ack *extack)
{
if (attr->flags & SWITCHDEV_F_DEFER)
return switchdev_port_attr_set_defer(dev, attr);
ASSERT_RTNL();
- return switchdev_port_attr_set_now(dev, attr);
+ return switchdev_port_attr_set_now(dev, attr, extack);
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
@@ -221,7 +193,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
struct net_device *dev,
const struct switchdev_obj *obj,
- struct switchdev_trans *trans,
struct netlink_ext_ack *extack)
{
int rc;
@@ -229,7 +200,6 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
struct switchdev_notifier_port_obj_info obj_info = {
.obj = obj,
- .trans = trans,
.handled = false,
};
@@ -244,48 +214,15 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
return 0;
}
-static int switchdev_port_obj_add_now(struct net_device *dev,
- const struct switchdev_obj *obj,
- struct netlink_ext_ack *extack)
-{
- struct switchdev_trans trans;
- int err;
-
- ASSERT_RTNL();
-
- /* Phase I: prepare for obj add. Driver/device should fail
- * here if there are going to be issues in the commit phase,
- * such as lack of resources or support. The driver/device
- * should reserve resources needed for the commit phase here,
- * but should not commit the obj.
- */
-
- trans.ph_prepare = true;
- err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
- dev, obj, &trans, extack);
- if (err)
- return err;
-
- /* Phase II: commit obj add. This cannot fail as a fault
- * of driver/device. If it does, it's a bug in the driver/device
- * because the driver said everythings was OK in phase I.
- */
-
- trans.ph_prepare = false;
- err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
- dev, obj, &trans, extack);
- WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
-
- return err;
-}
-
static void switchdev_port_obj_add_deferred(struct net_device *dev,
const void *data)
{
const struct switchdev_obj *obj = data;
int err;
- err = switchdev_port_obj_add_now(dev, obj, NULL);
+ ASSERT_RTNL();
+ err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
+ dev, obj, NULL);
if (err && err != -EOPNOTSUPP)
netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
err, obj->id);
@@ -307,10 +244,6 @@ static int switchdev_port_obj_add_defer(struct net_device *dev,
* @obj: object to add
* @extack: netlink extended ack
*
- * Use a 2-phase prepare-commit transaction model to ensure
- * system is not left in a partially updated state due to
- * failure from driver/device.
- *
* rtnl_lock must be held and must not be in atomic section,
* in case SWITCHDEV_F_DEFER flag is not set.
*/
@@ -321,7 +254,8 @@ int switchdev_port_obj_add(struct net_device *dev,
if (obj->flags & SWITCHDEV_F_DEFER)
return switchdev_port_obj_add_defer(dev, obj);
ASSERT_RTNL();
- return switchdev_port_obj_add_now(dev, obj, extack);
+ return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
+ dev, obj, extack);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
@@ -329,7 +263,7 @@ static int switchdev_port_obj_del_now(struct net_device *dev,
const struct switchdev_obj *obj)
{
return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
- dev, obj, NULL, NULL);
+ dev, obj, NULL);
}
static void switchdev_port_obj_del_deferred(struct net_device *dev,
@@ -449,7 +383,6 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev,
bool (*check_cb)(const struct net_device *dev),
int (*add_cb)(struct net_device *dev,
const struct switchdev_obj *obj,
- struct switchdev_trans *trans,
struct netlink_ext_ack *extack))
{
struct netlink_ext_ack *extack;
@@ -460,10 +393,10 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev,
extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
if (check_cb(dev)) {
- /* This flag is only checked if the return value is success. */
- port_obj_info->handled = true;
- return add_cb(dev, port_obj_info->obj, port_obj_info->trans,
- extack);
+ err = add_cb(dev, port_obj_info->obj, extack);
+ if (err != -EOPNOTSUPP)
+ port_obj_info->handled = true;
+ return err;
}
/* Switch ports might be stacked under e.g. a LAG. Ignore the
@@ -491,7 +424,6 @@ int switchdev_handle_port_obj_add(struct net_device *dev,
bool (*check_cb)(const struct net_device *dev),
int (*add_cb)(struct net_device *dev,
const struct switchdev_obj *obj,
- struct switchdev_trans *trans,
struct netlink_ext_ack *extack))
{
int err;
@@ -515,9 +447,10 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev,
int err = -EOPNOTSUPP;
if (check_cb(dev)) {
- /* This flag is only checked if the return value is success. */
- port_obj_info->handled = true;
- return del_cb(dev, port_obj_info->obj);
+ err = del_cb(dev, port_obj_info->obj);
+ if (err != -EOPNOTSUPP)
+ port_obj_info->handled = true;
+ return err;
}
/* Switch ports might be stacked under e.g. a LAG. Ignore the
@@ -561,16 +494,20 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev,
bool (*check_cb)(const struct net_device *dev),
int (*set_cb)(struct net_device *dev,
const struct switchdev_attr *attr,
- struct switchdev_trans *trans))
+ struct netlink_ext_ack *extack))
{
+ struct netlink_ext_ack *extack;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
+ extack = switchdev_notifier_info_to_extack(&port_attr_info->info);
+
if (check_cb(dev)) {
- port_attr_info->handled = true;
- return set_cb(dev, port_attr_info->attr,
- port_attr_info->trans);
+ err = set_cb(dev, port_attr_info->attr, extack);
+ if (err != -EOPNOTSUPP)
+ port_attr_info->handled = true;
+ return err;
}
/* Switch ports might be stacked under e.g. a LAG. Ignore the
@@ -598,7 +535,7 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
bool (*check_cb)(const struct net_device *dev),
int (*set_cb)(struct net_device *dev,
const struct switchdev_attr *attr,
- struct switchdev_trans *trans))
+ struct netlink_ext_ack *extack))
{
int err;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 6ae2140eb4f7..115109259430 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1030,7 +1030,6 @@ void tipc_link_reset(struct tipc_link *l)
int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *xmitq)
{
- struct tipc_msg *hdr = buf_msg(skb_peek(list));
struct sk_buff_head *backlogq = &l->backlogq;
struct sk_buff_head *transmq = &l->transmq;
struct sk_buff *skb, *_skb;
@@ -1038,13 +1037,18 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
u16 ack = l->rcv_nxt - 1;
u16 seqno = l->snd_nxt;
int pkt_cnt = skb_queue_len(list);
- int imp = msg_importance(hdr);
unsigned int mss = tipc_link_mss(l);
unsigned int cwin = l->window;
unsigned int mtu = l->mtu;
+ struct tipc_msg *hdr;
bool new_bundle;
int rc = 0;
+ int imp;
+
+ if (pkt_cnt <= 0)
+ return 0;
+ hdr = buf_msg(skb_peek(list));
if (unlikely(msg_size(hdr) > mtu)) {
pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n",
skb_queue_len(list), msg_user(hdr),
@@ -1053,6 +1057,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
return -EMSGSIZE;
}
+ imp = msg_importance(hdr);
/* Allow oversubscription of one data msg per source at congestion */
if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) {
if (imp == TIPC_SYSTEM_IMPORTANCE) {
@@ -2539,7 +2544,7 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win)
}
/**
- * link_reset_stats - reset link statistics
+ * tipc_link_reset_stats - reset link statistics
* @l: pointer to link
*/
void tipc_link_reset_stats(struct tipc_link *l)
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 6dce2abf436e..48fac3b17e40 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -108,7 +108,7 @@ const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
*/
static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
{
- return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
+ return (offsetof(struct tipc_mon_domain, members)) + (mcnt * sizeof(u32));
}
/* dom_size() : calculate size of own domain based on number of peers
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 2aca86021df5..e9263280a2d4 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -117,10 +117,6 @@ struct sk_buff *tipc_msg_create(uint user, uint type,
msg_set_origport(msg, oport);
msg_set_destport(msg, dport);
msg_set_errcode(msg, errcode);
- if (hdr_sz > SHORT_H_SIZE) {
- msg_set_orignode(msg, onode);
- msg_set_destnode(msg, dnode);
- }
return buf;
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 83d9eb830592..008670d1f43e 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1665,7 +1665,7 @@ static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list)
}
/**
- * tipc_node_xmit() is the general link level function for message sending
+ * tipc_node_xmit() - general link level function for message sending
* @net: the applicable net namespace
* @list: chain of buffers containing message
* @dnode: address of destination node
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index fa0724fd84b4..0cdc1f7b6b08 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -21,6 +21,7 @@ config TLS_DEVICE
bool "Transport Layer Security HW offload"
depends on TLS
select SOCK_VALIDATE_XMIT
+ select SOCK_RX_QUEUE_MAPPING
default n
help
Enable kernel support for HW offload of the TLS protocol.
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index f7fb7d2c1de1..d9cd229aa111 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -113,7 +113,7 @@ static struct net_device *get_netdev_for_sock(struct sock *sk)
struct net_device *netdev = NULL;
if (likely(dst)) {
- netdev = dst->dev;
+ netdev = netdev_sk_get_lowest_dev(dst->dev, sk);
dev_hold(netdev);
}
@@ -1329,6 +1329,8 @@ static int tls_dev_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_REGISTER:
case NETDEV_FEAT_CHANGE:
+ if (netif_is_bond_master(dev))
+ return NOTIFY_DONE;
if ((dev->features & NETIF_F_HW_TLS_RX) &&
!dev->tlsdev_ops->tls_dev_resync)
return NOTIFY_BAD;
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index d946817ed065..cacf040872c7 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -424,7 +424,7 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
struct net_device *dev,
struct sk_buff *skb)
{
- if (dev == tls_get_ctx(sk)->netdev)
+ if (dev == tls_get_ctx(sk)->netdev || netif_is_bond_master(dev))
return skb;
return tls_sw_fallback(sk, skb);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 41c3303c3357..5a31307ceb76 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -936,7 +936,7 @@ static struct sock *unix_find_other(struct net *net,
if (err)
goto fail;
inode = d_backing_inode(path.dentry);
- err = inode_permission(inode, MAY_WRITE);
+ err = path_permission(&path, MAY_WRITE);
if (err)
goto put_fail;
@@ -996,7 +996,8 @@ static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
*/
err = security_path_mknod(&path, dentry, mode, 0);
if (!err) {
- err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+ err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
+ dentry, mode, 0);
if (!err) {
res->mnt = mntget(path.mnt);
res->dentry = dget(dentry);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b12d3a322242..5546710d8ac1 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -943,10 +943,12 @@ static int vsock_shutdown(struct socket *sock, int mode)
*/
sk = sock->sk;
+
+ lock_sock(sk);
if (sock->state == SS_UNCONNECTED) {
err = -ENOTCONN;
if (sk->sk_type == SOCK_STREAM)
- return err;
+ goto out;
} else {
sock->state = SS_DISCONNECTING;
err = 0;
@@ -955,10 +957,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
/* Receive and send shutdowns are treated alike. */
mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
if (mode) {
- lock_sock(sk);
sk->sk_shutdown |= mode;
sk->sk_state_change(sk);
- release_sock(sk);
if (sk->sk_type == SOCK_STREAM) {
sock_reset_flag(sk, SOCK_DONE);
@@ -966,6 +966,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
}
}
+out:
+ release_sock(sk);
return err;
}
@@ -1014,9 +1016,12 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
} else if (sock->type == SOCK_STREAM) {
- const struct vsock_transport *transport = vsk->transport;
+ const struct vsock_transport *transport;
+
lock_sock(sk);
+ transport = vsk->transport;
+
/* Listening sockets that have connections in their accept
* queue can be read.
*/
@@ -1099,10 +1104,11 @@ static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
err = 0;
sk = sock->sk;
vsk = vsock_sk(sk);
- transport = vsk->transport;
lock_sock(sk);
+ transport = vsk->transport;
+
err = vsock_auto_bind(vsk);
if (err)
goto out;
@@ -1229,7 +1235,7 @@ static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
{
const struct vsock_transport *transport = vsk->transport;
- if (!transport->cancel_pkt)
+ if (!transport || !transport->cancel_pkt)
return -EOPNOTSUPP;
return transport->cancel_pkt(vsk);
@@ -1239,7 +1245,6 @@ static void vsock_connect_timeout(struct work_struct *work)
{
struct sock *sk;
struct vsock_sock *vsk;
- int cancel = 0;
vsk = container_of(work, struct vsock_sock, connect_work.work);
sk = sk_vsock(vsk);
@@ -1250,11 +1255,9 @@ static void vsock_connect_timeout(struct work_struct *work)
sk->sk_state = TCP_CLOSE;
sk->sk_err = ETIMEDOUT;
sk->sk_error_report(sk);
- cancel = 1;
+ vsock_transport_cancel_pkt(vsk);
}
release_sock(sk);
- if (cancel)
- vsock_transport_cancel_pkt(vsk);
sock_put(sk);
}
@@ -1561,10 +1564,11 @@ static int vsock_stream_setsockopt(struct socket *sock,
err = 0;
sk = sock->sk;
vsk = vsock_sk(sk);
- transport = vsk->transport;
lock_sock(sk);
+ transport = vsk->transport;
+
switch (optname) {
case SO_VM_SOCKETS_BUFFER_SIZE:
COPY_IN(val);
@@ -1697,7 +1701,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
sk = sock->sk;
vsk = vsock_sk(sk);
- transport = vsk->transport;
total_written = 0;
err = 0;
@@ -1706,6 +1709,8 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
+ transport = vsk->transport;
+
/* Callers should not provide a destination with stream sockets. */
if (msg->msg_namelen) {
err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
@@ -1840,11 +1845,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sk = sock->sk;
vsk = vsock_sk(sk);
- transport = vsk->transport;
err = 0;
lock_sock(sk);
+ transport = vsk->transport;
+
if (!transport || sk->sk_state != TCP_ESTABLISHED) {
/* Recvmsg is supposed to return 0 if a peer performs an
* orderly shutdown. Differentiate between that case and when a
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 630b851f8150..cc3bae2659e7 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -474,14 +474,10 @@ static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
static int hvs_shutdown(struct vsock_sock *vsk, int mode)
{
- struct sock *sk = sk_vsock(vsk);
-
if (!(mode & SEND_SHUTDOWN))
return 0;
- lock_sock(sk);
hvs_shutdown_lock_held(vsk->trans, mode);
- release_sock(sk);
return 0;
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 5956939eebb7..e4370b1b7494 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1130,8 +1130,6 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
vsk = vsock_sk(sk);
- space_available = virtio_transport_space_update(sk, pkt);
-
lock_sock(sk);
/* Check if sk has been closed before lock_sock */
@@ -1142,6 +1140,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
goto free_pkt;
}
+ space_available = virtio_transport_space_update(sk, pkt);
+
/* Update CID in case it has changed after a transport reset event */
vsk->local_addr.svm_cid = dst.svm_cid;
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 27026f587fa6..f620acd2a0f5 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -21,6 +21,7 @@ config CFG80211
tristate "cfg80211 - wireless configuration API"
depends on RFKILL || !RFKILL
select FW_LOADER
+ select CRC32
# may need to update this when certificates are changed and are
# using a different algorithm, though right now they shouldn't
# (this is here rather than below to allow it to be a module)
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index e4030f1fbc60..285b8076054b 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -1093,7 +1093,7 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
struct wireless_dev *wdev;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
if (!IS_ENABLED(CONFIG_CFG80211_REG_RELAX_NO_IR) ||
!(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR))
@@ -1216,9 +1216,10 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
struct cfg80211_chan_def *chandef,
enum nl80211_iftype iftype)
{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
bool check_no_ir;
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
/*
* Under certain conditions suggested by some regulatory bodies a
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 4b1f35e976e7..a2785379df6e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -222,7 +222,7 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev)
{
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE))
return;
@@ -247,7 +247,7 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev)
{
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN))
return;
@@ -273,7 +273,11 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
dev_close(wdev->netdev);
continue;
}
+
/* otherwise, check iftype */
+
+ wiphy_lock(wiphy);
+
switch (wdev->iftype) {
case NL80211_IFTYPE_P2P_DEVICE:
cfg80211_stop_p2p_device(rdev, wdev);
@@ -284,6 +288,8 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
default:
break;
}
+
+ wiphy_unlock(wiphy);
}
}
EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces);
@@ -318,9 +324,9 @@ static void cfg80211_event_work(struct work_struct *work)
rdev = container_of(work, struct cfg80211_registered_device,
event_work);
- rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
cfg80211_process_rdev_events(rdev);
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
}
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
@@ -328,6 +334,7 @@ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
struct wireless_dev *wdev, *tmp;
ASSERT_RTNL();
+ lockdep_assert_wiphy(&rdev->wiphy);
list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) {
if (wdev->nl_owner_dead)
@@ -343,7 +350,9 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work)
destroy_work);
rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
cfg80211_destroy_ifaces(rdev);
+ wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
}
@@ -475,6 +484,7 @@ use_default_name:
}
}
+ mutex_init(&rdev->wiphy.mtx);
INIT_LIST_HEAD(&rdev->wiphy.wdev_list);
INIT_LIST_HEAD(&rdev->beacon_registrations);
spin_lock_init(&rdev->beacon_registrations_lock);
@@ -1007,15 +1017,16 @@ void wiphy_unregister(struct wiphy *wiphy)
wait_event(rdev->dev_wait, ({
int __count;
- rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
__count = rdev->opencount;
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
__count == 0; }));
if (rdev->rfkill)
rfkill_unregister(rdev->rfkill);
rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY);
rdev->wiphy.registered = false;
@@ -1038,6 +1049,7 @@ void wiphy_unregister(struct wiphy *wiphy)
cfg80211_rdev_list_generation++;
device_del(&rdev->wiphy.dev);
+ wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
flush_work(&rdev->scan_done_wk);
@@ -1070,6 +1082,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
}
list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list)
cfg80211_put_bss(&rdev->wiphy, &scan->pub);
+ mutex_destroy(&rdev->wiphy.mtx);
kfree(rdev);
}
@@ -1094,19 +1107,28 @@ void cfg80211_cqm_config_free(struct wireless_dev *wdev)
wdev->cqm_config = NULL;
}
-static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
+static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
+ bool unregister_netdev)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
flush_work(&wdev->pmsr_free_wk);
nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
+ wdev->registered = false;
+
+ if (wdev->netdev) {
+ sysfs_remove_link(&wdev->netdev->dev.kobj, "phy80211");
+ if (unregister_netdev)
+ unregister_netdevice(wdev->netdev);
+ }
+
list_del_rcu(&wdev->list);
- if (sync)
- synchronize_rcu();
+ synchronize_net();
rdev->devlist_generation++;
cfg80211_mlme_purge_registrations(wdev);
@@ -1131,14 +1153,23 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
flush_work(&wdev->disconnect_wk);
cfg80211_cqm_config_free(wdev);
+
+ /*
+ * Ensure that all events have been processed and
+ * freed.
+ */
+ cfg80211_process_wdev_events(wdev);
+
+ if (WARN_ON(wdev->current_bss)) {
+ cfg80211_unhold_bss(wdev->current_bss);
+ cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
+ wdev->current_bss = NULL;
+ }
}
void cfg80211_unregister_wdev(struct wireless_dev *wdev)
{
- if (WARN_ON(wdev->netdev))
- return;
-
- __cfg80211_unregister_wdev(wdev, true);
+ _cfg80211_unregister_wdev(wdev, true);
}
EXPORT_SYMBOL(cfg80211_unregister_wdev);
@@ -1149,7 +1180,7 @@ static const struct device_type wiphy_type = {
void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
enum nl80211_iftype iftype, int num)
{
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
rdev->num_running_ifaces += num;
if (iftype == NL80211_IFTYPE_MONITOR)
@@ -1162,7 +1193,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev,
struct net_device *dev = wdev->netdev;
struct cfg80211_sched_scan_request *pos, *tmp;
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
ASSERT_WDEV_LOCK(wdev);
cfg80211_pmsr_wdev_down(wdev);
@@ -1279,6 +1310,9 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
void cfg80211_register_wdev(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev)
{
+ ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
+
/*
* We get here also when the interface changes network namespaces,
* as it's registered into the new one, but we don't want it to
@@ -1290,10 +1324,51 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev,
wdev->identifier = ++rdev->wdev_id;
list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list);
rdev->devlist_generation++;
+ wdev->registered = true;
nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
}
+int cfg80211_register_netdevice(struct net_device *dev)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev;
+ int ret;
+
+ ASSERT_RTNL();
+
+ if (WARN_ON(!wdev))
+ return -EINVAL;
+
+ rdev = wiphy_to_rdev(wdev->wiphy);
+
+ lockdep_assert_held(&rdev->wiphy.mtx);
+
+ /* we'll take care of this */
+ wdev->registered = true;
+ wdev->registering = true;
+ ret = register_netdevice(dev);
+ if (ret)
+ goto out;
+
+ if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
+ "phy80211")) {
+ pr_err("failed to add phy80211 symlink to netdev!\n");
+ unregister_netdevice(dev);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ cfg80211_register_wdev(rdev, wdev);
+ ret = 0;
+out:
+ wdev->registering = false;
+ if (ret)
+ wdev->registered = false;
+ return ret;
+}
+EXPORT_SYMBOL(cfg80211_register_netdevice);
+
static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
unsigned long state, void *ptr)
{
@@ -1319,22 +1394,30 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
cfg80211_init_wdev(wdev);
break;
case NETDEV_REGISTER:
+ if (!wdev->registered) {
+ wiphy_lock(&rdev->wiphy);
+ cfg80211_register_wdev(rdev, wdev);
+ wiphy_unlock(&rdev->wiphy);
+ }
+ break;
+ case NETDEV_UNREGISTER:
/*
- * NB: cannot take rdev->mtx here because this may be
- * called within code protected by it when interfaces
- * are added with nl80211.
+ * It is possible to get NETDEV_UNREGISTER multiple times,
+ * so check wdev->registered.
*/
- if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
- "phy80211")) {
- pr_err("failed to add phy80211 symlink to netdev!\n");
+ if (wdev->registered && !wdev->registering) {
+ wiphy_lock(&rdev->wiphy);
+ _cfg80211_unregister_wdev(wdev, false);
+ wiphy_unlock(&rdev->wiphy);
}
-
- cfg80211_register_wdev(rdev, wdev);
break;
case NETDEV_GOING_DOWN:
+ wiphy_lock(&rdev->wiphy);
cfg80211_leave(rdev, wdev);
+ wiphy_unlock(&rdev->wiphy);
break;
case NETDEV_DOWN:
+ wiphy_lock(&rdev->wiphy);
cfg80211_update_iface_num(rdev, wdev->iftype, -1);
if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
if (WARN_ON(!rdev->scan_req->notified &&
@@ -1351,9 +1434,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
}
rdev->opencount--;
+ wiphy_unlock(&rdev->wiphy);
wake_up(&rdev->dev_wait);
break;
case NETDEV_UP:
+ wiphy_lock(&rdev->wiphy);
cfg80211_update_iface_num(rdev, wdev->iftype, 1);
wdev_lock(wdev);
switch (wdev->iftype) {
@@ -1400,38 +1485,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
/* assume this means it's off */
wdev->ps = false;
}
- break;
- case NETDEV_UNREGISTER:
- /*
- * It is possible to get NETDEV_UNREGISTER
- * multiple times. To detect that, check
- * that the interface is still on the list
- * of registered interfaces, and only then
- * remove and clean it up.
- */
- if (!list_empty(&wdev->list)) {
- __cfg80211_unregister_wdev(wdev, false);
- sysfs_remove_link(&dev->dev.kobj, "phy80211");
- }
- /*
- * synchronise (so that we won't find this netdev
- * from other code any more) and then clear the list
- * head so that the above code can safely check for
- * !list_empty() to avoid double-cleanup.
- */
- synchronize_rcu();
- INIT_LIST_HEAD(&wdev->list);
- /*
- * Ensure that all events have been processed and
- * freed.
- */
- cfg80211_process_wdev_events(wdev);
-
- if (WARN_ON(wdev->current_bss)) {
- cfg80211_unhold_bss(wdev->current_bss);
- cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
- wdev->current_bss = NULL;
- }
+ wiphy_unlock(&rdev->wiphy);
break;
case NETDEV_PRE_UP:
if (!cfg80211_iftype_allowed(wdev->wiphy, wdev->iftype,
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 7df91f940212..a7d19b4b40ac 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -231,7 +231,7 @@ static inline void wdev_unlock(struct wireless_dev *wdev)
static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev)
{
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
return rdev->num_running_ifaces == rdev->num_running_monitor_ifaces &&
rdev->num_running_ifaces > 0;
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 76b845f68ac8..aab43469a2f0 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -73,8 +73,6 @@ static ssize_t ht40allow_map_read(struct file *file,
if (!buf)
return -ENOMEM;
- rtnl_lock();
-
for (band = 0; band < NUM_NL80211_BANDS; band++) {
sband = wiphy->bands[band];
if (!sband)
@@ -84,8 +82,6 @@ static ssize_t ht40allow_map_read(struct file *file,
buf, buf_size, offset);
}
- rtnl_unlock();
-
r = simple_read_from_buffer(user_buf, count, ppos, buf, offset);
kfree(buf);
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index a0621bb76d8e..8f98e546becf 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -3,6 +3,7 @@
* Some IBSS support code for cfg80211.
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2020-2021 Intel Corporation
*/
#include <linux/etherdevice.h>
@@ -92,7 +93,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev = dev->ieee80211_ptr;
int err;
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
ASSERT_WDEV_LOCK(wdev);
if (wdev->ssid_len)
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index e1e90761dc00..3aa69b375a10 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -450,7 +450,7 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
struct cfg80211_mgmt_registration *reg;
struct mgmt_frame_regs upd = {};
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
spin_lock_bh(&wdev->mgmt_registrations_lock);
if (!wdev->mgmt_registrations_need_update) {
@@ -492,10 +492,10 @@ void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk)
rdev = container_of(wk, struct cfg80211_registered_device,
mgmt_registrations_update_wk);
- rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list)
cfg80211_mgmt_registrations_update(wdev);
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
}
int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 775d0c4d86c3..521d36bb0803 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -64,9 +64,9 @@ static const struct genl_multicast_group nl80211_mcgrps[] = {
/* returns ERR_PTR values */
static struct wireless_dev *
-__cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs)
+__cfg80211_wdev_from_attrs(struct cfg80211_registered_device *rdev,
+ struct net *netns, struct nlattr **attrs)
{
- struct cfg80211_registered_device *rdev;
struct wireless_dev *result = NULL;
bool have_ifidx = attrs[NL80211_ATTR_IFINDEX];
bool have_wdev_id = attrs[NL80211_ATTR_WDEV];
@@ -74,8 +74,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs)
int wiphy_idx = -1;
int ifidx = -1;
- ASSERT_RTNL();
-
if (!have_ifidx && !have_wdev_id)
return ERR_PTR(-EINVAL);
@@ -86,6 +84,28 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs)
wiphy_idx = wdev_id >> 32;
}
+ if (rdev) {
+ struct wireless_dev *wdev;
+
+ lockdep_assert_held(&rdev->wiphy.mtx);
+
+ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+ if (have_ifidx && wdev->netdev &&
+ wdev->netdev->ifindex == ifidx) {
+ result = wdev;
+ break;
+ }
+ if (have_wdev_id && wdev->identifier == (u32)wdev_id) {
+ result = wdev;
+ break;
+ }
+ }
+
+ return result ?: ERR_PTR(-ENODEV);
+ }
+
+ ASSERT_RTNL();
+
list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
struct wireless_dev *wdev;
@@ -732,6 +752,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
NL80211_SAE_PWE_BOTH),
[NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT },
[NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy),
+ [NL80211_ATTR_DISABLE_HE] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
@@ -914,22 +935,31 @@ int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
return err;
}
- *wdev = __cfg80211_wdev_from_attrs(sock_net(cb->skb->sk),
+ rtnl_lock();
+ *wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(cb->skb->sk),
attrbuf);
kfree(attrbuf);
- if (IS_ERR(*wdev))
+ if (IS_ERR(*wdev)) {
+ rtnl_unlock();
return PTR_ERR(*wdev);
+ }
*rdev = wiphy_to_rdev((*wdev)->wiphy);
+ mutex_lock(&(*rdev)->wiphy.mtx);
+ rtnl_unlock();
/* 0 is the first index - add 1 to parse only once */
cb->args[0] = (*rdev)->wiphy_idx + 1;
cb->args[1] = (*wdev)->identifier;
} else {
/* subtract the 1 again here */
- struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
+ struct wiphy *wiphy;
struct wireless_dev *tmp;
- if (!wiphy)
+ rtnl_lock();
+ wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
+ if (!wiphy) {
+ rtnl_unlock();
return -ENODEV;
+ }
*rdev = wiphy_to_rdev(wiphy);
*wdev = NULL;
@@ -940,8 +970,12 @@ int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
}
}
- if (!*wdev)
+ if (!*wdev) {
+ rtnl_unlock();
return -ENODEV;
+ }
+ mutex_lock(&(*rdev)->wiphy.mtx);
+ rtnl_unlock();
}
return 0;
@@ -3141,7 +3175,7 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
{
- struct cfg80211_registered_device *rdev;
+ struct cfg80211_registered_device *rdev = NULL;
struct net_device *netdev = NULL;
struct wireless_dev *wdev;
int result = 0, rem_txq_params = 0;
@@ -3152,8 +3186,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
u8 coverage_class = 0;
u32 txq_limit = 0, txq_memory_limit = 0, txq_quantum = 0;
- ASSERT_RTNL();
-
+ rtnl_lock();
/*
* Try to find the wiphy and netdev. Normally this
* function shouldn't need the netdev, but this is
@@ -3177,14 +3210,18 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (!netdev) {
rdev = __cfg80211_rdev_from_attrs(genl_info_net(info),
info->attrs);
- if (IS_ERR(rdev))
+ if (IS_ERR(rdev)) {
+ rtnl_unlock();
return PTR_ERR(rdev);
+ }
wdev = NULL;
netdev = NULL;
result = 0;
} else
wdev = netdev->ieee80211_ptr;
+ wiphy_lock(&rdev->wiphy);
+
/*
* end workaround code, by now the rdev is available
* and locked, and wdev may or may not be NULL.
@@ -3193,26 +3230,35 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_WIPHY_NAME])
result = cfg80211_dev_rename(
rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME]));
+ rtnl_unlock();
if (result)
- return result;
+ goto out;
if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) {
struct ieee80211_txq_params txq_params;
struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1];
- if (!rdev->ops->set_txq_params)
- return -EOPNOTSUPP;
+ if (!rdev->ops->set_txq_params) {
+ result = -EOPNOTSUPP;
+ goto out;
+ }
- if (!netdev)
- return -EINVAL;
+ if (!netdev) {
+ result = -EINVAL;
+ goto out;
+ }
if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
- netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
- return -EINVAL;
+ netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
+ result = -EINVAL;
+ goto out;
+ }
- if (!netif_running(netdev))
- return -ENETDOWN;
+ if (!netif_running(netdev)) {
+ result = -ENETDOWN;
+ goto out;
+ }
nla_for_each_nested(nl_txq_params,
info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS],
@@ -3223,15 +3269,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
txq_params_policy,
info->extack);
if (result)
- return result;
+ goto out;
result = parse_txq_params(tb, &txq_params);
if (result)
- return result;
+ goto out;
result = rdev_set_txq_params(rdev, netdev,
&txq_params);
if (result)
- return result;
+ goto out;
}
}
@@ -3241,7 +3287,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
nl80211_can_set_dev_channel(wdev) ? netdev : NULL,
info);
if (result)
- return result;
+ goto out;
}
if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) {
@@ -3252,15 +3298,19 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (!(rdev->wiphy.features & NL80211_FEATURE_VIF_TXPOWER))
txp_wdev = NULL;
- if (!rdev->ops->set_tx_power)
- return -EOPNOTSUPP;
+ if (!rdev->ops->set_tx_power) {
+ result = -EOPNOTSUPP;
+ goto out;
+ }
idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING;
type = nla_get_u32(info->attrs[idx]);
if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] &&
- (type != NL80211_TX_POWER_AUTOMATIC))
- return -EINVAL;
+ (type != NL80211_TX_POWER_AUTOMATIC)) {
+ result = -EINVAL;
+ goto out;
+ }
if (type != NL80211_TX_POWER_AUTOMATIC) {
idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL;
@@ -3269,7 +3319,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
result = rdev_set_tx_power(rdev, txp_wdev, type, mbm);
if (result)
- return result;
+ goto out;
}
if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] &&
@@ -3278,8 +3328,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if ((!rdev->wiphy.available_antennas_tx &&
!rdev->wiphy.available_antennas_rx) ||
- !rdev->ops->set_antenna)
- return -EOPNOTSUPP;
+ !rdev->ops->set_antenna) {
+ result = -EOPNOTSUPP;
+ goto out;
+ }
tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]);
rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]);
@@ -3287,15 +3339,17 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
/* reject antenna configurations which don't match the
* available antenna masks, except for the "all" mask */
if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) ||
- (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx)))
- return -EINVAL;
+ (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) {
+ result = -EINVAL;
+ goto out;
+ }
tx_ant = tx_ant & rdev->wiphy.available_antennas_tx;
rx_ant = rx_ant & rdev->wiphy.available_antennas_rx;
result = rdev_set_antenna(rdev, tx_ant, rx_ant);
if (result)
- return result;
+ goto out;
}
changed = 0;
@@ -3317,8 +3371,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]) {
frag_threshold = nla_get_u32(
info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]);
- if (frag_threshold < 256)
- return -EINVAL;
+ if (frag_threshold < 256) {
+ result = -EINVAL;
+ goto out;
+ }
if (frag_threshold != (u32) -1) {
/*
@@ -3339,8 +3395,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) {
- if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK])
- return -EINVAL;
+ if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) {
+ result = -EINVAL;
+ goto out;
+ }
coverage_class = nla_get_u8(
info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]);
@@ -3348,16 +3406,20 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) {
- if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION))
- return -EOPNOTSUPP;
+ if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION)) {
+ result = -EOPNOTSUPP;
+ goto out;
+ }
changed |= WIPHY_PARAM_DYN_ACK;
}
if (info->attrs[NL80211_ATTR_TXQ_LIMIT]) {
if (!wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_TXQS))
- return -EOPNOTSUPP;
+ NL80211_EXT_FEATURE_TXQS)) {
+ result = -EOPNOTSUPP;
+ goto out;
+ }
txq_limit = nla_get_u32(
info->attrs[NL80211_ATTR_TXQ_LIMIT]);
changed |= WIPHY_PARAM_TXQ_LIMIT;
@@ -3365,8 +3427,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]) {
if (!wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_TXQS))
- return -EOPNOTSUPP;
+ NL80211_EXT_FEATURE_TXQS)) {
+ result = -EOPNOTSUPP;
+ goto out;
+ }
txq_memory_limit = nla_get_u32(
info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]);
changed |= WIPHY_PARAM_TXQ_MEMORY_LIMIT;
@@ -3374,8 +3438,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_TXQ_QUANTUM]) {
if (!wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_TXQS))
- return -EOPNOTSUPP;
+ NL80211_EXT_FEATURE_TXQS)) {
+ result = -EOPNOTSUPP;
+ goto out;
+ }
txq_quantum = nla_get_u32(
info->attrs[NL80211_ATTR_TXQ_QUANTUM]);
changed |= WIPHY_PARAM_TXQ_QUANTUM;
@@ -3387,8 +3453,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
u8 old_coverage_class;
u32 old_txq_limit, old_txq_memory_limit, old_txq_quantum;
- if (!rdev->ops->set_wiphy_params)
- return -EOPNOTSUPP;
+ if (!rdev->ops->set_wiphy_params) {
+ result = -EOPNOTSUPP;
+ goto out;
+ }
old_retry_short = rdev->wiphy.retry_short;
old_retry_long = rdev->wiphy.retry_long;
@@ -3426,10 +3494,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
rdev->wiphy.txq_limit = old_txq_limit;
rdev->wiphy.txq_memory_limit = old_txq_memory_limit;
rdev->wiphy.txq_quantum = old_txq_quantum;
- return result;
+ goto out;
}
}
- return 0;
+
+ result = 0;
+
+out:
+ wiphy_unlock(&rdev->wiphy);
+ return result;
}
static int nl80211_send_chandef(struct sk_buff *msg,
@@ -3960,6 +4033,17 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
/*
+ * We hold RTNL, so this is safe, without RTNL opencount cannot
+ * reach 0, and thus the rdev cannot be deleted.
+ *
+ * We need to do it for the dev_close(), since that will call
+ * the netdev notifiers, and we need to acquire the mutex there
+ * but don't know if we get there from here or from some other
+ * place (e.g. "ip link set ... down").
+ */
+ mutex_unlock(&rdev->wiphy.mtx);
+
+ /*
* If we remove a wireless device without a netdev then clear
* user_ptr[1] so that nl80211_post_doit won't dereference it
* to check if it needs to do dev_put(). Otherwise it crashes
@@ -3968,6 +4052,10 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
*/
if (!wdev->netdev)
info->user_ptr[1] = NULL;
+ else
+ dev_close(wdev->netdev);
+
+ mutex_lock(&rdev->wiphy.mtx);
return rdev_del_virtual_intf(rdev, wdev);
}
@@ -5884,10 +5972,11 @@ static int nl80211_dump_station(struct sk_buff *skb,
int sta_idx = cb->args[2];
int err;
- rtnl_lock();
err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
if (err)
- goto out_err;
+ return err;
+ /* nl80211_prepare_wdev_dump acquired it in the successful case */
+ __acquire(&rdev->wiphy.mtx);
if (!wdev->netdev) {
err = -EINVAL;
@@ -5922,7 +6011,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
cb->args[2] = sta_idx;
err = skb->len;
out_err:
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
return err;
}
@@ -6780,10 +6869,11 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
int path_idx = cb->args[2];
int err;
- rtnl_lock();
err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
if (err)
- goto out_err;
+ return err;
+ /* nl80211_prepare_wdev_dump acquired it in the successful case */
+ __acquire(&rdev->wiphy.mtx);
if (!rdev->ops->dump_mpath) {
err = -EOPNOTSUPP;
@@ -6816,7 +6906,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
cb->args[2] = path_idx;
err = skb->len;
out_err:
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
return err;
}
@@ -6979,10 +7069,11 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
int path_idx = cb->args[2];
int err;
- rtnl_lock();
err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
if (err)
- goto out_err;
+ return err;
+ /* nl80211_prepare_wdev_dump acquired it in the successful case */
+ __acquire(&rdev->wiphy.mtx);
if (!rdev->ops->dump_mpp) {
err = -EOPNOTSUPP;
@@ -7015,7 +7106,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
cb->args[2] = path_idx;
err = skb->len;
out_err:
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
return err;
}
@@ -7634,12 +7725,15 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
if (!hdr)
goto put_failure;
+ rtnl_lock();
+
if (info->attrs[NL80211_ATTR_WIPHY]) {
bool self_managed;
rdev = cfg80211_get_dev_from_info(genl_info_net(info), info);
if (IS_ERR(rdev)) {
nlmsg_free(msg);
+ rtnl_unlock();
return PTR_ERR(rdev);
}
@@ -7651,6 +7745,7 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
/* a self-managed-reg device must have a private regdom */
if (WARN_ON(!regdom && self_managed)) {
nlmsg_free(msg);
+ rtnl_unlock();
return -EINVAL;
}
@@ -7675,11 +7770,13 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
rcu_read_unlock();
genlmsg_end(msg, hdr);
+ rtnl_unlock();
return genlmsg_reply(msg, info);
nla_put_failure_rcu:
rcu_read_unlock();
nla_put_failure:
+ rtnl_unlock();
put_failure:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -7842,12 +7939,17 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- if (!reg_is_valid_request(alpha2))
- return -EINVAL;
+ rtnl_lock();
+ if (!reg_is_valid_request(alpha2)) {
+ r = -EINVAL;
+ goto out;
+ }
rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL);
- if (!rd)
- return -ENOMEM;
+ if (!rd) {
+ r = -ENOMEM;
+ goto out;
+ }
rd->n_reg_rules = num_rules;
rd->alpha2[0] = alpha2[0];
@@ -7879,10 +7981,13 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
}
}
+ r = set_regdom(rd, REGD_SOURCE_CRDA);
/* set_regdom takes ownership of rd */
- return set_regdom(rd, REGD_SOURCE_CRDA);
+ rd = NULL;
bad_reg:
kfree(rd);
+ out:
+ rtnl_unlock();
return r;
}
#endif /* CONFIG_CFG80211_CRDA_SUPPORT */
@@ -9050,10 +9155,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_csa_settings params;
- /* csa_attrs is defined static to avoid waste of stack size - this
- * function is called under RTNL lock, so this should not be a problem.
- */
- static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1];
+ struct nlattr **csa_attrs = NULL;
int err;
bool need_new_beacon = false;
bool need_handle_dfs_flag = true;
@@ -9118,28 +9220,39 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
+ csa_attrs = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*csa_attrs),
+ GFP_KERNEL);
+ if (!csa_attrs)
+ return -ENOMEM;
+
err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX,
info->attrs[NL80211_ATTR_CSA_IES],
nl80211_policy, info->extack);
if (err)
- return err;
+ goto free;
err = nl80211_parse_beacon(rdev, csa_attrs, &params.beacon_csa);
if (err)
- return err;
+ goto free;
- if (!csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON])
- return -EINVAL;
+ if (!csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]) {
+ err = -EINVAL;
+ goto free;
+ }
len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
- if (!len || (len % sizeof(u16)))
- return -EINVAL;
+ if (!len || (len % sizeof(u16))) {
+ err = -EINVAL;
+ goto free;
+ }
params.n_counter_offsets_beacon = len / sizeof(u16);
if (rdev->wiphy.max_num_csa_counters &&
(params.n_counter_offsets_beacon >
- rdev->wiphy.max_num_csa_counters))
- return -EINVAL;
+ rdev->wiphy.max_num_csa_counters)) {
+ err = -EINVAL;
+ goto free;
+ }
params.counter_offsets_beacon =
nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
@@ -9148,23 +9261,31 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
for (i = 0; i < params.n_counter_offsets_beacon; i++) {
u16 offset = params.counter_offsets_beacon[i];
- if (offset >= params.beacon_csa.tail_len)
- return -EINVAL;
+ if (offset >= params.beacon_csa.tail_len) {
+ err = -EINVAL;
+ goto free;
+ }
- if (params.beacon_csa.tail[offset] != params.count)
- return -EINVAL;
+ if (params.beacon_csa.tail[offset] != params.count) {
+ err = -EINVAL;
+ goto free;
+ }
}
if (csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]) {
len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
- if (!len || (len % sizeof(u16)))
- return -EINVAL;
+ if (!len || (len % sizeof(u16))) {
+ err = -EINVAL;
+ goto free;
+ }
params.n_counter_offsets_presp = len / sizeof(u16);
if (rdev->wiphy.max_num_csa_counters &&
(params.n_counter_offsets_presp >
- rdev->wiphy.max_num_csa_counters))
- return -EINVAL;
+ rdev->wiphy.max_num_csa_counters)) {
+ err = -EINVAL;
+ goto free;
+ }
params.counter_offsets_presp =
nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
@@ -9173,35 +9294,42 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
for (i = 0; i < params.n_counter_offsets_presp; i++) {
u16 offset = params.counter_offsets_presp[i];
- if (offset >= params.beacon_csa.probe_resp_len)
- return -EINVAL;
+ if (offset >= params.beacon_csa.probe_resp_len) {
+ err = -EINVAL;
+ goto free;
+ }
if (params.beacon_csa.probe_resp[offset] !=
- params.count)
- return -EINVAL;
+ params.count) {
+ err = -EINVAL;
+ goto free;
+ }
}
}
skip_beacons:
err = nl80211_parse_chandef(rdev, info, &params.chandef);
if (err)
- return err;
+ goto free;
if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params.chandef,
- wdev->iftype))
- return -EINVAL;
+ wdev->iftype)) {
+ err = -EINVAL;
+ goto free;
+ }
err = cfg80211_chandef_dfs_required(wdev->wiphy,
&params.chandef,
wdev->iftype);
if (err < 0)
- return err;
+ goto free;
if (err > 0) {
params.radar_required = true;
if (need_handle_dfs_flag &&
!nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS])) {
- return -EINVAL;
+ err = -EINVAL;
+ goto free;
}
}
@@ -9212,6 +9340,8 @@ skip_beacons:
err = rdev_channel_switch(rdev, dev, &params);
wdev_unlock(wdev);
+free:
+ kfree(csa_attrs);
return err;
}
@@ -9362,12 +9492,11 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
int start = cb->args[2], idx = 0;
int err;
- rtnl_lock();
err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
- if (err) {
- rtnl_unlock();
+ if (err)
return err;
- }
+ /* nl80211_prepare_wdev_dump acquired it in the successful case */
+ __acquire(&rdev->wiphy.mtx);
wdev_lock(wdev);
spin_lock_bh(&rdev->bss_lock);
@@ -9398,7 +9527,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
wdev_unlock(wdev);
cb->args[2] = idx;
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
return skb->len;
}
@@ -9496,10 +9625,13 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
if (!attrbuf)
return -ENOMEM;
- rtnl_lock();
res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
- if (res)
- goto out_err;
+ if (res) {
+ kfree(attrbuf);
+ return res;
+ }
+ /* nl80211_prepare_wdev_dump acquired it in the successful case */
+ __acquire(&rdev->wiphy.mtx);
/* prepare_wdev_dump parsed the attributes */
radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
@@ -9541,7 +9673,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
res = skb->len;
out_err:
kfree(attrbuf);
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
return res;
}
@@ -9888,6 +10020,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
req.flags |= ASSOC_REQ_DISABLE_VHT;
+ if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
+ req.flags |= ASSOC_REQ_DISABLE_HE;
+
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
memcpy(&req.vht_capa_mask,
nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
@@ -10403,10 +10538,14 @@ EXPORT_SYMBOL(__cfg80211_send_event_skb);
static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
- struct wireless_dev *wdev =
- __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs);
+ struct wireless_dev *wdev;
int err;
+ lockdep_assert_held(&rdev->wiphy.mtx);
+
+ wdev = __cfg80211_wdev_from_attrs(rdev, genl_info_net(info),
+ info->attrs);
+
if (!rdev->ops->testmode_cmd)
return -EOPNOTSUPP;
@@ -10667,6 +10806,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
connect.flags |= ASSOC_REQ_DISABLE_VHT;
+ if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
+ connect.flags |= ASSOC_REQ_DISABLE_HE;
+
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
memcpy(&connect.vht_capa_mask,
nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
@@ -13591,7 +13733,8 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct wireless_dev *wdev =
- __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs);
+ __cfg80211_wdev_from_attrs(rdev, genl_info_net(info),
+ info->attrs);
int i, err;
u32 vid, subcmd;
@@ -13715,7 +13858,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
goto out;
}
- *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
+ *wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(skb->sk), attrbuf);
if (IS_ERR(*wdev))
*wdev = NULL;
@@ -14650,31 +14793,24 @@ bad_tid_conf:
static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
struct genl_info *info)
{
- struct cfg80211_registered_device *rdev;
+ struct cfg80211_registered_device *rdev = NULL;
struct wireless_dev *wdev;
struct net_device *dev;
- bool rtnl = ops->internal_flags & NL80211_FLAG_NEED_RTNL;
-
- if (rtnl)
- rtnl_lock();
+ rtnl_lock();
if (ops->internal_flags & NL80211_FLAG_NEED_WIPHY) {
rdev = cfg80211_get_dev_from_info(genl_info_net(info), info);
if (IS_ERR(rdev)) {
- if (rtnl)
- rtnl_unlock();
+ rtnl_unlock();
return PTR_ERR(rdev);
}
info->user_ptr[0] = rdev;
} else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV ||
ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
- ASSERT_RTNL();
-
- wdev = __cfg80211_wdev_from_attrs(genl_info_net(info),
+ wdev = __cfg80211_wdev_from_attrs(NULL, genl_info_net(info),
info->attrs);
if (IS_ERR(wdev)) {
- if (rtnl)
- rtnl_unlock();
+ rtnl_unlock();
return PTR_ERR(wdev);
}
@@ -14683,8 +14819,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) {
if (!dev) {
- if (rtnl)
- rtnl_unlock();
+ rtnl_unlock();
return -EINVAL;
}
@@ -14695,8 +14830,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
!wdev_running(wdev)) {
- if (rtnl)
- rtnl_unlock();
+ rtnl_unlock();
return -ENETDOWN;
}
@@ -14706,6 +14840,14 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
info->user_ptr[0] = rdev;
}
+ if (rdev) {
+ wiphy_lock(&rdev->wiphy);
+ /* we keep the mutex locked until post_doit */
+ __release(&rdev->wiphy.mtx);
+ }
+ if (!(ops->internal_flags & NL80211_FLAG_NEED_RTNL))
+ rtnl_unlock();
+
return 0;
}
@@ -14723,6 +14865,14 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
}
}
+ if (info->user_ptr[0]) {
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+
+ /* we kept the mutex locked since pre_doit */
+ __acquire(&rdev->wiphy.mtx);
+ wiphy_unlock(&rdev->wiphy);
+ }
+
if (ops->internal_flags & NL80211_FLAG_NEED_RTNL)
rtnl_unlock();
@@ -14851,8 +15001,7 @@ static const struct genl_ops nl80211_ops[] = {
.dumpit = nl80211_dump_wiphy,
.done = nl80211_dump_wiphy_done,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY,
},
};
@@ -14862,7 +15011,6 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_wiphy,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_RTNL,
},
{
.cmd = NL80211_CMD_GET_INTERFACE,
@@ -14870,8 +15018,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_get_interface,
.dumpit = nl80211_dump_interface,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_WDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV,
},
{
.cmd = NL80211_CMD_SET_INTERFACE,
@@ -14902,8 +15049,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_SET_KEY,
@@ -14911,7 +15057,6 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_set_key,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL |
NL80211_FLAG_CLEAR_SKB,
},
{
@@ -14920,7 +15065,6 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_new_key,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL |
NL80211_FLAG_CLEAR_SKB,
},
{
@@ -14928,64 +15072,56 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_SET_BEACON,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_set_beacon,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_START_AP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_start_ap,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_STOP_AP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_stop_ap,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_GET_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_station,
.dumpit = nl80211_dump_station,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_SET_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_station,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_NEW_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_station,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_DEL_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_station,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_GET_MPATH,
@@ -14993,8 +15129,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_get_mpath,
.dumpit = nl80211_dump_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_GET_MPP,
@@ -15002,47 +15137,42 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_get_mpp,
.dumpit = nl80211_dump_mpp,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_SET_MPATH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_NEW_MPATH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_DEL_MPATH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_SET_BSS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_bss,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_GET_REG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_reg_do,
.dumpit = nl80211_get_reg_dump,
- .internal_flags = NL80211_FLAG_NEED_RTNL,
+ .internal_flags = 0,
/* can be retrieved by unprivileged users */
},
#ifdef CONFIG_CFG80211_CRDA_SUPPORT
@@ -15051,7 +15181,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_reg,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_RTNL,
+ .internal_flags = 0,
},
#endif
{
@@ -15071,32 +15201,28 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_mesh_config,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_SET_MESH_CONFIG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_update_mesh_config,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_TRIGGER_SCAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_trigger_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_ABORT_SCAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_abort_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_GET_SCAN,
@@ -15108,16 +15234,14 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_sched_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_STOP_SCHED_SCAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_stop_sched_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_AUTHENTICATE,
@@ -15125,7 +15249,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_authenticate,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL |
+ 0 |
NL80211_FLAG_CLEAR_SKB,
},
{
@@ -15134,7 +15258,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_associate,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL |
+ 0 |
NL80211_FLAG_CLEAR_SKB,
},
{
@@ -15142,32 +15266,28 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_deauthenticate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_DISASSOCIATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_disassociate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_JOIN_IBSS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_join_ibss,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_LEAVE_IBSS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_leave_ibss,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
#ifdef CONFIG_NL80211_TESTMODE
{
@@ -15176,8 +15296,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_testmode_do,
.dumpit = nl80211_testmode_dump,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY,
},
#endif
{
@@ -15186,7 +15305,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_connect,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL |
+ 0 |
NL80211_FLAG_CLEAR_SKB,
},
{
@@ -15195,7 +15314,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_update_connect_params,
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL |
+ 0 |
NL80211_FLAG_CLEAR_SKB,
},
{
@@ -15203,16 +15322,14 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_disconnect,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_SET_WIPHY_NETNS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_wiphy_netns,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY,
},
{
.cmd = NL80211_CMD_GET_SURVEY,
@@ -15225,7 +15342,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_setdel_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL |
+ 0 |
NL80211_FLAG_CLEAR_SKB,
},
{
@@ -15233,128 +15350,112 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_setdel_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_FLUSH_PMKSA,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_flush_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_REMAIN_ON_CHANNEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_remain_on_channel,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_cancel_remain_on_channel,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_SET_TX_BITRATE_MASK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_tx_bitrate_mask,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_REGISTER_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_register_mgmt,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV,
},
{
.cmd = NL80211_CMD_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tx_mgmt,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_FRAME_WAIT_CANCEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tx_mgmt_cancel_wait,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_SET_POWER_SAVE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_power_save,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_GET_POWER_SAVE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_power_save,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_SET_CQM,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_cqm,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_SET_CHANNEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_channel,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_JOIN_MESH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_join_mesh,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_LEAVE_MESH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_leave_mesh,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_JOIN_OCB,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_join_ocb,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_LEAVE_OCB,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_leave_ocb,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
#ifdef CONFIG_PM
{
@@ -15362,16 +15463,14 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_wowlan,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY,
},
{
.cmd = NL80211_CMD_SET_WOWLAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_wowlan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY,
},
#endif
{
@@ -15380,7 +15479,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_set_rekey_data,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL |
+ 0 |
NL80211_FLAG_CLEAR_SKB,
},
{
@@ -15388,48 +15487,42 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_mgmt,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_TDLS_OPER,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_oper,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_UNEXPECTED_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_register_unexpected_frame,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_PROBE_CLIENT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_probe_client,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_REGISTER_BEACONS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_register_beacons,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY,
},
{
.cmd = NL80211_CMD_SET_NOACK_MAP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_noack_map,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_START_P2P_DEVICE,
@@ -15468,48 +15561,42 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_nan_add_func,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_DEL_NAN_FUNCTION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_nan_del_func,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_CHANGE_NAN_CONFIG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_nan_change_config,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_SET_MCAST_RATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_mcast_rate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_SET_MAC_ACL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_mac_acl,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_RADAR_DETECT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_radar_detection,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
@@ -15521,47 +15608,41 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_update_ft_ies,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_CRIT_PROTOCOL_START,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_crit_protocol_start,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_CRIT_PROTOCOL_STOP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_crit_protocol_stop,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_GET_COALESCE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_coalesce,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY,
},
{
.cmd = NL80211_CMD_SET_COALESCE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_coalesce,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY,
},
{
.cmd = NL80211_CMD_CHANNEL_SWITCH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_channel_switch,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_VENDOR,
@@ -15570,7 +15651,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.dumpit = nl80211_vendor_cmd_dump,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL |
+ 0 |
NL80211_FLAG_CLEAR_SKB,
},
{
@@ -15578,123 +15659,108 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_qos_map,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_ADD_TX_TS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_add_tx_ts,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_DEL_TX_TS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_tx_ts,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_channel_switch,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_cancel_channel_switch,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_multicast_to_unicast,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_SET_PMK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_pmk,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL |
+ 0 |
NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEL_PMK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_pmk,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_EXTERNAL_AUTH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_external_auth,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_CONTROL_PORT_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tx_control_port,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_ftm_responder_stats,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_PEER_MEASUREMENT_START,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_pmsr_start,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
},
{
.cmd = NL80211_CMD_NOTIFY_RADAR,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_notify_radar_detection,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_UPDATE_OWE_INFO,
.doit = nl80211_update_owe_info,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_PROBE_MESH_LINK,
.doit = nl80211_probe_mesh_link,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
},
{
.cmd = NL80211_CMD_SET_TID_CONFIG,
.doit = nl80211_set_tid_config,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV,
},
{
.cmd = NL80211_CMD_SET_SAR_SPECS,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index bb72447ad960..21536c48deec 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -5,7 +5,7 @@
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -139,10 +139,18 @@ static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
return rcu_dereference_rtnl(cfg80211_regdomain);
}
+/*
+ * Returns the regulatory domain associated with the wiphy.
+ *
+ * Requires any of RTNL, wiphy mutex or RCU protection.
+ */
const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy)
{
- return rcu_dereference_rtnl(wiphy->regd);
+ return rcu_dereference_check(wiphy->regd,
+ lockdep_is_held(&wiphy->mtx) ||
+ lockdep_rtnl_is_held());
}
+EXPORT_SYMBOL(get_wiphy_regdom);
static const char *reg_dfs_region_str(enum nl80211_dfs_regions dfs_region)
{
@@ -164,7 +172,9 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
const struct ieee80211_regdomain *regd = NULL;
const struct ieee80211_regdomain *wiphy_regd = NULL;
+ rcu_read_lock();
regd = get_cfg80211_regdom();
+
if (!wiphy)
goto out;
@@ -181,6 +191,8 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
reg_dfs_region_str(regd->dfs_region));
out:
+ rcu_read_unlock();
+
return regd->dfs_region;
}
@@ -1617,7 +1629,7 @@ __freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 min_bw)
{
const struct ieee80211_regdomain *regd = reg_get_regdomain(wiphy);
static const u32 bws[] = {0, 1, 2, 4, 5, 8, 10, 16, 20};
- const struct ieee80211_reg_rule *reg_rule;
+ const struct ieee80211_reg_rule *reg_rule = ERR_PTR(-ERANGE);
int i = ARRAY_SIZE(bws) - 1;
u32 bw;
@@ -2571,9 +2583,15 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy,
if (IS_ERR(new_regd))
return;
+ rtnl_lock();
+ wiphy_lock(wiphy);
+
tmp = get_wiphy_regdom(wiphy);
rcu_assign_pointer(wiphy->regd, new_regd);
rcu_free_regdom(tmp);
+
+ wiphy_unlock(wiphy);
+ rtnl_unlock();
}
EXPORT_SYMBOL(wiphy_apply_custom_regulatory);
@@ -2735,7 +2753,10 @@ reg_process_hint_driver(struct wiphy *wiphy,
return REG_REQ_IGNORE;
tmp = get_wiphy_regdom(wiphy);
+ ASSERT_RTNL();
+ wiphy_lock(wiphy);
rcu_assign_pointer(wiphy->regd, regd);
+ wiphy_unlock(wiphy);
rcu_free_regdom(tmp);
}
@@ -3067,41 +3088,52 @@ static void reg_process_pending_beacon_hints(void)
spin_unlock_bh(&reg_pending_beacons_lock);
}
-static void reg_process_self_managed_hints(void)
+static void reg_process_self_managed_hint(struct wiphy *wiphy)
{
- struct cfg80211_registered_device *rdev;
- struct wiphy *wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
const struct ieee80211_regdomain *tmp;
const struct ieee80211_regdomain *regd;
enum nl80211_band band;
struct regulatory_request request = {};
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
- wiphy = &rdev->wiphy;
+ ASSERT_RTNL();
+ lockdep_assert_wiphy(wiphy);
- spin_lock(&reg_requests_lock);
- regd = rdev->requested_regd;
- rdev->requested_regd = NULL;
- spin_unlock(&reg_requests_lock);
+ spin_lock(&reg_requests_lock);
+ regd = rdev->requested_regd;
+ rdev->requested_regd = NULL;
+ spin_unlock(&reg_requests_lock);
- if (regd == NULL)
- continue;
+ if (!regd)
+ return;
- tmp = get_wiphy_regdom(wiphy);
- rcu_assign_pointer(wiphy->regd, regd);
- rcu_free_regdom(tmp);
+ tmp = get_wiphy_regdom(wiphy);
+ rcu_assign_pointer(wiphy->regd, regd);
+ rcu_free_regdom(tmp);
- for (band = 0; band < NUM_NL80211_BANDS; band++)
- handle_band_custom(wiphy, wiphy->bands[band], regd);
+ for (band = 0; band < NUM_NL80211_BANDS; band++)
+ handle_band_custom(wiphy, wiphy->bands[band], regd);
+
+ reg_process_ht_flags(wiphy);
- reg_process_ht_flags(wiphy);
+ request.wiphy_idx = get_wiphy_idx(wiphy);
+ request.alpha2[0] = regd->alpha2[0];
+ request.alpha2[1] = regd->alpha2[1];
+ request.initiator = NL80211_REGDOM_SET_BY_DRIVER;
- request.wiphy_idx = get_wiphy_idx(wiphy);
- request.alpha2[0] = regd->alpha2[0];
- request.alpha2[1] = regd->alpha2[1];
- request.initiator = NL80211_REGDOM_SET_BY_DRIVER;
+ nl80211_send_wiphy_reg_change_event(&request);
+}
- nl80211_send_wiphy_reg_change_event(&request);
+static void reg_process_self_managed_hints(void)
+{
+ struct cfg80211_registered_device *rdev;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+ wiphy_lock(&rdev->wiphy);
+ reg_process_self_managed_hint(&rdev->wiphy);
+ wiphy_unlock(&rdev->wiphy);
}
reg_check_channels();
@@ -3780,14 +3812,21 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd,
return -ENODEV;
if (!driver_request->intersect) {
- if (request_wiphy->regd)
+ ASSERT_RTNL();
+ wiphy_lock(request_wiphy);
+ if (request_wiphy->regd) {
+ wiphy_unlock(request_wiphy);
return -EALREADY;
+ }
regd = reg_copy_regd(rd);
- if (IS_ERR(regd))
+ if (IS_ERR(regd)) {
+ wiphy_unlock(request_wiphy);
return PTR_ERR(regd);
+ }
rcu_assign_pointer(request_wiphy->regd, regd);
+ wiphy_unlock(request_wiphy);
reset_regdomains(false, rd);
return 0;
}
@@ -3969,8 +4008,8 @@ int regulatory_set_wiphy_regd(struct wiphy *wiphy,
}
EXPORT_SYMBOL(regulatory_set_wiphy_regd);
-int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy,
- struct ieee80211_regdomain *rd)
+int regulatory_set_wiphy_regd_sync(struct wiphy *wiphy,
+ struct ieee80211_regdomain *rd)
{
int ret;
@@ -3981,10 +4020,11 @@ int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy,
return ret;
/* process the request immediately */
- reg_process_self_managed_hints();
+ reg_process_self_managed_hint(wiphy);
+ reg_check_channels();
return 0;
}
-EXPORT_SYMBOL(regulatory_set_wiphy_regd_sync_rtnl);
+EXPORT_SYMBOL(regulatory_set_wiphy_regd_sync);
void wiphy_regulatory_register(struct wiphy *wiphy)
{
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index f9e83031a40a..f3707f729024 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -63,7 +63,6 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd,
const struct ieee80211_reg_rule *rule);
bool reg_last_request_cell_base(void);
-const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy);
/**
* regulatory_hint_found_beacon - hints a beacon was found on a channel
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 1b7fec3b53cd..019952d4fc7d 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -918,7 +918,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
union iwreq_data wrqu;
#endif
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
if (rdev->scan_msg) {
nl80211_send_scan_msg(rdev, rdev->scan_msg);
@@ -987,9 +987,9 @@ void __cfg80211_scan_done(struct work_struct *wk)
rdev = container_of(wk, struct cfg80211_registered_device,
scan_done_wk);
- rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
___cfg80211_scan_done(rdev, true);
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
}
void cfg80211_scan_done(struct cfg80211_scan_request *request,
@@ -1022,7 +1022,7 @@ EXPORT_SYMBOL(cfg80211_scan_done);
void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev,
struct cfg80211_sched_scan_request *req)
{
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
list_add_rcu(&req->list, &rdev->sched_scan_req_list);
}
@@ -1030,7 +1030,7 @@ void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev,
static void cfg80211_del_sched_scan_req(struct cfg80211_registered_device *rdev,
struct cfg80211_sched_scan_request *req)
{
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
list_del_rcu(&req->list);
kfree_rcu(req, rcu_head);
@@ -1042,7 +1042,7 @@ cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid)
struct cfg80211_sched_scan_request *pos;
list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list,
- lockdep_rtnl_is_held()) {
+ lockdep_is_held(&rdev->wiphy.mtx)) {
if (pos->reqid == reqid)
return pos;
}
@@ -1090,7 +1090,7 @@ void cfg80211_sched_scan_results_wk(struct work_struct *work)
rdev = container_of(work, struct cfg80211_registered_device,
sched_scan_res_wk);
- rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) {
if (req->report_results) {
req->report_results = false;
@@ -1105,7 +1105,7 @@ void cfg80211_sched_scan_results_wk(struct work_struct *work)
NL80211_CMD_SCHED_SCAN_RESULTS);
}
}
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
}
void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid)
@@ -1126,23 +1126,23 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid)
}
EXPORT_SYMBOL(cfg80211_sched_scan_results);
-void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy, u64 reqid)
+void cfg80211_sched_scan_stopped_locked(struct wiphy *wiphy, u64 reqid)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- ASSERT_RTNL();
+ lockdep_assert_held(&wiphy->mtx);
trace_cfg80211_sched_scan_stopped(wiphy, reqid);
__cfg80211_stop_sched_scan(rdev, reqid, true);
}
-EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl);
+EXPORT_SYMBOL(cfg80211_sched_scan_stopped_locked);
void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid)
{
- rtnl_lock();
- cfg80211_sched_scan_stopped_rtnl(wiphy, reqid);
- rtnl_unlock();
+ wiphy_lock(wiphy);
+ cfg80211_sched_scan_stopped_locked(wiphy, reqid);
+ wiphy_unlock(wiphy);
}
EXPORT_SYMBOL(cfg80211_sched_scan_stopped);
@@ -1150,7 +1150,7 @@ int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev,
struct cfg80211_sched_scan_request *req,
bool driver_initiated)
{
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
if (!driver_initiated) {
int err = rdev_sched_scan_stop(rdev, req->dev, req->reqid);
@@ -1170,7 +1170,7 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev,
{
struct cfg80211_sched_scan_request *sched_scan_req;
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
sched_scan_req = cfg80211_find_sched_scan_req(rdev, reqid);
if (!sched_scan_req)
@@ -2774,6 +2774,8 @@ int cfg80211_wext_siwscan(struct net_device *dev,
eth_broadcast_addr(creq->bssid);
+ wiphy_lock(&rdev->wiphy);
+
rdev->scan_req = creq;
err = rdev_scan(rdev, creq);
if (err) {
@@ -2785,6 +2787,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
creq = NULL;
dev_hold(dev);
}
+ wiphy_unlock(&rdev->wiphy);
out:
kfree(creq);
return err;
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 38df713f2e2e..07756ca5e3b5 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -67,7 +67,6 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
struct cfg80211_scan_request *request;
int n_channels, err;
- ASSERT_RTNL();
ASSERT_WDEV_LOCK(wdev);
if (rdev->scan_req || rdev->scan_msg)
@@ -233,7 +232,7 @@ void cfg80211_conn_work(struct work_struct *work)
u8 bssid_buf[ETH_ALEN], *bssid = NULL;
enum nl80211_timeout_reason treason;
- rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
if (!wdev->netdev)
@@ -266,7 +265,7 @@ void cfg80211_conn_work(struct work_struct *work)
wdev_unlock(wdev);
}
- rtnl_unlock();
+ wiphy_unlock(&rdev->wiphy);
}
/* Returned bss is reference counted and must be cleaned up appropriately. */
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 3ac1f48195d2..9b959e3b09c6 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -5,6 +5,7 @@
*
* Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2020-2021 Intel Corporation
*/
#include <linux/device.h>
@@ -81,12 +82,6 @@ static void wiphy_dev_release(struct device *dev)
cfg80211_dev_free(rdev);
}
-static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
- /* TODO, we probably need stuff here */
- return 0;
-}
-
#ifdef CONFIG_PM_SLEEP
static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
{
@@ -104,6 +99,7 @@ static int wiphy_suspend(struct device *dev)
rdev->suspend_at = ktime_get_boottime_seconds();
rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
if (rdev->wiphy.registered) {
if (!rdev->wiphy.wowlan_config) {
cfg80211_leave_all(rdev);
@@ -118,6 +114,7 @@ static int wiphy_suspend(struct device *dev)
ret = rdev_suspend(rdev, NULL);
}
}
+ wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
return ret;
@@ -132,8 +129,10 @@ static int wiphy_resume(struct device *dev)
cfg80211_bss_age(rdev, ktime_get_boottime_seconds() - rdev->suspend_at);
rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
if (rdev->wiphy.registered && rdev->ops->resume)
ret = rdev_resume(rdev);
+ wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
return ret;
@@ -157,7 +156,6 @@ struct class ieee80211_class = {
.owner = THIS_MODULE,
.dev_release = wiphy_dev_release,
.dev_groups = ieee80211_groups,
- .dev_uevent = wiphy_uevent,
.pm = WIPHY_PM_OPS,
.ns_type = &net_ns_type_operations,
.namespace = wiphy_namespace,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index b4acc805114b..1bf0200f562a 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -997,7 +997,7 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev)
{
struct wireless_dev *wdev;
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list)
cfg80211_process_wdev_events(wdev);
@@ -1010,7 +1010,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
int err;
enum nl80211_iftype otype = dev->ieee80211_ptr->iftype;
- ASSERT_RTNL();
+ lockdep_assert_held(&rdev->wiphy.mtx);
/* don't support changing VLANs, you just re-create them */
if (otype == NL80211_IFTYPE_AP_VLAN)
@@ -1188,6 +1188,25 @@ static u32 cfg80211_calculate_bitrate_dmg(struct rate_info *rate)
return __mcs2bitrate[rate->mcs];
}
+static u32 cfg80211_calculate_bitrate_extended_sc_dmg(struct rate_info *rate)
+{
+ static const u32 __mcs2bitrate[] = {
+ [6 - 6] = 26950, /* MCS 9.1 : 2695.0 mbps */
+ [7 - 6] = 50050, /* MCS 12.1 */
+ [8 - 6] = 53900,
+ [9 - 6] = 57750,
+ [10 - 6] = 63900,
+ [11 - 6] = 75075,
+ [12 - 6] = 80850,
+ };
+
+ /* Extended SC MCS not defined for base MCS below 6 or above 12 */
+ if (WARN_ON_ONCE(rate->mcs < 6 || rate->mcs > 12))
+ return 0;
+
+ return __mcs2bitrate[rate->mcs - 6];
+}
+
static u32 cfg80211_calculate_bitrate_edmg(struct rate_info *rate)
{
static const u32 __mcs2bitrate[] = {
@@ -1224,7 +1243,7 @@ static u32 cfg80211_calculate_bitrate_edmg(struct rate_info *rate)
static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
{
- static const u32 base[4][10] = {
+ static const u32 base[4][12] = {
{ 6500000,
13000000,
19500000,
@@ -1235,7 +1254,9 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
65000000,
78000000,
/* not in the spec, but some devices use this: */
- 86500000,
+ 86700000,
+ 97500000,
+ 108300000,
},
{ 13500000,
27000000,
@@ -1247,6 +1268,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
135000000,
162000000,
180000000,
+ 202500000,
+ 225000000,
},
{ 29300000,
58500000,
@@ -1258,6 +1281,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
292500000,
351000000,
390000000,
+ 438800000,
+ 487500000,
},
{ 58500000,
117000000,
@@ -1269,12 +1294,14 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
585000000,
702000000,
780000000,
+ 877500000,
+ 975000000,
},
};
u32 bitrate;
int idx;
- if (rate->mcs > 9)
+ if (rate->mcs > 11)
goto warn;
switch (rate->bw) {
@@ -1398,6 +1425,8 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate)
return cfg80211_calculate_bitrate_ht(rate);
if (rate->flags & RATE_INFO_FLAGS_DMG)
return cfg80211_calculate_bitrate_dmg(rate);
+ if (rate->flags & RATE_INFO_FLAGS_EXTENDED_SC_DMG)
+ return cfg80211_calculate_bitrate_extended_sc_dmg(rate);
if (rate->flags & RATE_INFO_FLAGS_EDMG)
return cfg80211_calculate_bitrate_edmg(rate);
if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index fd9ad74972fb..a8320dc59af7 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -7,7 +7,7 @@
* we directly assign the wireless handlers of wireless interfaces.
*
* Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019-2021 Intel Corporation
*/
#include <linux/export.h>
@@ -39,6 +39,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
struct cfg80211_registered_device *rdev;
struct vif_params vifparams;
enum nl80211_iftype type;
+ int ret;
rdev = wiphy_to_rdev(wdev->wiphy);
@@ -61,7 +62,11 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
memset(&vifparams, 0, sizeof(vifparams));
- return cfg80211_change_iface(rdev, dev, type, &vifparams);
+ wiphy_lock(wdev->wiphy);
+ ret = cfg80211_change_iface(rdev, dev, type, &vifparams);
+ wiphy_unlock(wdev->wiphy);
+
+ return ret;
}
EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode);
@@ -253,17 +258,23 @@ int cfg80211_wext_siwrts(struct net_device *dev,
u32 orts = wdev->wiphy->rts_threshold;
int err;
- if (rts->disabled || !rts->fixed)
+ wiphy_lock(&rdev->wiphy);
+ if (rts->disabled || !rts->fixed) {
wdev->wiphy->rts_threshold = (u32) -1;
- else if (rts->value < 0)
- return -EINVAL;
- else
+ } else if (rts->value < 0) {
+ err = -EINVAL;
+ goto out;
+ } else {
wdev->wiphy->rts_threshold = rts->value;
+ }
err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_RTS_THRESHOLD);
+
if (err)
wdev->wiphy->rts_threshold = orts;
+out:
+ wiphy_unlock(&rdev->wiphy);
return err;
}
EXPORT_WEXT_HANDLER(cfg80211_wext_siwrts);
@@ -291,11 +302,13 @@ int cfg80211_wext_siwfrag(struct net_device *dev,
u32 ofrag = wdev->wiphy->frag_threshold;
int err;
- if (frag->disabled || !frag->fixed)
+ wiphy_lock(&rdev->wiphy);
+ if (frag->disabled || !frag->fixed) {
wdev->wiphy->frag_threshold = (u32) -1;
- else if (frag->value < 256)
- return -EINVAL;
- else {
+ } else if (frag->value < 256) {
+ err = -EINVAL;
+ goto out;
+ } else {
/* Fragment length must be even, so strip LSB. */
wdev->wiphy->frag_threshold = frag->value & ~0x1;
}
@@ -303,6 +316,8 @@ int cfg80211_wext_siwfrag(struct net_device *dev,
err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_FRAG_THRESHOLD);
if (err)
wdev->wiphy->frag_threshold = ofrag;
+out:
+ wiphy_unlock(&rdev->wiphy);
return err;
}
@@ -337,6 +352,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev,
(retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT)
return -EINVAL;
+ wiphy_lock(&rdev->wiphy);
if (retry->flags & IW_RETRY_LONG) {
wdev->wiphy->retry_long = retry->value;
changed |= WIPHY_PARAM_RETRY_LONG;
@@ -355,6 +371,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev,
wdev->wiphy->retry_short = oshort;
wdev->wiphy->retry_long = olong;
}
+ wiphy_unlock(&rdev->wiphy);
return err;
}
@@ -577,15 +594,18 @@ static int cfg80211_wext_siwencode(struct net_device *dev,
!rdev->ops->set_default_key)
return -EOPNOTSUPP;
+ wiphy_lock(&rdev->wiphy);
idx = erq->flags & IW_ENCODE_INDEX;
if (idx == 0) {
idx = wdev->wext.default_key;
if (idx < 0)
idx = 0;
- } else if (idx < 1 || idx > 4)
- return -EINVAL;
- else
+ } else if (idx < 1 || idx > 4) {
+ err = -EINVAL;
+ goto out;
+ } else {
idx--;
+ }
if (erq->flags & IW_ENCODE_DISABLED)
remove = true;
@@ -599,22 +619,28 @@ static int cfg80211_wext_siwencode(struct net_device *dev,
if (!err)
wdev->wext.default_key = idx;
wdev_unlock(wdev);
- return err;
+ goto out;
}
memset(&params, 0, sizeof(params));
params.key = keybuf;
params.key_len = erq->length;
- if (erq->length == 5)
+ if (erq->length == 5) {
params.cipher = WLAN_CIPHER_SUITE_WEP40;
- else if (erq->length == 13)
+ } else if (erq->length == 13) {
params.cipher = WLAN_CIPHER_SUITE_WEP104;
- else if (!remove)
- return -EINVAL;
+ } else if (!remove) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = cfg80211_set_encryption(rdev, dev, false, NULL, remove,
+ wdev->wext.default_key == -1,
+ idx, &params);
+out:
+ wiphy_unlock(&rdev->wiphy);
- return cfg80211_set_encryption(rdev, dev, false, NULL, remove,
- wdev->wext.default_key == -1,
- idx, &params);
+ return err;
}
static int cfg80211_wext_siwencodeext(struct net_device *dev,
@@ -629,6 +655,7 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev,
bool remove = false;
struct key_params params;
u32 cipher;
+ int ret;
if (wdev->iftype != NL80211_IFTYPE_STATION &&
wdev->iftype != NL80211_IFTYPE_ADHOC)
@@ -700,12 +727,16 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev,
params.seq_len = 6;
}
- return cfg80211_set_encryption(
+ wiphy_lock(wdev->wiphy);
+ ret = cfg80211_set_encryption(
rdev, dev,
!(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY),
addr, remove,
ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY,
idx, &params);
+ wiphy_unlock(wdev->wiphy);
+
+ return ret;
}
static int cfg80211_wext_giwencode(struct net_device *dev,
@@ -754,38 +785,61 @@ static int cfg80211_wext_siwfreq(struct net_device *dev,
struct cfg80211_chan_def chandef = {
.width = NL80211_CHAN_WIDTH_20_NOHT,
};
- int freq;
+ int freq, ret;
+
+ wiphy_lock(&rdev->wiphy);
switch (wdev->iftype) {
case NL80211_IFTYPE_STATION:
- return cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra);
+ ret = cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra);
+ break;
case NL80211_IFTYPE_ADHOC:
- return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra);
+ ret = cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra);
+ break;
case NL80211_IFTYPE_MONITOR:
freq = cfg80211_wext_freq(wextfreq);
- if (freq < 0)
- return freq;
- if (freq == 0)
- return -EINVAL;
+ if (freq < 0) {
+ ret = freq;
+ break;
+ }
+ if (freq == 0) {
+ ret = -EINVAL;
+ break;
+ }
chandef.center_freq1 = freq;
chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq);
- if (!chandef.chan)
- return -EINVAL;
- return cfg80211_set_monitor_channel(rdev, &chandef);
+ if (!chandef.chan) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = cfg80211_set_monitor_channel(rdev, &chandef);
+ break;
case NL80211_IFTYPE_MESH_POINT:
freq = cfg80211_wext_freq(wextfreq);
- if (freq < 0)
- return freq;
- if (freq == 0)
- return -EINVAL;
+ if (freq < 0) {
+ ret = freq;
+ break;
+ }
+ if (freq == 0) {
+ ret = -EINVAL;
+ break;
+ }
chandef.center_freq1 = freq;
chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq);
- if (!chandef.chan)
- return -EINVAL;
- return cfg80211_set_mesh_channel(rdev, wdev, &chandef);
+ if (!chandef.chan) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = cfg80211_set_mesh_channel(rdev, wdev, &chandef);
+ break;
default:
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
+ break;
}
+
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
static int cfg80211_wext_giwfreq(struct net_device *dev,
@@ -797,24 +851,35 @@ static int cfg80211_wext_giwfreq(struct net_device *dev,
struct cfg80211_chan_def chandef = {};
int ret;
+ wiphy_lock(&rdev->wiphy);
switch (wdev->iftype) {
case NL80211_IFTYPE_STATION:
- return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra);
+ ret = cfg80211_mgd_wext_giwfreq(dev, info, freq, extra);
+ break;
case NL80211_IFTYPE_ADHOC:
- return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra);
+ ret = cfg80211_ibss_wext_giwfreq(dev, info, freq, extra);
+ break;
case NL80211_IFTYPE_MONITOR:
- if (!rdev->ops->get_channel)
- return -EINVAL;
+ if (!rdev->ops->get_channel) {
+ ret = -EINVAL;
+ break;
+ }
ret = rdev_get_channel(rdev, wdev, &chandef);
if (ret)
- return ret;
+ break;
freq->m = chandef.chan->center_freq;
freq->e = 6;
- return 0;
+ ret = 0;
+ break;
default:
- return -EINVAL;
+ ret = -EINVAL;
+ break;
}
+
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
static int cfg80211_wext_siwtxpower(struct net_device *dev,
@@ -825,6 +890,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
enum nl80211_tx_power_setting type;
int dbm = 0;
+ int ret;
if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
return -EINVAL;
@@ -866,7 +932,11 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
return 0;
}
- return rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm));
+ wiphy_lock(&rdev->wiphy);
+ ret = rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm));
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
static int cfg80211_wext_giwtxpower(struct net_device *dev,
@@ -885,7 +955,9 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev,
if (!rdev->ops->get_tx_power)
return -EOPNOTSUPP;
+ wiphy_lock(&rdev->wiphy);
err = rdev_get_tx_power(rdev, wdev, &val);
+ wiphy_unlock(&rdev->wiphy);
if (err)
return err;
@@ -1125,7 +1197,9 @@ static int cfg80211_wext_siwpower(struct net_device *dev,
timeout = wrq->value / 1000;
}
+ wiphy_lock(&rdev->wiphy);
err = rdev_set_power_mgmt(rdev, dev, ps, timeout);
+ wiphy_unlock(&rdev->wiphy);
if (err)
return err;
@@ -1156,7 +1230,7 @@ static int cfg80211_wext_siwrate(struct net_device *dev,
struct cfg80211_bitrate_mask mask;
u32 fixed, maxrate;
struct ieee80211_supported_band *sband;
- int band, ridx;
+ int band, ridx, ret;
bool match = false;
if (!rdev->ops->set_bitrate_mask)
@@ -1195,7 +1269,11 @@ static int cfg80211_wext_siwrate(struct net_device *dev,
if (!match)
return -EINVAL;
- return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+ wiphy_lock(&rdev->wiphy);
+ ret = rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
static int cfg80211_wext_giwrate(struct net_device *dev,
@@ -1224,7 +1302,9 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
if (err)
return err;
+ wiphy_lock(&rdev->wiphy);
err = rdev_get_station(rdev, dev, addr, &sinfo);
+ wiphy_unlock(&rdev->wiphy);
if (err)
return err;
@@ -1249,6 +1329,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
static struct iw_statistics wstats;
static struct station_info sinfo = {};
u8 bssid[ETH_ALEN];
+ int ret;
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION)
return NULL;
@@ -1267,7 +1348,11 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
memset(&sinfo, 0, sizeof(sinfo));
- if (rdev_get_station(rdev, dev, bssid, &sinfo))
+ wiphy_lock(&rdev->wiphy);
+ ret = rdev_get_station(rdev, dev, bssid, &sinfo);
+ wiphy_unlock(&rdev->wiphy);
+
+ if (ret)
return NULL;
memset(&wstats, 0, sizeof(wstats));
@@ -1318,15 +1403,24 @@ static int cfg80211_wext_siwap(struct net_device *dev,
struct sockaddr *ap_addr, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ int ret;
+ wiphy_lock(&rdev->wiphy);
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
- return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra);
+ ret = cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra);
+ break;
case NL80211_IFTYPE_STATION:
- return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra);
+ ret = cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra);
+ break;
default:
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
+ break;
}
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
static int cfg80211_wext_giwap(struct net_device *dev,
@@ -1334,15 +1428,24 @@ static int cfg80211_wext_giwap(struct net_device *dev,
struct sockaddr *ap_addr, char *extra)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ int ret;
+ wiphy_lock(&rdev->wiphy);
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
- return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra);
+ ret = cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra);
+ break;
case NL80211_IFTYPE_STATION:
- return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra);
+ ret = cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra);
+ break;
default:
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
+ break;
}
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
static int cfg80211_wext_siwessid(struct net_device *dev,
@@ -1350,15 +1453,24 @@ static int cfg80211_wext_siwessid(struct net_device *dev,
struct iw_point *data, char *ssid)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ int ret;
+ wiphy_lock(&rdev->wiphy);
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
- return cfg80211_ibss_wext_siwessid(dev, info, data, ssid);
+ ret = cfg80211_ibss_wext_siwessid(dev, info, data, ssid);
+ break;
case NL80211_IFTYPE_STATION:
- return cfg80211_mgd_wext_siwessid(dev, info, data, ssid);
+ ret = cfg80211_mgd_wext_siwessid(dev, info, data, ssid);
+ break;
default:
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
+ break;
}
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
static int cfg80211_wext_giwessid(struct net_device *dev,
@@ -1366,18 +1478,27 @@ static int cfg80211_wext_giwessid(struct net_device *dev,
struct iw_point *data, char *ssid)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ int ret;
data->flags = 0;
data->length = 0;
+ wiphy_lock(&rdev->wiphy);
switch (wdev->iftype) {
case NL80211_IFTYPE_ADHOC:
- return cfg80211_ibss_wext_giwessid(dev, info, data, ssid);
+ ret = cfg80211_ibss_wext_giwessid(dev, info, data, ssid);
+ break;
case NL80211_IFTYPE_STATION:
- return cfg80211_mgd_wext_giwessid(dev, info, data, ssid);
+ ret = cfg80211_mgd_wext_giwessid(dev, info, data, ssid);
+ break;
default:
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
+ break;
}
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
static int cfg80211_wext_siwpmksa(struct net_device *dev,
@@ -1388,6 +1509,7 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev,
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_pmksa cfg_pmksa;
struct iw_pmksa *pmksa = (struct iw_pmksa *)extra;
+ int ret;
memset(&cfg_pmksa, 0, sizeof(struct cfg80211_pmksa));
@@ -1397,28 +1519,39 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev,
cfg_pmksa.bssid = pmksa->bssid.sa_data;
cfg_pmksa.pmkid = pmksa->pmkid;
+ wiphy_lock(&rdev->wiphy);
switch (pmksa->cmd) {
case IW_PMKSA_ADD:
- if (!rdev->ops->set_pmksa)
- return -EOPNOTSUPP;
-
- return rdev_set_pmksa(rdev, dev, &cfg_pmksa);
+ if (!rdev->ops->set_pmksa) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ ret = rdev_set_pmksa(rdev, dev, &cfg_pmksa);
+ break;
case IW_PMKSA_REMOVE:
- if (!rdev->ops->del_pmksa)
- return -EOPNOTSUPP;
-
- return rdev_del_pmksa(rdev, dev, &cfg_pmksa);
+ if (!rdev->ops->del_pmksa) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ ret = rdev_del_pmksa(rdev, dev, &cfg_pmksa);
+ break;
case IW_PMKSA_FLUSH:
- if (!rdev->ops->flush_pmksa)
- return -EOPNOTSUPP;
-
- return rdev_flush_pmksa(rdev, dev);
+ if (!rdev->ops->flush_pmksa) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ ret = rdev_flush_pmksa(rdev, dev);
+ break;
default:
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
+ break;
}
+ wiphy_unlock(&rdev->wiphy);
+
+ return ret;
}
#define DEFINE_WEXT_COMPAT_STUB(func, type) \
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 69102fda9ebd..76a80a41615b 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -896,8 +896,9 @@ out:
int call_commit_handler(struct net_device *dev)
{
#ifdef CONFIG_WIRELESS_EXT
- if ((netif_running(dev)) &&
- (dev->wireless_handlers->standard[0] != NULL))
+ if (netif_running(dev) &&
+ dev->wireless_handlers &&
+ dev->wireless_handlers->standard[0])
/* Call the commit handler on the driver */
return dev->wireless_handlers->standard[0](dev, NULL,
NULL, NULL);
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 73df23570d43..193a18a53142 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -3,7 +3,7 @@
* cfg80211 wext compat for managed mode.
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ * Copyright (C) 2009, 2020-2021 Intel Corporation.
*/
#include <linux/export.h>
@@ -379,6 +379,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev,
if (mlme->addr.sa_family != ARPHRD_ETHER)
return -EINVAL;
+ wiphy_lock(&rdev->wiphy);
wdev_lock(wdev);
switch (mlme->cmd) {
case IW_MLME_DEAUTH:
@@ -390,6 +391,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev,
break;
}
wdev_unlock(wdev);
+ wiphy_unlock(&rdev->wiphy);
return err;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ac4a317038f1..4faabd1ecfd1 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -108,9 +108,9 @@ EXPORT_SYMBOL(xsk_get_pool_from_qid);
void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
{
- if (queue_id < dev->real_num_rx_queues)
+ if (queue_id < dev->num_rx_queues)
dev->_rx[queue_id].pool = NULL;
- if (queue_id < dev->real_num_tx_queues)
+ if (queue_id < dev->num_tx_queues)
dev->_tx[queue_id].pool = NULL;
}
@@ -184,12 +184,13 @@ static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
memcpy(to_buf, from_buf, len + metalen);
}
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
- bool explicit_free)
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
struct xdp_buff *xsk_xdp;
int err;
+ u32 len;
+ len = xdp->data_end - xdp->data;
if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
xs->rx_dropped++;
return -ENOSPC;
@@ -207,8 +208,6 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
xsk_buff_free(xsk_xdp);
return err;
}
- if (explicit_free)
- xdp_return_buff(xdp);
return 0;
}
@@ -230,11 +229,8 @@ static bool xsk_is_bound(struct xdp_sock *xs)
return false;
}
-static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
- bool explicit_free)
+static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- u32 len;
-
if (!xsk_is_bound(xs))
return -EINVAL;
@@ -242,11 +238,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
return -EINVAL;
sk_mark_napi_id_once_xdp(&xs->sk, xdp);
- len = xdp->data_end - xdp->data;
-
- return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
- __xsk_rcv_zc(xs, xdp, len) :
- __xsk_rcv(xs, xdp, len, explicit_free);
+ return 0;
}
static void xsk_flush(struct xdp_sock *xs)
@@ -261,18 +253,41 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
int err;
spin_lock_bh(&xs->rx_lock);
- err = xsk_rcv(xs, xdp, false);
- xsk_flush(xs);
+ err = xsk_rcv_check(xs, xdp);
+ if (!err) {
+ err = __xsk_rcv(xs, xdp);
+ xsk_flush(xs);
+ }
spin_unlock_bh(&xs->rx_lock);
return err;
}
+static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+ int err;
+ u32 len;
+
+ err = xsk_rcv_check(xs, xdp);
+ if (err)
+ return err;
+
+ if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
+ len = xdp->data_end - xdp->data;
+ return __xsk_rcv_zc(xs, xdp, len);
+ }
+
+ err = __xsk_rcv(xs, xdp);
+ if (!err)
+ xdp_return_buff(xdp);
+ return err;
+}
+
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
int err;
- err = xsk_rcv(xs, xdp, true);
+ err = xsk_rcv(xs, xdp);
if (err)
return err;
@@ -423,9 +438,9 @@ static void xsk_destruct_skb(struct sk_buff *skb)
struct xdp_sock *xs = xdp_sk(skb->sk);
unsigned long flags;
- spin_lock_irqsave(&xs->tx_completion_lock, flags);
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
xskq_prod_submit_addr(xs->pool->cq, addr);
- spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
sock_wfree(skb);
}
@@ -437,6 +452,7 @@ static int xsk_generic_xmit(struct sock *sk)
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
+ unsigned long flags;
int err = 0;
mutex_lock(&xs->mutex);
@@ -468,10 +484,13 @@ static int xsk_generic_xmit(struct sock *sk)
* if there is space in it. This avoids having to implement
* any buffering in the Tx path.
*/
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
kfree_skb(skb);
goto out;
}
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
@@ -483,6 +502,9 @@ static int xsk_generic_xmit(struct sock *sk)
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
skb->destructor = sock_wfree;
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel(xs->pool->cq);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
/* Free skb without triggering the perf drop trace */
consume_skb(skb);
err = -EAGAIN;
@@ -878,6 +900,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
}
+ /* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
+ xs->fq_tmp = NULL;
+ xs->cq_tmp = NULL;
+
xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
@@ -1299,7 +1325,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs->state = XSK_READY;
mutex_init(&xs->mutex);
spin_lock_init(&xs->rx_lock);
- spin_lock_init(&xs->tx_completion_lock);
INIT_LIST_HEAD(&xs->map_list);
spin_lock_init(&xs->map_list_lock);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 67a4494d63b6..8de01aaac4a0 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -71,12 +71,11 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
spin_lock_init(&pool->xsk_tx_list_lock);
+ spin_lock_init(&pool->cq_lock);
refcount_set(&pool->users, 1);
pool->fq = xs->fq_tmp;
pool->cq = xs->cq_tmp;
- xs->fq_tmp = NULL;
- xs->cq_tmp = NULL;
for (i = 0; i < pool->free_heads_cnt; i++) {
xskb = &pool->heads[i];
@@ -120,8 +119,8 @@ static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
}
}
-static int __xp_assign_dev(struct xsk_buff_pool *pool,
- struct net_device *netdev, u16 queue_id, u16 flags)
+int xp_assign_dev(struct xsk_buff_pool *pool,
+ struct net_device *netdev, u16 queue_id, u16 flags)
{
bool force_zc, force_copy;
struct netdev_bpf bpf;
@@ -192,12 +191,6 @@ err_unreg_pool:
return err;
}
-int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev,
- u16 queue_id, u16 flags)
-{
- return __xp_assign_dev(pool, dev, queue_id, flags);
-}
-
int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
struct net_device *dev, u16 queue_id)
{
@@ -211,7 +204,7 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem,
if (pool->uses_need_wakeup)
flags |= XDP_USE_NEED_WAKEUP;
- return __xp_assign_dev(pool, dev, queue_id, flags);
+ return xp_assign_dev(pool, dev, queue_id, flags);
}
void xp_clear_dev(struct xsk_buff_pool *pool)
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 4a9663aa7afe..2823b7c3302d 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -334,6 +334,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true;
}
+static inline void xskq_prod_cancel(struct xsk_queue *q)
+{
+ q->cached_prod--;
+}
+
static inline int xskq_prod_reserve(struct xsk_queue *q)
{
if (xskq_prod_is_full(q))
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index be6351e3f3cd..1158cd0311d7 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -660,7 +660,7 @@ resume:
/* only the first xfrm gets the encap type */
encap_type = 0;
- if (async && x->repl->recheck(x, skb, seq)) {
+ if (x->repl->recheck(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 697cdcfbb5e1..495b1f5c979b 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -296,7 +296,8 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
}
mtu = dst_mtu(dst);
- if (skb->len > mtu) {
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) {
skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) {
@@ -564,6 +565,11 @@ static void xfrmi_dev_setup(struct net_device *dev)
eth_broadcast_addr(dev->broadcast);
}
+#define XFRMI_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_GSO_SOFTWARE | \
+ NETIF_F_HW_CSUM)
+
static int xfrmi_dev_init(struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
@@ -581,6 +587,8 @@ static int xfrmi_dev_init(struct net_device *dev)
}
dev->features |= NETIF_F_LLTX;
+ dev->features |= XFRMI_FEATURES;
+ dev->hw_features |= XFRMI_FEATURES;
if (phydev) {
dev->needed_headroom = phydev->needed_headroom;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d622c2548d22..b74f28cabe24 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -793,15 +793,22 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a,
const xfrm_address_t *b,
u8 prefixlen, u16 family)
{
+ u32 ma, mb, mask;
unsigned int pdw, pbi;
int delta = 0;
switch (family) {
case AF_INET:
- if (sizeof(long) == 4 && prefixlen == 0)
- return ntohl(a->a4) - ntohl(b->a4);
- return (ntohl(a->a4) & ((~0UL << (32 - prefixlen)))) -
- (ntohl(b->a4) & ((~0UL << (32 - prefixlen))));
+ if (prefixlen == 0)
+ return 0;
+ mask = ~0U << (32 - prefixlen);
+ ma = ntohl(a->a4) & mask;
+ mb = ntohl(b->a4) & mask;
+ if (ma < mb)
+ delta = -1;
+ else if (ma > mb)
+ delta = 1;
+ break;
case AF_INET6:
pdw = prefixlen >> 5;
pbi = prefixlen & 0x1f;
@@ -812,10 +819,13 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a,
return delta;
}
if (pbi) {
- u32 mask = ~0u << (32 - pbi);
-
- delta = (ntohl(a->a6[pdw]) & mask) -
- (ntohl(b->a6[pdw]) & mask);
+ mask = ~0U << (32 - pbi);
+ ma = ntohl(a->a6[pdw]) & mask;
+ mb = ntohl(b->a6[pdw]) & mask;
+ if (ma < mb)
+ delta = -1;
+ else if (ma > mb)
+ delta = 1;
}
break;
default:
@@ -3078,8 +3088,8 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
xflo.flags = flags;
/* To accelerate a bit... */
- if ((dst_orig->flags & DST_NOXFRM) ||
- !net->xfrm.policy_count[XFRM_POLICY_OUT])
+ if (!if_id && ((dst_orig->flags & DST_NOXFRM) ||
+ !net->xfrm.policy_count[XFRM_POLICY_OUT]))
goto nopol;
xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0727ac853b55..5a0ef4361e43 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2504,7 +2504,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
sizeof(*encap), GFP_KERNEL);
if (!encap)
- return 0;
+ return -ENOMEM;
}
err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap);