summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/ndisc.c6
-rw-r--r--net/8021q/vlan_dev.c10
-rw-r--r--net/8021q/vlanproc.c4
-rw-r--r--net/Kconfig6
-rw-r--r--net/batman-adv/soft-interface.c5
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bluetooth/cmtp/Kconfig4
-rw-r--r--net/bluetooth/cmtp/capi.c32
-rw-r--r--net/bluetooth/hci_conn.c13
-rw-r--r--net/bluetooth/hci_core.c29
-rw-r--r--net/bluetooth/hci_event.c2
-rw-r--r--net/bluetooth/hci_sync.c47
-rw-r--r--net/bluetooth/leds.c2
-rw-r--r--net/bluetooth/mgmt.c161
-rw-r--r--net/bluetooth/smp.c151
-rw-r--r--net/bridge/br_device.c6
-rw-r--r--net/bridge/br_fdb.c6
-rw-r--r--net/bridge/br_netfilter_hooks.c3
-rw-r--r--net/bridge/netfilter/ebtables.c2
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c7
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c3
-rw-r--r--net/caif/cfpkt_skbuff.c6
-rw-r--r--net/caif/chnl_net.c2
-rw-r--r--net/can/bcm.c6
-rw-r--r--net/can/j1939/transport.c8
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/datagram.c6
-rw-r--r--net/core/dev.c93
-rw-r--r--net/core/dev_addr_lists.c6
-rw-r--r--net/core/dev_ioctl.c9
-rw-r--r--net/core/devmem.c389
-rw-r--r--net/core/devmem.h180
-rw-r--r--net/core/fib_rules.c9
-rw-r--r--net/core/filter.c25
-rw-r--r--net/core/gro.c5
-rw-r--r--net/core/lwt_bpf.c3
-rw-r--r--net/core/mp_dmabuf_devmem.h44
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/net-sysfs.c17
-rw-r--r--net/core/net_namespace.c84
-rw-r--r--net/core/netdev-genl-gen.c23
-rw-r--r--net/core/netdev-genl-gen.h6
-rw-r--r--net/core/netdev-genl.c147
-rw-r--r--net/core/netdev_rx_queue.c81
-rw-r--r--net/core/netmem_priv.h31
-rw-r--r--net/core/netpoll.c46
-rw-r--r--net/core/page_pool.c119
-rw-r--r--net/core/page_pool_priv.h46
-rw-r--r--net/core/page_pool_user.c32
-rw-r--r--net/core/pktgen.c14
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/skbuff.c136
-rw-r--r--net/core/skmsg.c2
-rw-r--r--net/core/sock.c76
-rw-r--r--net/core/sock_map.c1
-rw-r--r--net/core/sock_reuseport.c5
-rw-r--r--net/core/utils.c2
-rw-r--r--net/dsa/tag.c5
-rw-r--r--net/dsa/tag.h135
-rw-r--r--net/dsa/tag_ksz.c11
-rw-r--r--net/dsa/tag_ocelot.c37
-rw-r--r--net/dsa/user.c3
-rw-r--r--net/ethtool/Makefile3
-rw-r--r--net/ethtool/cabletest.c57
-rw-r--r--net/ethtool/channels.c20
-rw-r--r--net/ethtool/cmis.h1
-rw-r--r--net/ethtool/cmis_cdb.c14
-rw-r--r--net/ethtool/common.c61
-rw-r--r--net/ethtool/common.h7
-rw-r--r--net/ethtool/ioctl.c47
-rw-r--r--net/ethtool/linkinfo.c2
-rw-r--r--net/ethtool/linkmodes.c2
-rw-r--r--net/ethtool/netlink.c68
-rw-r--r--net/ethtool/netlink.h37
-rw-r--r--net/ethtool/phy.c306
-rw-r--r--net/ethtool/plca.c30
-rw-r--r--net/ethtool/pse-pd.c38
-rw-r--r--net/ethtool/rss.c233
-rw-r--r--net/ethtool/strset.c27
-rw-r--r--net/handshake/netlink.c4
-rw-r--r--net/hsr/hsr_device.c17
-rw-r--r--net/hsr/hsr_main.h1
-rw-r--r--net/hsr/hsr_slave.c11
-rw-r--r--net/ieee802154/6lowpan/core.c2
-rw-r--r--net/ieee802154/core.c10
-rw-r--r--net/ipv4/Kconfig3
-rw-r--r--net/ipv4/devinet.c53
-rw-r--r--net/ipv4/esp4.c3
-rw-r--r--net/ipv4/fib_frontend.c4
-rw-r--r--net/ipv4/fib_rules.c54
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/fib_trie.c3
-rw-r--r--net/ipv4/fou_core.c29
-rw-r--r--net/ipv4/icmp.c119
-rw-r--r--net/ipv4/inet_connection_sock.c7
-rw-r--r--net/ipv4/inet_diag.c4
-rw-r--r--net/ipv4/inet_hashtables.c12
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ip_input.c6
-rw-r--r--net/ipv4/ip_output.c5
-rw-r--r--net/ipv4/ip_tunnel.c15
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c12
-rw-r--r--net/ipv4/netfilter.c3
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c3
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c5
-rw-r--r--net/ipv4/nexthop.c55
-rw-r--r--net/ipv4/route.c16
-rw-r--r--net/ipv4/sysctl_net_ipv4.c32
-rw-r--r--net/ipv4/tcp.c307
-rw-r--r--net/ipv4/tcp_bpf.c6
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_input.c13
-rw-r--r--net/ipv4/tcp_ipv4.c37
-rw-r--r--net/ipv4/tcp_metrics.c10
-rw-r--r--net/ipv4/tcp_minisocks.c33
-rw-r--r--net/ipv4/tcp_output.c7
-rw-r--r--net/ipv4/tcp_timer.c7
-rw-r--r--net/ipv4/udp.c11
-rw-r--r--net/ipv4/udp_offload.c3
-rw-r--r--net/ipv4/udp_tunnel_core.c3
-rw-r--r--net/ipv6/addrconf.c26
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/esp6.c3
-rw-r--r--net/ipv6/fib6_rules.c43
-rw-r--r--net/ipv6/icmp.c28
-rw-r--r--net/ipv6/ila/ila.h1
-rw-r--r--net/ipv6/ila/ila_main.c6
-rw-r--r--net/ipv6/ila/ila_xlat.c13
-rw-r--r--net/ipv6/inet6_hashtables.c15
-rw-r--r--net/ipv6/ioam6_iptunnel.c86
-rw-r--r--net/ipv6/ip6_gre.c7
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_output.c10
-rw-r--r--net/ipv6/ip6_tunnel.c23
-rw-r--r--net/ipv6/ip6mr.c5
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/ipv6/mcast.c5
-rw-r--r--net/ipv6/ndisc.c6
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c4
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/ipv6/rpl_iptunnel.c12
-rw-r--r--net/ipv6/sit.c11
-rw-r--r--net/ipv6/tcp_ipv6.c5
-rw-r--r--net/ipv6/udp.c8
-rw-r--r--net/iucv/iucv.c4
-rw-r--r--net/kcm/kcmsock.c4
-rw-r--r--net/l2tp/l2tp_core.c382
-rw-r--r--net/l2tp/l2tp_core.h25
-rw-r--r--net/l2tp/l2tp_debugfs.c24
-rw-r--r--net/l2tp/l2tp_eth.c44
-rw-r--r--net/l2tp/l2tp_ip.c125
-rw-r--r--net/l2tp/l2tp_ip6.c123
-rw-r--r--net/l2tp/l2tp_netlink.c76
-rw-r--r--net/l2tp/l2tp_ppp.c154
-rw-r--r--net/mac80211/agg-rx.c15
-rw-r--r--net/mac80211/agg-tx.c15
-rw-r--r--net/mac80211/airtime.c140
-rw-r--r--net/mac80211/cfg.c49
-rw-r--r--net/mac80211/chan.c5
-rw-r--r--net/mac80211/ht.c15
-rw-r--r--net/mac80211/ieee80211_i.h33
-rw-r--r--net/mac80211/iface.c25
-rw-r--r--net/mac80211/link.c12
-rw-r--r--net/mac80211/main.c6
-rw-r--r--net/mac80211/mesh_pathtbl.c2
-rw-r--r--net/mac80211/mlme.c47
-rw-r--r--net/mac80211/offchannel.c1
-rw-r--r--net/mac80211/pm.c2
-rw-r--r--net/mac80211/rate.c2
-rw-r--r--net/mac80211/scan.c16
-rw-r--r--net/mac80211/status.c1
-rw-r--r--net/mac80211/tx.c6
-rw-r--r--net/mac80211/util.c100
-rw-r--r--net/mctp/af_mctp.c3
-rw-r--r--net/mctp/test/route-test.c2
-rw-r--r--net/mpls/af_mpls.c6
-rw-r--r--net/mpls/mpls_iptunnel.c2
-rw-r--r--net/mptcp/ctrl.c133
-rw-r--r--net/mptcp/fastopen.c4
-rw-r--r--net/mptcp/mib.c7
-rw-r--r--net/mptcp/mib.h7
-rw-r--r--net/mptcp/options.c50
-rw-r--r--net/mptcp/pm.c56
-rw-r--r--net/mptcp/pm_netlink.c312
-rw-r--r--net/mptcp/pm_userspace.c40
-rw-r--r--net/mptcp/protocol.c83
-rw-r--r--net/mptcp/protocol.h43
-rw-r--r--net/mptcp/sched.c4
-rw-r--r--net/mptcp/sockopt.c4
-rw-r--r--net/mptcp/subflow.c110
-rw-r--r--net/netfilter/core.c4
-rw-r--r--net/netfilter/nf_conncount.c15
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c9
-rw-r--r--net/netfilter/nf_flow_table_core.c6
-rw-r--r--net/netfilter/nf_flow_table_inet.c5
-rw-r--r--net/netfilter/nf_flow_table_ip.c3
-rw-r--r--net/netfilter/nf_nat_core.c2
-rw-r--r--net/netfilter/nf_tables_api.c201
-rw-r--r--net/netfilter/nf_tables_core.c2
-rw-r--r--net/netfilter/nfnetlink.c14
-rw-r--r--net/netfilter/nfnetlink_queue.c12
-rw-r--r--net/netfilter/nft_bitwise.c4
-rw-r--r--net/netfilter/nft_byteorder.c2
-rw-r--r--net/netfilter/nft_cmp.c6
-rw-r--r--net/netfilter/nft_compat.c6
-rw-r--r--net/netfilter/nft_counter.c91
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_dup_netdev.c2
-rw-r--r--net/netfilter/nft_dynset.c22
-rw-r--r--net/netfilter/nft_exthdr.c2
-rw-r--r--net/netfilter/nft_fib.c3
-rw-r--r--net/netfilter/nft_flow_offload.c6
-rw-r--r--net/netfilter/nft_fwd_netdev.c9
-rw-r--r--net/netfilter/nft_hash.c2
-rw-r--r--net/netfilter/nft_immediate.c3
-rw-r--r--net/netfilter/nft_lookup.c5
-rw-r--r--net/netfilter/nft_masq.c7
-rw-r--r--net/netfilter/nft_meta.c8
-rw-r--r--net/netfilter/nft_nat.c11
-rw-r--r--net/netfilter/nft_objref.c2
-rw-r--r--net/netfilter/nft_osf.c3
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_queue.c5
-rw-r--r--net/netfilter/nft_range.c2
-rw-r--r--net/netfilter/nft_redir.c7
-rw-r--r--net/netfilter/nft_reject.c3
-rw-r--r--net/netfilter/nft_reject_inet.c3
-rw-r--r--net/netfilter/nft_reject_netdev.c3
-rw-r--r--net/netfilter/nft_rt.c3
-rw-r--r--net/netfilter/nft_socket.c51
-rw-r--r--net/netfilter/nft_synproxy.c3
-rw-r--r--net/netfilter/nft_tproxy.c7
-rw-r--r--net/netfilter/nft_xfrm.c3
-rw-r--r--net/netfilter/xt_connlimit.c15
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/netrom/nr_route.c4
-rw-r--r--net/openvswitch/actions.c8
-rw-r--r--net/openvswitch/conntrack.c35
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/openvswitch/datapath.h3
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/openvswitch/vport-internal_dev.c11
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/rds/Kconfig9
-rw-r--r--net/rds/Makefile5
-rw-r--r--net/rds/ib.h4
-rw-r--r--net/rfkill/core.c8
-rw-r--r--net/rfkill/rfkill-gpio.c18
-rw-r--r--net/rxrpc/ar-internal.h2
-rw-r--r--net/sched/act_ct.c4
-rw-r--r--net/sched/act_vlan.c1
-rw-r--r--net/sched/sch_cake.c64
-rw-r--r--net/sched/sch_fq.c4
-rw-r--r--net/sched/sch_netem.c56
-rw-r--r--net/sched/sch_taprio.c4
-rw-r--r--net/sctp/protocol.c3
-rw-r--r--net/sctp/sm_statefuns.c22
-rw-r--r--net/smc/af_smc.c8
-rw-r--r--net/smc/smc.h3
-rw-r--r--net/smc/smc_clc.h4
-rw-r--r--net/smc/smc_core.c72
-rw-r--r--net/smc/smc_core.h2
-rw-r--r--net/smc/smc_inet.c8
-rw-r--r--net/smc/smc_loopback.h1
-rw-r--r--net/smc/smc_pnet.c3
-rw-r--r--net/smc/smc_stats.c6
-rw-r--r--net/smc/smc_stats.h28
-rw-r--r--net/smc/smc_sysctl.c11
-rw-r--r--net/socket.c14
-rw-r--r--net/sunrpc/xprtrdma/ib_client.c6
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/bearer.c10
-rw-r--r--net/tipc/monitor.c2
-rw-r--r--net/tipc/socket.c6
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/unix/af_unix.c80
-rw-r--r--net/unix/garbage.c16
-rw-r--r--net/vmw_vsock/af_vsock.c58
-rw-r--r--net/vmw_vsock/virtio_transport.c4
-rw-r--r--net/vmw_vsock/virtio_transport_common.c35
-rw-r--r--net/vmw_vsock/vsock_loopback.c6
-rw-r--r--net/wireless/core.c10
-rw-r--r--net/wireless/core.h8
-rw-r--r--net/wireless/ibss.c2
-rw-r--r--net/wireless/lib80211.c10
-rw-r--r--net/wireless/lib80211_crypt_ccmp.c2
-rw-r--r--net/wireless/lib80211_crypt_tkip.c2
-rw-r--r--net/wireless/lib80211_crypt_wep.c2
-rw-r--r--net/wireless/mesh.c2
-rw-r--r--net/wireless/mlme.c20
-rw-r--r--net/wireless/nl80211.c77
-rw-r--r--net/wireless/rdev-ops.h13
-rw-r--r--net/wireless/reg.c19
-rw-r--r--net/wireless/scan.c45
-rw-r--r--net/wireless/sme.c3
-rw-r--r--net/wireless/trace.h40
-rw-r--r--net/wireless/util.c14
-rw-r--r--net/xdp/xsk_buff_pool.c40
-rw-r--r--net/xdp/xsk_queue.h5
-rw-r--r--net/xfrm/xfrm_device.c6
-rw-r--r--net/xfrm/xfrm_interface_core.c2
-rw-r--r--net/xfrm/xfrm_policy.c225
310 files changed, 6167 insertions, 2549 deletions
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
index 16be8f8b2f8c..c40b98f7743c 100644
--- a/net/6lowpan/ndisc.c
+++ b/net/6lowpan/ndisc.c
@@ -11,11 +11,6 @@
#include "6lowpan_i.h"
-static int lowpan_ndisc_is_useropt(u8 nd_opt_type)
-{
- return nd_opt_type == ND_OPT_6CO;
-}
-
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
#define NDISC_802154_SHORT_ADDR_LENGTH 1
static int lowpan_ndisc_parse_802154_options(const struct net_device *dev,
@@ -222,7 +217,6 @@ static void lowpan_ndisc_prefix_rcv_add_addr(struct net *net,
#endif
const struct ndisc_ops lowpan_ndisc_ops = {
- .is_useropt = lowpan_ndisc_is_useropt,
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
.parse_options = lowpan_ndisc_parse_options,
.update = lowpan_ndisc_update,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 217be32426b5..458040e8a0e0 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -564,17 +564,20 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
NETIF_F_GSO_ENCAP_ALL |
NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
- NETIF_F_ALL_FCOE;
+ NETIF_F_FCOE_CRC | NETIF_F_FSO;
if (real_dev->vlan_features & NETIF_F_HW_MACSEC)
dev->hw_features |= NETIF_F_HW_MACSEC;
- dev->features |= dev->hw_features | NETIF_F_LLTX;
+ dev->features |= dev->hw_features;
+ dev->lltx = true;
+ dev->fcoe_mtu = true;
netif_inherit_tso_max(dev, real_dev);
if (dev->features & NETIF_F_VLAN_FEATURES)
netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
- dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE;
+ dev->vlan_features = real_dev->vlan_features &
+ ~(NETIF_F_FCOE_CRC | NETIF_F_FSO);
dev->hw_enc_features = vlan_tnl_features(real_dev);
dev->mpls_features = real_dev->mpls_features;
@@ -655,7 +658,6 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
lower_features |= NETIF_F_HW_CSUM;
features = netdev_intersect_features(features, lower_features);
features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE);
- features |= NETIF_F_LLTX;
return features;
}
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 87b959da00cd..fa67374bda49 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -238,9 +238,9 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
stats = dev_get_stats(vlandev, &temp);
seq_printf(seq,
- "%s VID: %d REORDER_HDR: %i dev->priv_flags: %llx\n",
+ "%s VID: %d REORDER_HDR: %i dev->priv_flags: %x\n",
vlandev->name, vlan->vlan_id,
- (int)(vlan->flags & 1), vlandev->priv_flags);
+ (int)(vlan->flags & 1), (u32)vlandev->priv_flags);
seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
diff --git a/net/Kconfig b/net/Kconfig
index d27d0deac0bf..a629f92dc86b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -66,6 +66,12 @@ config SKB_DECRYPTED
config SKB_EXTENSIONS
bool
+config NET_DEVMEM
+ def_bool y
+ depends on DMA_SHARED_BUFFER
+ depends on GENERIC_ALLOCATOR
+ depends on PAGE_POOL
+
menu "Networking options"
source "net/packet/Kconfig"
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 30ecbc2ef1fd..2758aba47a2f 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1020,9 +1020,10 @@ static void batadv_softif_init_early(struct net_device *dev)
dev->netdev_ops = &batadv_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = batadv_softif_free;
- dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
- dev->features |= NETIF_F_LLTX;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->lltx = true;
+ dev->netns_local = true;
/* can't call min_mtu, because the needed variables
* have not been initialized yet
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 00840d5784fe..04f6398b3a40 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -287,7 +287,7 @@ struct batadv_frag_table_entry {
/** @lock: lock to protect the list of fragments */
spinlock_t lock;
- /** @timestamp: time (jiffie) of last received fragment */
+ /** @timestamp: time (jiffy) of last received fragment */
unsigned long timestamp;
/** @seqno: sequence number of the fragments in the list */
diff --git a/net/bluetooth/cmtp/Kconfig b/net/bluetooth/cmtp/Kconfig
index c8337786da6b..34e923466236 100644
--- a/net/bluetooth/cmtp/Kconfig
+++ b/net/bluetooth/cmtp/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config BT_CMTP
- tristate "CMTP protocol support"
- depends on BT_BREDR && ISDN_CAPI
+ tristate "CMTP protocol support (DEPRECATED)"
+ depends on BT_BREDR && ISDN_CAPI && DEPRECATED
help
CMTP (CAPI Message Transport Protocol) is a transport layer
for CAPI messages. CMTP is required for the Bluetooth Common
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index f3bedc3b613a..884703fda979 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -248,18 +248,10 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s
break;
case CAPI_FUNCTION_GET_MANUFACTURER:
- if (skb->len < CAPI_MSG_BASELEN + 15)
- break;
-
- if (!info && ctrl) {
- int len = min_t(uint, CAPI_MANUFACTURER_LEN,
- skb->data[CAPI_MSG_BASELEN + 14]);
-
- memset(ctrl->manu, 0, CAPI_MANUFACTURER_LEN);
- strncpy(ctrl->manu,
- skb->data + CAPI_MSG_BASELEN + 15, len);
- }
-
+ if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 14)
+ strscpy_pad(ctrl->manu,
+ skb->data + CAPI_MSG_BASELEN + 15,
+ skb->data[CAPI_MSG_BASELEN + 14]);
break;
case CAPI_FUNCTION_GET_VERSION:
@@ -276,18 +268,10 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s
break;
case CAPI_FUNCTION_GET_SERIAL_NUMBER:
- if (skb->len < CAPI_MSG_BASELEN + 17)
- break;
-
- if (!info && ctrl) {
- int len = min_t(uint, CAPI_SERIAL_LEN,
- skb->data[CAPI_MSG_BASELEN + 16]);
-
- memset(ctrl->serial, 0, CAPI_SERIAL_LEN);
- strncpy(ctrl->serial,
- skb->data + CAPI_MSG_BASELEN + 17, len);
- }
-
+ if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 16)
+ strscpy_pad(ctrl->serial,
+ skb->data + CAPI_MSG_BASELEN + 17,
+ skb->data[CAPI_MSG_BASELEN + 16]);
break;
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 8e48ccd2af30..d083117ee36c 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -106,8 +106,7 @@ void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
* where a timeout + cancel does indicate an actual failure.
*/
if (status && status != HCI_ERROR_UNKNOWN_CONN_ID)
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, status);
+ mgmt_connect_failed(hdev, conn, status);
/* The connection attempt was doing scan for new RPA, and is
* in scan phase. If params are not associated with any other
@@ -778,7 +777,6 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c
if (!d)
return -ENOMEM;
- memset(d, 0, sizeof(*d));
d->big = big;
d->sync_handle = conn->sync_handle;
@@ -1250,8 +1248,7 @@ void hci_conn_failed(struct hci_conn *conn, u8 status)
hci_le_conn_failed(conn, status);
break;
case ACL_LINK:
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, status);
+ mgmt_connect_failed(hdev, conn, status);
break;
}
@@ -2952,5 +2949,9 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
return 0;
}
- return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL);
+ /* Run immediately if on cmd_sync_work since this may be called
+ * as a result to MGMT_OP_DISCONNECT/MGMT_OP_UNPAIR which does
+ * already queue its callback on cmd_sync_work.
+ */
+ return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 06da8ac13dca..d6976db02c06 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2406,10 +2406,16 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
/* To avoid a potential race with hci_unregister_dev. */
hci_dev_hold(hdev);
- if (action == PM_SUSPEND_PREPARE)
+ switch (action) {
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
ret = hci_suspend_dev(hdev);
- else if (action == PM_POST_SUSPEND)
+ break;
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
ret = hci_resume_dev(hdev);
+ break;
+ }
if (ret)
bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d",
@@ -3664,19 +3670,19 @@ static void hci_sched_le(struct hci_dev *hdev)
{
struct hci_chan *chan;
struct sk_buff *skb;
- int quote, cnt, tmp;
+ int quote, *cnt, tmp;
BT_DBG("%s", hdev->name);
if (!hci_conn_num(hdev, LE_LINK))
return;
- cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt;
+ cnt = hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt;
- __check_timeout(hdev, cnt, LE_LINK);
+ __check_timeout(hdev, *cnt, LE_LINK);
- tmp = cnt;
- while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) {
+ tmp = *cnt;
+ while (*cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) {
u32 priority = (skb_peek(&chan->data_q))->priority;
while (quote-- && (skb = skb_peek(&chan->data_q))) {
BT_DBG("chan %p skb %p len %d priority %u", chan, skb,
@@ -3691,7 +3697,7 @@ static void hci_sched_le(struct hci_dev *hdev)
hci_send_frame(hdev, skb);
hdev->le_last_tx = jiffies;
- cnt--;
+ (*cnt)--;
chan->sent++;
chan->conn->sent++;
@@ -3701,12 +3707,7 @@ static void hci_sched_le(struct hci_dev *hdev)
}
}
- if (hdev->le_pkts)
- hdev->le_cnt = cnt;
- else
- hdev->acl_cnt = cnt;
-
- if (cnt != tmp)
+ if (*cnt != tmp)
hci_prio_recalculate(hdev, LE_LINK);
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index d0c118c47f6c..1c82dcdf6e8f 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5920,7 +5920,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
* while we have an existing one in peripheral role.
*/
if (hdev->conn_hash.le_num_peripheral > 0 &&
- (!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) ||
+ (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) ||
!(hdev->le_states[3] & 0x10)))
return NULL;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index e79cd40bd079..40ccdef168d7 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -112,7 +112,7 @@ static void hci_cmd_sync_add(struct hci_request *req, u16 opcode, u32 plen,
skb_queue_tail(&req->cmd_q, skb);
}
-static int hci_cmd_sync_run(struct hci_request *req)
+static int hci_req_sync_run(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
struct sk_buff *skb;
@@ -169,7 +169,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen,
hdev->req_status = HCI_REQ_PEND;
- err = hci_cmd_sync_run(&req);
+ err = hci_req_sync_run(&req);
if (err < 0)
return ERR_PTR(err);
@@ -782,6 +782,44 @@ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
}
EXPORT_SYMBOL(hci_cmd_sync_queue_once);
+/* Run HCI command:
+ *
+ * - hdev must be running
+ * - if on cmd_sync_work then run immediately otherwise queue
+ */
+int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy)
+{
+ /* Only queue command if hdev is running which means it had been opened
+ * and is either on init phase or is already up.
+ */
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ return -ENETDOWN;
+
+ /* If on cmd_sync_work then run immediately otherwise queue */
+ if (current_work() == &hdev->cmd_sync_work)
+ return func(hdev, data);
+
+ return hci_cmd_sync_submit(hdev, func, data, destroy);
+}
+EXPORT_SYMBOL(hci_cmd_sync_run);
+
+/* Run HCI command entry once:
+ *
+ * - Lookup if an entry already exist and only if it doesn't creates a new entry
+ * and run it.
+ * - if on cmd_sync_work then run immediately otherwise queue
+ */
+int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func,
+ void *data, hci_cmd_sync_work_destroy_t destroy)
+{
+ if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy))
+ return 0;
+
+ return hci_cmd_sync_run(hdev, func, data, destroy);
+}
+EXPORT_SYMBOL(hci_cmd_sync_run_once);
+
/* Lookup HCI command entry:
*
* - Return first entry that matches by function callback or data or
@@ -5342,7 +5380,10 @@ int hci_stop_discovery_sync(struct hci_dev *hdev)
if (!e)
return 0;
- return hci_remote_name_cancel_sync(hdev, &e->data.bdaddr);
+ /* Ignore cancel errors since it should interfere with stopping
+ * of the discovery.
+ */
+ hci_remote_name_cancel_sync(hdev, &e->data.bdaddr);
}
return 0;
diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c
index f46847632ffa..6e349704efe4 100644
--- a/net/bluetooth/leds.c
+++ b/net/bluetooth/leds.c
@@ -48,7 +48,7 @@ static int power_activate(struct led_classdev *led_cdev)
htrig = to_hci_basic_led_trigger(led_cdev->trigger);
powered = test_bit(HCI_UP, &htrig->hdev->flags);
- led_trigger_event(led_cdev->trigger, powered ? LED_FULL : LED_OFF);
+ led_set_brightness(led_cdev, powered ? LED_FULL : LED_OFF);
return 0;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 40d4887c7f79..e4f564d6f6fb 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2830,16 +2830,6 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
bt_dev_dbg(hdev, "debug_keys %u key_count %u", cp->debug_keys,
key_count);
- for (i = 0; i < key_count; i++) {
- struct mgmt_link_key_info *key = &cp->keys[i];
-
- /* Considering SMP over BREDR/LE, there is no need to check addr_type */
- if (key->type > 0x08)
- return mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_LOAD_LINK_KEYS,
- MGMT_STATUS_INVALID_PARAMS);
- }
-
hci_dev_lock(hdev);
hci_link_keys_clear(hdev);
@@ -2864,6 +2854,19 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
continue;
}
+ if (key->addr.type != BDADDR_BREDR) {
+ bt_dev_warn(hdev,
+ "Invalid link address type %u for %pMR",
+ key->addr.type, &key->addr.bdaddr);
+ continue;
+ }
+
+ if (key->type > 0x08) {
+ bt_dev_warn(hdev, "Invalid link key type %u for %pMR",
+ key->type, &key->addr.bdaddr);
+ continue;
+ }
+
/* Always ignore debug keys and require a new pairing if
* the user wants to use them.
*/
@@ -2921,7 +2924,12 @@ static int unpair_device_sync(struct hci_dev *hdev, void *data)
if (!conn)
return 0;
- return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM);
+ /* Disregard any possible error since the likes of hci_abort_conn_sync
+ * will clean up the connection no matter the error.
+ */
+ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
+
+ return 0;
}
static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -3053,13 +3061,44 @@ unlock:
return err;
}
+static void disconnect_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct mgmt_pending_cmd *cmd = data;
+
+ cmd->cmd_complete(cmd, mgmt_status(err));
+ mgmt_pending_free(cmd);
+}
+
+static int disconnect_sync(struct hci_dev *hdev, void *data)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_disconnect *cp = cmd->param;
+ struct hci_conn *conn;
+
+ if (cp->addr.type == BDADDR_BREDR)
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
+ &cp->addr.bdaddr);
+ else
+ conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+
+ if (!conn)
+ return -ENOTCONN;
+
+ /* Disregard any possible error since the likes of hci_abort_conn_sync
+ * will clean up the connection no matter the error.
+ */
+ hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM);
+
+ return 0;
+}
+
static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_cp_disconnect *cp = data;
struct mgmt_rp_disconnect rp;
struct mgmt_pending_cmd *cmd;
- struct hci_conn *conn;
int err;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -3082,27 +3121,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- if (pending_find(MGMT_OP_DISCONNECT, hdev)) {
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT,
- MGMT_STATUS_BUSY, &rp, sizeof(rp));
- goto failed;
- }
-
- if (cp->addr.type == BDADDR_BREDR)
- conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
- &cp->addr.bdaddr);
- else
- conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr,
- le_addr_type(cp->addr.type));
-
- if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) {
- err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT,
- MGMT_STATUS_NOT_CONNECTED, &rp,
- sizeof(rp));
- goto failed;
- }
-
- cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, hdev, data, len);
+ cmd = mgmt_pending_new(sk, MGMT_OP_DISCONNECT, hdev, data, len);
if (!cmd) {
err = -ENOMEM;
goto failed;
@@ -3110,9 +3129,10 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
cmd->cmd_complete = generic_cmd_complete;
- err = hci_disconnect(conn, HCI_ERROR_REMOTE_USER_TERM);
+ err = hci_cmd_sync_queue(hdev, disconnect_sync, cmd,
+ disconnect_complete);
if (err < 0)
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
failed:
hci_dev_unlock(hdev);
@@ -3456,6 +3476,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
* will be kept and this function does nothing.
*/
p = hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type);
+ if (!p) {
+ err = -EIO;
+ goto unlock;
+ }
if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT)
p->auto_connect = HCI_AUTO_CONN_DISABLED;
@@ -7068,7 +7092,6 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
for (i = 0; i < irk_count; i++) {
struct mgmt_irk_info *irk = &cp->irks[i];
- u8 addr_type = le_addr_type(irk->addr.type);
if (hci_is_blocked_key(hdev,
HCI_BLOCKED_KEY_TYPE_IRK,
@@ -7078,12 +7101,8 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
continue;
}
- /* When using SMP over BR/EDR, the addr type should be set to BREDR */
- if (irk->addr.type == BDADDR_BREDR)
- addr_type = BDADDR_BREDR;
-
hci_add_irk(hdev, &irk->addr.bdaddr,
- addr_type, irk->val,
+ le_addr_type(irk->addr.type), irk->val,
BDADDR_ANY);
}
@@ -7148,15 +7167,6 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
bt_dev_dbg(hdev, "key_count %u", key_count);
- for (i = 0; i < key_count; i++) {
- struct mgmt_ltk_info *key = &cp->keys[i];
-
- if (!ltk_is_valid(key))
- return mgmt_cmd_status(sk, hdev->id,
- MGMT_OP_LOAD_LONG_TERM_KEYS,
- MGMT_STATUS_INVALID_PARAMS);
- }
-
hci_dev_lock(hdev);
hci_smp_ltks_clear(hdev);
@@ -7164,7 +7174,6 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
for (i = 0; i < key_count; i++) {
struct mgmt_ltk_info *key = &cp->keys[i];
u8 type, authenticated;
- u8 addr_type = le_addr_type(key->addr.type);
if (hci_is_blocked_key(hdev,
HCI_BLOCKED_KEY_TYPE_LTK,
@@ -7174,6 +7183,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
continue;
}
+ if (!ltk_is_valid(key)) {
+ bt_dev_warn(hdev, "Invalid LTK for %pMR",
+ &key->addr.bdaddr);
+ continue;
+ }
+
switch (key->type) {
case MGMT_LTK_UNAUTHENTICATED:
authenticated = 0x00;
@@ -7199,12 +7214,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
continue;
}
- /* When using SMP over BR/EDR, the addr type should be set to BREDR */
- if (key->addr.type == BDADDR_BREDR)
- addr_type = BDADDR_BREDR;
-
hci_add_ltk(hdev, &key->addr.bdaddr,
- addr_type, type, authenticated,
+ le_addr_type(key->addr.type), type, authenticated,
key->val, key->enc_size, key->ediv, key->rand);
}
@@ -9498,7 +9509,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
ev.store_hint = persistent;
bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
- ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type);
+ ev.key.addr.type = BDADDR_BREDR;
ev.key.type = key->type;
memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE);
ev.key.pin_len = key->pin_len;
@@ -9549,7 +9560,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
ev.store_hint = persistent;
bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
- ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type);
+ ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type);
ev.key.type = mgmt_ltk_type(key);
ev.key.enc_size = key->enc_size;
ev.key.ediv = key->ediv;
@@ -9578,7 +9589,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent)
bacpy(&ev.rpa, &irk->rpa);
bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr);
- ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type);
+ ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type);
memcpy(ev.irk.val, irk->val, sizeof(irk->val));
mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL);
@@ -9607,7 +9618,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk,
ev.store_hint = persistent;
bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr);
- ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type);
+ ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type);
ev.key.type = csrk->type;
memcpy(ev.key.val, csrk->val, sizeof(csrk->val));
@@ -9685,18 +9696,6 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
mgmt_event_skb(skb, NULL);
}
-static void disconnect_rsp(struct mgmt_pending_cmd *cmd, void *data)
-{
- struct sock **sk = data;
-
- cmd->cmd_complete(cmd, 0);
-
- *sk = cmd->sk;
- sock_hold(*sk);
-
- mgmt_pending_remove(cmd);
-}
-
static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data)
{
struct hci_dev *hdev = data;
@@ -9740,8 +9739,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
if (link_type != ACL_LINK && link_type != LE_LINK)
return;
- mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk);
-
bacpy(&ev.addr.bdaddr, bdaddr);
ev.addr.type = link_to_bdaddr(link_type, addr_type);
ev.reason = reason;
@@ -9754,9 +9751,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
if (sk)
sock_put(sk);
-
- mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp,
- hdev);
}
void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
@@ -9785,13 +9779,18 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
mgmt_pending_remove(cmd);
}
-void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
- u8 addr_type, u8 status)
+void mgmt_connect_failed(struct hci_dev *hdev, struct hci_conn *conn, u8 status)
{
struct mgmt_ev_connect_failed ev;
- bacpy(&ev.addr.bdaddr, bdaddr);
- ev.addr.type = link_to_bdaddr(link_type, addr_type);
+ if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) {
+ mgmt_device_disconnected(hdev, &conn->dst, conn->type,
+ conn->dst_type, status, true);
+ return;
+ }
+
+ bacpy(&ev.addr.bdaddr, &conn->dst);
+ ev.addr.type = link_to_bdaddr(conn->type, conn->dst_type);
ev.status = mgmt_status(status);
mgmt_event(MGMT_EV_CONNECT_FAILED, hdev, &ev, sizeof(ev), NULL);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 1e7ea3a4b7ef..8b9724fd752a 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -914,7 +914,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
* Confirms and the responder Enters the passkey.
*/
if (smp->method == OVERLAP) {
- if (hcon->role == HCI_ROLE_MASTER)
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags))
smp->method = CFM_PASSKEY;
else
smp->method = REQ_PASSKEY;
@@ -964,7 +964,7 @@ static u8 smp_confirm(struct smp_chan *smp)
smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp);
- if (conn->hcon->out)
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags))
SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM);
else
SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM);
@@ -980,7 +980,8 @@ static u8 smp_random(struct smp_chan *smp)
int ret;
bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn,
- conn->hcon->out ? "initiator" : "responder");
+ test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" :
+ "responder");
ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp,
hcon->init_addr_type, &hcon->init_addr,
@@ -994,7 +995,7 @@ static u8 smp_random(struct smp_chan *smp)
return SMP_CONFIRM_FAILED;
}
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
u8 stk[16];
__le64 rand = 0;
__le16 ediv = 0;
@@ -1059,7 +1060,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
}
if (smp->remote_irk) {
- smp->remote_irk->link_type = hcon->type;
mgmt_new_irk(hdev, smp->remote_irk, persistent);
/* Now that user space can be considered to know the
@@ -1079,28 +1079,24 @@ static void smp_notify_keys(struct l2cap_conn *conn)
}
if (smp->csrk) {
- smp->csrk->link_type = hcon->type;
smp->csrk->bdaddr_type = hcon->dst_type;
bacpy(&smp->csrk->bdaddr, &hcon->dst);
mgmt_new_csrk(hdev, smp->csrk, persistent);
}
if (smp->responder_csrk) {
- smp->responder_csrk->link_type = hcon->type;
smp->responder_csrk->bdaddr_type = hcon->dst_type;
bacpy(&smp->responder_csrk->bdaddr, &hcon->dst);
mgmt_new_csrk(hdev, smp->responder_csrk, persistent);
}
if (smp->ltk) {
- smp->ltk->link_type = hcon->type;
smp->ltk->bdaddr_type = hcon->dst_type;
bacpy(&smp->ltk->bdaddr, &hcon->dst);
mgmt_new_ltk(hdev, smp->ltk, persistent);
}
if (smp->responder_ltk) {
- smp->responder_ltk->link_type = hcon->type;
smp->responder_ltk->bdaddr_type = hcon->dst_type;
bacpy(&smp->responder_ltk->bdaddr, &hcon->dst);
mgmt_new_ltk(hdev, smp->responder_ltk, persistent);
@@ -1120,8 +1116,6 @@ static void smp_notify_keys(struct l2cap_conn *conn)
key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst,
smp->link_key, type, 0, &persistent);
if (key) {
- key->link_type = hcon->type;
- key->bdaddr_type = hcon->dst_type;
mgmt_new_link_key(hdev, key, persistent);
/* Don't keep debug keys around if the relevant
@@ -1256,14 +1250,15 @@ static void smp_distribute_keys(struct smp_chan *smp)
rsp = (void *) &smp->prsp[1];
/* The responder sends its keys first */
- if (hcon->out && (smp->remote_key_dist & KEY_DIST_MASK)) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags) &&
+ (smp->remote_key_dist & KEY_DIST_MASK)) {
smp_allow_key_dist(smp);
return;
}
req = (void *) &smp->preq[1];
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
keydist = &rsp->init_key_dist;
*keydist &= req->init_key_dist;
} else {
@@ -1432,7 +1427,7 @@ static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16])
struct hci_conn *hcon = smp->conn->hcon;
u8 *na, *nb, a[7], b[7];
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
na = smp->prnd;
nb = smp->rrnd;
} else {
@@ -1460,7 +1455,7 @@ static void sc_dhkey_check(struct smp_chan *smp)
a[6] = hcon->init_addr_type;
b[6] = hcon->resp_addr_type;
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
local_addr = a;
remote_addr = b;
memcpy(io_cap, &smp->preq[1], 3);
@@ -1539,7 +1534,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op)
/* The round is only complete when the initiator
* receives pairing random.
*/
- if (!hcon->out) {
+ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM,
sizeof(smp->prnd), smp->prnd);
if (smp->passkey_round == 20)
@@ -1567,7 +1562,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op)
SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM);
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM,
sizeof(smp->prnd), smp->prnd);
return 0;
@@ -1578,7 +1573,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op)
case SMP_CMD_PUBLIC_KEY:
default:
/* Initiating device starts the round */
- if (!hcon->out)
+ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags))
return 0;
bt_dev_dbg(hdev, "Starting passkey round %u",
@@ -1623,7 +1618,7 @@ static int sc_user_reply(struct smp_chan *smp, u16 mgmt_op, __le32 passkey)
}
/* Initiator sends DHKey check first */
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
sc_dhkey_check(smp);
SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK);
} else if (test_and_clear_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags)) {
@@ -1746,7 +1741,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
struct smp_cmd_pairing rsp, *req = (void *) skb->data;
struct l2cap_chan *chan = conn->smp;
struct hci_dev *hdev = conn->hcon->hdev;
- struct smp_chan *smp;
+ struct smp_chan *smp = chan->data;
u8 key_size, auth, sec_level;
int ret;
@@ -1755,16 +1750,14 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*req))
return SMP_INVALID_PARAMS;
- if (conn->hcon->role != HCI_ROLE_SLAVE)
+ if (smp && test_bit(SMP_FLAG_INITIATOR, &smp->flags))
return SMP_CMD_NOTSUPP;
- if (!chan->data)
+ if (!smp) {
smp = smp_chan_create(conn);
- else
- smp = chan->data;
-
- if (!smp)
- return SMP_UNSPECIFIED;
+ if (!smp)
+ return SMP_UNSPECIFIED;
+ }
/* We didn't start the pairing, so match remote */
auth = req->auth_req & AUTH_REQ_MASK(hdev);
@@ -1946,7 +1939,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*rsp))
return SMP_INVALID_PARAMS;
- if (conn->hcon->role != HCI_ROLE_MASTER)
+ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags))
return SMP_CMD_NOTSUPP;
skb_pull(skb, sizeof(*rsp));
@@ -2041,7 +2034,7 @@ static u8 sc_check_confirm(struct smp_chan *smp)
if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY)
return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM);
- if (conn->hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
smp->prnd);
SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM);
@@ -2063,7 +2056,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
u8 auth;
/* The issue is only observed when we're in responder role */
- if (hcon->out)
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags))
return SMP_UNSPECIFIED;
if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) {
@@ -2099,7 +2092,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
struct hci_dev *hdev = hcon->hdev;
bt_dev_dbg(hdev, "conn %p %s", conn,
- hcon->out ? "initiator" : "responder");
+ test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" :
+ "responder");
if (skb->len < sizeof(smp->pcnf))
return SMP_INVALID_PARAMS;
@@ -2121,7 +2115,7 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
return ret;
}
- if (conn->hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
smp->prnd);
SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM);
@@ -2156,7 +2150,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
if (!test_bit(SMP_FLAG_SC, &smp->flags))
return smp_random(smp);
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
pkax = smp->local_pk;
pkbx = smp->remote_pk;
na = smp->prnd;
@@ -2169,7 +2163,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
}
if (smp->method == REQ_OOB) {
- if (!hcon->out)
+ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags))
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM,
sizeof(smp->prnd), smp->prnd);
SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK);
@@ -2180,7 +2174,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY)
return sc_passkey_round(smp, SMP_CMD_PAIRING_RANDOM);
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
u8 cfm[16];
err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->local_pk,
@@ -2221,7 +2215,7 @@ mackey_and_ltk:
return SMP_UNSPECIFIED;
if (smp->method == REQ_OOB) {
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
sc_dhkey_check(smp);
SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK);
}
@@ -2295,10 +2289,27 @@ bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level,
return false;
}
+static void smp_send_pairing_req(struct smp_chan *smp, __u8 auth)
+{
+ struct smp_cmd_pairing cp;
+
+ if (smp->conn->hcon->type == ACL_LINK)
+ build_bredr_pairing_cmd(smp, &cp, NULL);
+ else
+ build_pairing_cmd(smp->conn, &cp, NULL, auth);
+
+ smp->preq[0] = SMP_CMD_PAIRING_REQ;
+ memcpy(&smp->preq[1], &cp, sizeof(cp));
+
+ smp_send_cmd(smp->conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp);
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP);
+
+ set_bit(SMP_FLAG_INITIATOR, &smp->flags);
+}
+
static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
{
struct smp_cmd_security_req *rp = (void *) skb->data;
- struct smp_cmd_pairing cp;
struct hci_conn *hcon = conn->hcon;
struct hci_dev *hdev = hcon->hdev;
struct smp_chan *smp;
@@ -2347,16 +2358,20 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
skb_pull(skb, sizeof(*rp));
- memset(&cp, 0, sizeof(cp));
- build_pairing_cmd(conn, &cp, NULL, auth);
+ smp_send_pairing_req(smp, auth);
- smp->preq[0] = SMP_CMD_PAIRING_REQ;
- memcpy(&smp->preq[1], &cp, sizeof(cp));
+ return 0;
+}
- smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp);
- SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP);
+static void smp_send_security_req(struct smp_chan *smp, __u8 auth)
+{
+ struct smp_cmd_security_req cp;
- return 0;
+ cp.auth_req = auth;
+ smp_send_cmd(smp->conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp);
+ SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ);
+
+ clear_bit(SMP_FLAG_INITIATOR, &smp->flags);
}
int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
@@ -2427,23 +2442,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
authreq |= SMP_AUTH_MITM;
}
- if (hcon->role == HCI_ROLE_MASTER) {
- struct smp_cmd_pairing cp;
-
- build_pairing_cmd(conn, &cp, NULL, authreq);
- smp->preq[0] = SMP_CMD_PAIRING_REQ;
- memcpy(&smp->preq[1], &cp, sizeof(cp));
-
- smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp);
- SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP);
- } else {
- struct smp_cmd_security_req cp;
- cp.auth_req = authreq;
- smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp);
- SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ);
- }
+ if (hcon->role == HCI_ROLE_MASTER)
+ smp_send_pairing_req(smp, authreq);
+ else
+ smp_send_security_req(smp, authreq);
- set_bit(SMP_FLAG_INITIATOR, &smp->flags);
ret = 0;
unlock:
@@ -2694,8 +2697,6 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb)
static u8 sc_select_method(struct smp_chan *smp)
{
- struct l2cap_conn *conn = smp->conn;
- struct hci_conn *hcon = conn->hcon;
struct smp_cmd_pairing *local, *remote;
u8 local_mitm, remote_mitm, local_io, remote_io, method;
@@ -2708,7 +2709,7 @@ static u8 sc_select_method(struct smp_chan *smp)
* the "struct smp_cmd_pairing" from them we need to skip the
* first byte which contains the opcode.
*/
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
local = (void *) &smp->preq[1];
remote = (void *) &smp->prsp[1];
} else {
@@ -2777,7 +2778,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
/* Non-initiating device sends its public key after receiving
* the key from the initiating device.
*/
- if (!hcon->out) {
+ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
err = sc_send_public_key(smp);
if (err)
return err;
@@ -2839,7 +2840,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
}
if (smp->method == REQ_OOB) {
- if (hcon->out)
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags))
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM,
sizeof(smp->prnd), smp->prnd);
@@ -2848,7 +2849,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
return 0;
}
- if (hcon->out)
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags))
SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM);
if (smp->method == REQ_PASSKEY) {
@@ -2863,7 +2864,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
/* The Initiating device waits for the non-initiating device to
* send the confirm value.
*/
- if (conn->hcon->out)
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags))
return 0;
err = smp_f4(smp->tfm_cmac, smp->local_pk, smp->remote_pk, smp->prnd,
@@ -2897,7 +2898,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
a[6] = hcon->init_addr_type;
b[6] = hcon->resp_addr_type;
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
local_addr = a;
remote_addr = b;
memcpy(io_cap, &smp->prsp[1], 3);
@@ -2922,7 +2923,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
if (crypto_memneq(check->e, e, 16))
return SMP_DHKEY_CHECK_FAILED;
- if (!hcon->out) {
+ if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
if (test_bit(SMP_FLAG_WAIT_USER, &smp->flags)) {
set_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags);
return 0;
@@ -2934,7 +2935,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
sc_add_ltk(smp);
- if (hcon->out) {
+ if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) {
hci_le_start_enc(hcon, 0, 0, smp->tk, smp->enc_key_size);
hcon->enc_key_size = smp->enc_key_size;
}
@@ -3083,7 +3084,6 @@ static void bredr_pairing(struct l2cap_chan *chan)
struct l2cap_conn *conn = chan->conn;
struct hci_conn *hcon = conn->hcon;
struct hci_dev *hdev = hcon->hdev;
- struct smp_cmd_pairing req;
struct smp_chan *smp;
bt_dev_dbg(hdev, "chan %p", chan);
@@ -3135,14 +3135,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
bt_dev_dbg(hdev, "starting SMP over BR/EDR");
- /* Prepare and send the BR/EDR SMP Pairing Request */
- build_bredr_pairing_cmd(smp, &req, NULL);
-
- smp->preq[0] = SMP_CMD_PAIRING_REQ;
- memcpy(&smp->preq[1], &req, sizeof(req));
-
- smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(req), &req);
- SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP);
+ smp_send_pairing_req(smp, 0x00);
}
static void smp_resume_cb(struct l2cap_chan *chan)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index fb1115857e49..26b79feb385d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -487,9 +487,11 @@ void br_dev_setup(struct net_device *dev)
dev->ethtool_ops = &br_ethtool_ops;
SET_NETDEV_DEVTYPE(dev, &br_type);
dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
+ dev->lltx = true;
+ dev->netns_local = true;
- dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+ dev->features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX;
dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
dev->vlan_features = COMMON_FEATURES;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c77591e63841..ad7a42b505ef 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -1469,12 +1469,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
modified = true;
}
- if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) {
+ if (test_and_set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) {
/* Refresh entry */
fdb->used = jiffies;
- } else if (!test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags)) {
- /* Take over SW learned entry */
- set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags);
+ } else {
modified = true;
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 8f9c19d992ac..0e8bc0ea6175 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -36,6 +36,7 @@
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
#include <net/netns/generic.h>
+#include <net/inet_dscp.h>
#include <linux/uaccess.h>
#include "br_private.h"
@@ -402,7 +403,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
- RT_TOS(iph->tos), 0,
+ iph->tos & INET_DSCP_MASK, 0,
RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index cbd0e3586c3f..3e67d4aff419 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1256,7 +1256,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
goto free_unlock;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
if (newinfo->nentries)
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index bd4d1b4d745f..d12a221366d6 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -142,7 +142,7 @@ static int nft_meta_bridge_set_init(const struct nft_ctx *ctx,
}
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
@@ -168,8 +168,7 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
}
static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
@@ -179,7 +178,7 @@ static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
hooks = 1 << NF_BR_PRE_ROUTING;
break;
default:
- return nft_meta_set_validate(ctx, expr, data);
+ return nft_meta_set_validate(ctx, expr);
}
return nft_chain_validate_hooks(ctx->chain, hooks);
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 71b54fed7263..1cb5c16e97b7 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -170,8 +170,7 @@ out:
}
static int nft_reject_bridge_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_BR_PRE_ROUTING) |
(1 << NF_BR_LOCAL_IN));
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 2ae8cfa3df88..96236d21b18e 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -298,10 +298,8 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
return dstpkt;
}
- if (expectlen > addlen)
- neededtailspace = expectlen;
- else
- neededtailspace = addlen;
+
+ neededtailspace = max(expectlen, addlen);
if (dst->tail + neededtailspace > dst->end) {
/* Create a dumplicate of 'dst' with more tail space */
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 47901bd4def1..94ad09e36df2 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -47,7 +47,6 @@ struct chnl_net {
struct caif_connect_request conn_req;
struct list_head list_field;
struct net_device *netdev;
- char name[256];
wait_queue_head_t netmgmt_wq;
/* Flow status to remember and control the transmission. */
bool flowenabled;
@@ -347,7 +346,6 @@ static int chnl_net_init(struct net_device *dev)
struct chnl_net *priv;
ASSERT_RTNL();
priv = netdev_priv(dev);
- strncpy(priv->name, dev->name, sizeof(priv->name));
INIT_LIST_HEAD(&priv->list_field);
return 0;
}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 27d5fcf0eac9..217049fa496e 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1470,6 +1470,12 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
/* remove device reference, if this is our bound device */
if (bo->bound && bo->ifindex == dev->ifindex) {
+#if IS_ENABLED(CONFIG_PROC_FS)
+ if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) {
+ remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir);
+ bo->bcm_proc_read = NULL;
+ }
+#endif
bo->bound = 0;
bo->ifindex = 0;
notify_enodev = 1;
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 4be73de5033c..319f47df3330 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1179,10 +1179,10 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
break;
case -ENETDOWN:
/* In this case we should get a netdev_event(), all active
- * sessions will be cleared by
- * j1939_cancel_all_active_sessions(). So handle this as an
- * error, but let j1939_cancel_all_active_sessions() do the
- * cleanup including propagation of the error to user space.
+ * sessions will be cleared by j1939_cancel_active_session().
+ * So handle this as an error, but let
+ * j1939_cancel_active_session() do the cleanup including
+ * propagation of the error to user space.
*/
break;
case -EOVERFLOW:
diff --git a/net/core/Makefile b/net/core/Makefile
index 62be9aef2528..c3ebbaf9c81e 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
obj-y += hotdata.o
+obj-y += netdev_rx_queue.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
@@ -43,3 +44,4 @@ obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
obj-$(CONFIG_NET_TEST) += net_test.o
+obj-$(CONFIG_NET_DEVMEM) += devmem.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a40f733b37d7..f0693707aece 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -407,6 +407,9 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
return 0;
}
+ if (!skb_frags_readable(skb))
+ goto short_copy;
+
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
@@ -623,6 +626,9 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
{
int frag = skb_shinfo(skb)->nr_frags;
+ if (!skb_frags_readable(skb))
+ return -EFAULT;
+
while (length && iov_iter_count(from)) {
struct page *head, *last_head = NULL;
struct page *pages[MAX_SKB_FRAGS];
diff --git a/net/core/dev.c b/net/core/dev.c
index f66e61407883..cd479f5f22f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -158,8 +158,10 @@
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
#include <net/rps.h>
+#include <linux/phy_link_topology.h>
#include "dev.h"
+#include "devmem.h"
#include "net-sysfs.h"
static DEFINE_SPINLOCK(ptype_lock);
@@ -3310,6 +3312,10 @@ int skb_checksum_help(struct sk_buff *skb)
return -EINVAL;
}
+ if (!skb_frags_readable(skb)) {
+ return -EFAULT;
+ }
+
/* Before computing a checksum, we should make sure no frag could
* be modified by an external entity : checksum could be wrong.
*/
@@ -3386,6 +3392,7 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
out:
return ret;
}
+EXPORT_SYMBOL(skb_crc32c_csum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
@@ -3431,8 +3438,9 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
if (!(dev->features & NETIF_F_HIGHDMA)) {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = skb_frag_page(frag);
- if (PageHighMem(skb_frag_page(frag)))
+ if (page && PageHighMem(page))
return 1;
}
}
@@ -3705,7 +3713,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
next = skb->next;
skb_mark_not_on_list(skb);
- /* in case skb wont be segmented, point to itself */
+ /* in case skb won't be segmented, point to itself */
skb->prev = skb;
skb = validate_xmit_skb(skb, dev, again);
@@ -4245,13 +4253,6 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
}
EXPORT_SYMBOL(dev_pick_tx_zero);
-u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev)
-{
- return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
-}
-EXPORT_SYMBOL(dev_pick_tx_cpu_id);
-
u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
@@ -5248,7 +5249,7 @@ int netif_rx(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_rx);
-static __latent_entropy void net_tx_action(struct softirq_action *h)
+static __latent_entropy void net_tx_action(void)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -5725,10 +5726,9 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct packet_type *pt_curr = NULL;
/* Current (common) orig_dev of sublist */
struct net_device *od_curr = NULL;
- struct list_head sublist;
struct sk_buff *skb, *next;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *orig_dev = skb->dev;
struct packet_type *pt_prev = NULL;
@@ -5866,9 +5866,8 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
void netif_receive_skb_list_internal(struct list_head *head)
{
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue),
skb);
@@ -6921,7 +6920,7 @@ static int napi_threaded_poll(void *data)
return 0;
}
-static __latent_entropy void net_rx_action(struct softirq_action *h)
+static __latent_entropy void net_rx_action(void)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
@@ -9272,7 +9271,7 @@ EXPORT_SYMBOL(netdev_port_same_parent_id);
*/
int dev_change_proto_down(struct net_device *dev, bool proto_down)
{
- if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN))
+ if (!dev->change_proto_down)
return -EOPNOTSUPP;
if (!netif_device_present(dev))
return -ENODEV;
@@ -9369,6 +9368,20 @@ u8 dev_xdp_prog_count(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
+int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ if (!dev->netdev_ops->ndo_bpf)
+ return -EOPNOTSUPP;
+
+ if (dev_get_min_mp_channel_count(dev)) {
+ NL_SET_ERR_MSG(bpf->extack, "unable to propagate XDP to device using memory provider");
+ return -EBUSY;
+ }
+
+ return dev->netdev_ops->ndo_bpf(dev, bpf);
+}
+EXPORT_SYMBOL_GPL(dev_xdp_propagate);
+
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
struct bpf_prog *prog = dev_xdp_prog(dev, mode);
@@ -9397,6 +9410,11 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
struct netdev_bpf xdp;
int err;
+ if (dev_get_min_mp_channel_count(dev)) {
+ NL_SET_ERR_MSG(extack, "unable to install XDP to device using memory provider");
+ return -EBUSY;
+ }
+
memset(&xdp, 0, sizeof(xdp));
xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
xdp.extack = extack;
@@ -9821,6 +9839,20 @@ err_out:
return err;
}
+u32 dev_get_min_mp_channel_count(const struct net_device *dev)
+{
+ int i;
+
+ ASSERT_RTNL();
+
+ for (i = dev->real_num_rx_queues - 1; i >= 0; i--)
+ if (dev->_rx[i].mp_params.mp_priv)
+ /* The channel count is the idx plus 1. */
+ return i + 1;
+
+ return 0;
+}
+
/**
* dev_index_reserve() - allocate an ifindex in a namespace
* @net: the applicable net namespace
@@ -10321,6 +10353,17 @@ static void netdev_do_free_pcpu_stats(struct net_device *dev)
}
}
+static void netdev_free_phy_link_topology(struct net_device *dev)
+{
+ struct phy_link_topology *topo = dev->link_topo;
+
+ if (IS_ENABLED(CONFIG_PHYLIB) && topo) {
+ xa_destroy(&topo->phys);
+ kfree(topo);
+ dev->link_topo = NULL;
+ }
+}
+
/**
* register_netdevice() - register a network device
* @dev: device to register
@@ -10868,7 +10911,7 @@ noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset)
return;
}
- field = (__force unsigned long __percpu *)((__force void *)p + offset);
+ field = (unsigned long __percpu *)((void __percpu *)p + offset);
this_cpu_inc(*field);
}
EXPORT_SYMBOL_GPL(netdev_core_stats_inc);
@@ -11099,6 +11142,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
#ifdef CONFIG_NET_SCHED
hash_init(dev->qdisc_hash);
#endif
+
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -11120,7 +11164,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (!dev->ethtool)
goto free_all;
- strcpy(dev->name, name);
+ strscpy(dev->name, name);
dev->name_assign_type = name_assign_type;
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
@@ -11191,6 +11235,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
+ netdev_free_phy_link_topology(dev);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED ||
dev->reg_state == NETREG_DUMMY) {
@@ -11343,6 +11389,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
+ dev_dmabuf_uninstall(dev);
netdev_offload_xstats_disable_all(dev);
@@ -11407,7 +11454,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
* @head: list of devices
*
* Note: As most callers use a stack allocated list_head,
- * we force a list_del() to make sure stack wont be corrupted later.
+ * we force a list_del() to make sure stack won't be corrupted later.
*/
void unregister_netdevice_many(struct list_head *head)
{
@@ -11462,10 +11509,10 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
/* Don't allow namespace local devices to be moved. */
err = -EINVAL;
- if (dev->features & NETIF_F_NETNS_LOCAL)
+ if (dev->netns_local)
goto out;
- /* Ensure the device has been registrered */
+ /* Ensure the device has been registered */
if (dev->reg_state != NETREG_REGISTERED)
goto out;
@@ -11844,7 +11891,7 @@ static void __net_exit default_device_exit_net(struct net *net)
char fb_name[IFNAMSIZ];
/* Ignore unmoveable devices (i.e. loopback) */
- if (dev->features & NETIF_F_NETNS_LOCAL)
+ if (dev->netns_local)
continue;
/* Leave virtual devices for the generic cleanup */
@@ -11905,7 +11952,7 @@ static struct pernet_operations __net_initdata default_device_ops = {
static void __init net_dev_struct_check(void)
{
/* TX read-mostly hotpath */
- CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags_fast);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index baa63dee2829..166e404f7c03 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -262,7 +262,7 @@ static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
}
/* This function only works where there is a strict 1-1 relationship
- * between source and destionation of they synch. If you ever need to
+ * between source and destination of they synch. If you ever need to
* sync addresses to more then 1 destination, you need to use
* __hw_addr_sync_multiple().
*/
@@ -299,8 +299,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
EXPORT_SYMBOL(__hw_addr_unsync);
/**
- * __hw_addr_sync_dev - Synchonize device's multicast list
- * @list: address list to syncronize
+ * __hw_addr_sync_dev - Synchronize device's multicast list
+ * @list: address list to synchronize
* @dev: device to sync
* @sync: function to call if address should be added
* @unsync: function to call if address should be removed
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 8592c052c0f4..473c437b6b53 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -317,8 +317,7 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
* should take precedence in front of hardware timestamping provided by the
* netdev. If the netdev driver needs to perform specific actions even for PHY
* timestamping to work properly (a switch port must trap the timestamped
- * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
- * dev->priv_flags.
+ * frames and not forward them), it must set dev->see_all_hwtstamp_requests.
*/
int dev_set_hwtstamp_phylib(struct net_device *dev,
struct kernel_hwtstamp_config *cfg,
@@ -332,13 +331,13 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV;
- if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ if (phy_ts && dev->see_all_hwtstamp_requests) {
err = ops->ndo_hwtstamp_get(dev, &old_cfg);
if (err)
return err;
}
- if (!phy_ts || (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ if (!phy_ts || dev->see_all_hwtstamp_requests) {
err = ops->ndo_hwtstamp_set(dev, cfg, extack);
if (err) {
if (extack->_msg)
@@ -347,7 +346,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
}
}
- if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS))
+ if (phy_ts && dev->see_all_hwtstamp_requests)
changed = kernel_hwtstamp_config_changed(&old_cfg, cfg);
if (phy_ts) {
diff --git a/net/core/devmem.c b/net/core/devmem.c
new file mode 100644
index 000000000000..11b91c12ee11
--- /dev/null
+++ b/net/core/devmem.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Devmem TCP
+ *
+ * Authors: Mina Almasry <almasrymina@google.com>
+ * Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+ * Kaiyuan Zhang <kaiyuanz@google.com
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/genalloc.h>
+#include <linux/mm.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+#include <net/page_pool/helpers.h>
+#include <trace/events/page_pool.h>
+
+#include "devmem.h"
+#include "mp_dmabuf_devmem.h"
+#include "page_pool_priv.h"
+
+/* Device memory support */
+
+/* Protected by rtnl_lock() */
+static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
+
+static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
+ struct gen_pool_chunk *chunk,
+ void *not_used)
+{
+ struct dmabuf_genpool_chunk_owner *owner = chunk->owner;
+
+ kvfree(owner->niovs);
+ kfree(owner);
+}
+
+static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
+{
+ struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+
+ return owner->base_dma_addr +
+ ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
+}
+
+void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
+{
+ size_t size, avail;
+
+ gen_pool_for_each_chunk(binding->chunk_pool,
+ net_devmem_dmabuf_free_chunk_owner, NULL);
+
+ size = gen_pool_size(binding->chunk_pool);
+ avail = gen_pool_avail(binding->chunk_pool);
+
+ if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
+ size, avail))
+ gen_pool_destroy(binding->chunk_pool);
+
+ dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
+ DMA_FROM_DEVICE);
+ dma_buf_detach(binding->dmabuf, binding->attachment);
+ dma_buf_put(binding->dmabuf);
+ xa_destroy(&binding->bound_rxqs);
+ kfree(binding);
+}
+
+struct net_iov *
+net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ struct dmabuf_genpool_chunk_owner *owner;
+ unsigned long dma_addr;
+ struct net_iov *niov;
+ ssize_t offset;
+ ssize_t index;
+
+ dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
+ (void **)&owner);
+ if (!dma_addr)
+ return NULL;
+
+ offset = dma_addr - owner->base_dma_addr;
+ index = offset / PAGE_SIZE;
+ niov = &owner->niovs[index];
+
+ niov->pp_magic = 0;
+ niov->pp = NULL;
+ atomic_long_set(&niov->pp_ref_count, 0);
+
+ return niov;
+}
+
+void net_devmem_free_dmabuf(struct net_iov *niov)
+{
+ struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
+ unsigned long dma_addr = net_devmem_get_dma_addr(niov);
+
+ if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
+ PAGE_SIZE)))
+ return;
+
+ gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
+}
+
+void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ struct netdev_rx_queue *rxq;
+ unsigned long xa_idx;
+ unsigned int rxq_idx;
+
+ if (binding->list.next)
+ list_del(&binding->list);
+
+ xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
+ WARN_ON(rxq->mp_params.mp_priv != binding);
+
+ rxq->mp_params.mp_priv = NULL;
+
+ rxq_idx = get_netdev_rx_queue_index(rxq);
+
+ WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
+ }
+
+ xa_erase(&net_devmem_dmabuf_bindings, binding->id);
+
+ net_devmem_dmabuf_binding_put(binding);
+}
+
+int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_rx_queue *rxq;
+ u32 xa_idx;
+ int err;
+
+ if (rxq_idx >= dev->real_num_rx_queues) {
+ NL_SET_ERR_MSG(extack, "rx queue index out of range");
+ return -ERANGE;
+ }
+
+ rxq = __netif_get_rx_queue(dev, rxq_idx);
+ if (rxq->mp_params.mp_priv) {
+ NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
+ return -EEXIST;
+ }
+
+#ifdef CONFIG_XDP_SOCKETS
+ if (rxq->pool) {
+ NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
+ return -EBUSY;
+ }
+#endif
+
+ err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
+ GFP_KERNEL);
+ if (err)
+ return err;
+
+ rxq->mp_params.mp_priv = binding;
+
+ err = netdev_rx_queue_restart(dev, rxq_idx);
+ if (err)
+ goto err_xa_erase;
+
+ return 0;
+
+err_xa_erase:
+ rxq->mp_params.mp_priv = NULL;
+ xa_erase(&binding->bound_rxqs, xa_idx);
+
+ return err;
+}
+
+struct net_devmem_dmabuf_binding *
+net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct netlink_ext_ack *extack)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ static u32 id_alloc_next;
+ struct scatterlist *sg;
+ struct dma_buf *dmabuf;
+ unsigned int sg_idx, i;
+ unsigned long virtual;
+ int err;
+
+ dmabuf = dma_buf_get(dmabuf_fd);
+ if (IS_ERR(dmabuf))
+ return ERR_CAST(dmabuf);
+
+ binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
+ dev_to_node(&dev->dev));
+ if (!binding) {
+ err = -ENOMEM;
+ goto err_put_dmabuf;
+ }
+
+ binding->dev = dev;
+
+ err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
+ binding, xa_limit_32b, &id_alloc_next,
+ GFP_KERNEL);
+ if (err < 0)
+ goto err_free_binding;
+
+ xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);
+
+ refcount_set(&binding->ref, 1);
+
+ binding->dmabuf = dmabuf;
+
+ binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
+ if (IS_ERR(binding->attachment)) {
+ err = PTR_ERR(binding->attachment);
+ NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
+ goto err_free_id;
+ }
+
+ binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
+ DMA_FROM_DEVICE);
+ if (IS_ERR(binding->sgt)) {
+ err = PTR_ERR(binding->sgt);
+ NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
+ goto err_detach;
+ }
+
+ /* For simplicity we expect to make PAGE_SIZE allocations, but the
+ * binding can be much more flexible than that. We may be able to
+ * allocate MTU sized chunks here. Leave that for future work...
+ */
+ binding->chunk_pool =
+ gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
+ if (!binding->chunk_pool) {
+ err = -ENOMEM;
+ goto err_unmap;
+ }
+
+ virtual = 0;
+ for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
+ dma_addr_t dma_addr = sg_dma_address(sg);
+ struct dmabuf_genpool_chunk_owner *owner;
+ size_t len = sg_dma_len(sg);
+ struct net_iov *niov;
+
+ owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
+ dev_to_node(&dev->dev));
+ if (!owner) {
+ err = -ENOMEM;
+ goto err_free_chunks;
+ }
+
+ owner->base_virtual = virtual;
+ owner->base_dma_addr = dma_addr;
+ owner->num_niovs = len / PAGE_SIZE;
+ owner->binding = binding;
+
+ err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
+ dma_addr, len, dev_to_node(&dev->dev),
+ owner);
+ if (err) {
+ kfree(owner);
+ err = -EINVAL;
+ goto err_free_chunks;
+ }
+
+ owner->niovs = kvmalloc_array(owner->num_niovs,
+ sizeof(*owner->niovs),
+ GFP_KERNEL);
+ if (!owner->niovs) {
+ err = -ENOMEM;
+ goto err_free_chunks;
+ }
+
+ for (i = 0; i < owner->num_niovs; i++) {
+ niov = &owner->niovs[i];
+ niov->owner = owner;
+ page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
+ net_devmem_get_dma_addr(niov));
+ }
+
+ virtual += len;
+ }
+
+ return binding;
+
+err_free_chunks:
+ gen_pool_for_each_chunk(binding->chunk_pool,
+ net_devmem_dmabuf_free_chunk_owner, NULL);
+ gen_pool_destroy(binding->chunk_pool);
+err_unmap:
+ dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
+ DMA_FROM_DEVICE);
+err_detach:
+ dma_buf_detach(dmabuf, binding->attachment);
+err_free_id:
+ xa_erase(&net_devmem_dmabuf_bindings, binding->id);
+err_free_binding:
+ kfree(binding);
+err_put_dmabuf:
+ dma_buf_put(dmabuf);
+ return ERR_PTR(err);
+}
+
+void dev_dmabuf_uninstall(struct net_device *dev)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ struct netdev_rx_queue *rxq;
+ unsigned long xa_idx;
+ unsigned int i;
+
+ for (i = 0; i < dev->real_num_rx_queues; i++) {
+ binding = dev->_rx[i].mp_params.mp_priv;
+ if (!binding)
+ continue;
+
+ xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
+ if (rxq == &dev->_rx[i]) {
+ xa_erase(&binding->bound_rxqs, xa_idx);
+ break;
+ }
+ }
+}
+
+/*** "Dmabuf devmem memory provider" ***/
+
+int mp_dmabuf_devmem_init(struct page_pool *pool)
+{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
+
+ if (!binding)
+ return -EINVAL;
+
+ if (!pool->dma_map)
+ return -EOPNOTSUPP;
+
+ if (pool->dma_sync)
+ return -EOPNOTSUPP;
+
+ if (pool->p.order != 0)
+ return -E2BIG;
+
+ net_devmem_dmabuf_binding_get(binding);
+ return 0;
+}
+
+netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
+{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
+ struct net_iov *niov;
+ netmem_ref netmem;
+
+ niov = net_devmem_alloc_dmabuf(binding);
+ if (!niov)
+ return 0;
+
+ netmem = net_iov_to_netmem(niov);
+
+ page_pool_set_pp_info(pool, netmem);
+
+ pool->pages_state_hold_cnt++;
+ trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
+ return netmem;
+}
+
+void mp_dmabuf_devmem_destroy(struct page_pool *pool)
+{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
+
+ net_devmem_dmabuf_binding_put(binding);
+}
+
+bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
+{
+ long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem));
+
+ if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
+ return false;
+
+ if (WARN_ON_ONCE(refcount != 1))
+ return false;
+
+ page_pool_clear_pp_info(netmem);
+
+ net_devmem_free_dmabuf(netmem_to_net_iov(netmem));
+
+ /* We don't want the page pool put_page()ing our net_iovs. */
+ return false;
+}
diff --git a/net/core/devmem.h b/net/core/devmem.h
new file mode 100644
index 000000000000..76099ef9c482
--- /dev/null
+++ b/net/core/devmem.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Device memory TCP support
+ *
+ * Authors: Mina Almasry <almasrymina@google.com>
+ * Willem de Bruijn <willemb@google.com>
+ * Kaiyuan Zhang <kaiyuanz@google.com>
+ *
+ */
+#ifndef _NET_DEVMEM_H
+#define _NET_DEVMEM_H
+
+struct netlink_ext_ack;
+
+struct net_devmem_dmabuf_binding {
+ struct dma_buf *dmabuf;
+ struct dma_buf_attachment *attachment;
+ struct sg_table *sgt;
+ struct net_device *dev;
+ struct gen_pool *chunk_pool;
+
+ /* The user holds a ref (via the netlink API) for as long as they want
+ * the binding to remain alive. Each page pool using this binding holds
+ * a ref to keep the binding alive. Each allocated net_iov holds a
+ * ref.
+ *
+ * The binding undos itself and unmaps the underlying dmabuf once all
+ * those refs are dropped and the binding is no longer desired or in
+ * use.
+ */
+ refcount_t ref;
+
+ /* The list of bindings currently active. Used for netlink to notify us
+ * of the user dropping the bind.
+ */
+ struct list_head list;
+
+ /* rxq's this binding is active on. */
+ struct xarray bound_rxqs;
+
+ /* ID of this binding. Globally unique to all bindings currently
+ * active.
+ */
+ u32 id;
+};
+
+#if defined(CONFIG_NET_DEVMEM)
+/* Owner of the dma-buf chunks inserted into the gen pool. Each scatterlist
+ * entry from the dmabuf is inserted into the genpool as a chunk, and needs
+ * this owner struct to keep track of some metadata necessary to create
+ * allocations from this chunk.
+ */
+struct dmabuf_genpool_chunk_owner {
+ /* Offset into the dma-buf where this chunk starts. */
+ unsigned long base_virtual;
+
+ /* dma_addr of the start of the chunk. */
+ dma_addr_t base_dma_addr;
+
+ /* Array of net_iovs for this chunk. */
+ struct net_iov *niovs;
+ size_t num_niovs;
+
+ struct net_devmem_dmabuf_binding *binding;
+};
+
+void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding);
+struct net_devmem_dmabuf_binding *
+net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct netlink_ext_ack *extack);
+void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding);
+int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding,
+ struct netlink_ext_ack *extack);
+void dev_dmabuf_uninstall(struct net_device *dev);
+
+static inline struct dmabuf_genpool_chunk_owner *
+net_iov_owner(const struct net_iov *niov)
+{
+ return niov->owner;
+}
+
+static inline unsigned int net_iov_idx(const struct net_iov *niov)
+{
+ return niov - net_iov_owner(niov)->niovs;
+}
+
+static inline struct net_devmem_dmabuf_binding *
+net_iov_binding(const struct net_iov *niov)
+{
+ return net_iov_owner(niov)->binding;
+}
+
+static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
+{
+ struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+
+ return owner->base_virtual +
+ ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT);
+}
+
+static inline u32 net_iov_binding_id(const struct net_iov *niov)
+{
+ return net_iov_owner(niov)->binding->id;
+}
+
+static inline void
+net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding)
+{
+ refcount_inc(&binding->ref);
+}
+
+static inline void
+net_devmem_dmabuf_binding_put(struct net_devmem_dmabuf_binding *binding)
+{
+ if (!refcount_dec_and_test(&binding->ref))
+ return;
+
+ __net_devmem_dmabuf_binding_free(binding);
+}
+
+struct net_iov *
+net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding);
+void net_devmem_free_dmabuf(struct net_iov *ppiov);
+
+#else
+struct net_devmem_dmabuf_binding;
+
+static inline void
+__net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
+{
+}
+
+static inline struct net_devmem_dmabuf_binding *
+net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct netlink_ext_ack *extack)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+}
+
+static inline int
+net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding,
+ struct netlink_ext_ack *extack)
+
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void dev_dmabuf_uninstall(struct net_device *dev)
+{
+}
+
+static inline struct net_iov *
+net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ return NULL;
+}
+
+static inline void net_devmem_free_dmabuf(struct net_iov *ppiov)
+{
+}
+
+static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
+{
+ return 0;
+}
+
+static inline u32 net_iov_binding_id(const struct net_iov *niov)
+{
+ return 0;
+}
+#endif
+
+#endif /* _NET_DEVMEM_H */
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 6ebffbc63236..154a2681f55c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/module.h>
#include <net/net_namespace.h>
+#include <net/inet_dscp.h>
#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
@@ -72,7 +73,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->suppress_prefixlen = -1;
r->suppress_ifgroup = -1;
- /* The lock is not required here, the list in unreacheable
+ /* The lock is not required here, the list in unreachable
* at the moment this function is called */
list_add_tail(&r->list, &ops->rules_list);
return 0;
@@ -766,7 +767,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_PROTOCOL] = { .type = NLA_U8 },
[FRA_IP_PROTO] = { .type = NLA_U8 },
[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
- [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
+ [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
};
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1205,8 +1207,7 @@ static void notify_rule_change(int event, struct fib_rule *rule,
rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, ops->nlgroup, err);
+ rtnl_set_sk_err(net, ops->nlgroup, err);
}
static void attach_rules(struct list_head *rules, struct net_device *dev)
diff --git a/net/core/filter.c b/net/core/filter.c
index e4a4454df5f9..cd3524cb326b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -84,6 +84,7 @@
#include <net/netkit.h>
#include <linux/un.h>
#include <net/xdp_sock_drv.h>
+#include <net/inet_dscp.h>
#include "dev.h"
@@ -2371,7 +2372,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4 = {
.flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark,
- .flowi4_tos = RT_TOS(ip4h->tos),
+ .flowi4_tos = ip4h->tos & INET_DSCP_MASK,
.flowi4_oif = dev->ifindex,
.flowi4_proto = ip4h->protocol,
.daddr = ip4h->daddr,
@@ -3189,6 +3190,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
bpf_push_mac_rcsum(skb);
ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
bpf_pull_mac_rcsum(skb);
+ skb_reset_mac_len(skb);
bpf_compute_data_pointers(skb);
return ret;
@@ -5278,6 +5280,11 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
return -EINVAL;
inet_csk(sk)->icsk_rto_min = timeout;
break;
+ case TCP_BPF_SOCK_OPS_CB_FLAGS:
+ if (val & ~(BPF_SOCK_OPS_ALL_CB_FLAGS))
+ return -EINVAL;
+ tp->bpf_sock_ops_cb_flags = val;
+ break;
default:
return -EINVAL;
}
@@ -5366,6 +5373,17 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
if (*optlen < 1)
return -EINVAL;
break;
+ case TCP_BPF_SOCK_OPS_CB_FLAGS:
+ if (*optlen != sizeof(int))
+ return -EINVAL;
+ if (getopt) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ int cb_flags = tp->bpf_sock_ops_cb_flags;
+
+ memcpy(optval, &cb_flags, *optlen);
+ return 0;
+ }
+ return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
default:
if (getopt)
return -EINVAL;
@@ -5899,7 +5917,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
fl4.flowi4_iif = params->ifindex;
fl4.flowi4_oif = 0;
}
- fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
+ fl4.flowi4_tos = params->tos & INET_DSCP_MASK;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
@@ -12060,7 +12078,7 @@ int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
}
BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
-BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
@@ -12109,6 +12127,7 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
&bpf_kfunc_set_sock_addr);
diff --git a/net/core/gro.c b/net/core/gro.c
index b3b43de1a650..802b4a062400 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -374,7 +374,7 @@ static void gro_list_prepare(const struct list_head *head,
skb_mac_header(skb),
maclen);
- /* in most common scenarions 'slow_gro' is 0
+ /* in most common scenarios 'slow_gro' is 0
* otherwise we are already on some slower paths
* either skip all the infrequent tests altogether or
* avoid trying too hard to skip each of them individually
@@ -408,7 +408,8 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
pinfo = skb_shinfo(skb);
frag0 = &pinfo->frags[0];
- if (pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) &&
+ if (pinfo->nr_frags && skb_frag_page(frag0) &&
+ !PageHighMem(skb_frag_page(frag0)) &&
(!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index afb05f58b64c..1a14f915b7a4 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -12,6 +12,7 @@
#include <net/gre.h>
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>
+#include <net/inet_dscp.h>
struct bpf_lwt_prog {
struct bpf_prog *prog;
@@ -205,7 +206,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
fl4.flowi4_oif = oif;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_uid = sock_net_uid(net, sk);
- fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = iph->protocol;
fl4.daddr = iph->daddr;
diff --git a/net/core/mp_dmabuf_devmem.h b/net/core/mp_dmabuf_devmem.h
new file mode 100644
index 000000000000..67cd0dd7319c
--- /dev/null
+++ b/net/core/mp_dmabuf_devmem.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Dmabuf device memory provider.
+ *
+ * Authors: Mina Almasry <almasrymina@google.com>
+ *
+ */
+#ifndef _NET_MP_DMABUF_DEVMEM_H
+#define _NET_MP_DMABUF_DEVMEM_H
+
+#include <net/netmem.h>
+
+#if defined(CONFIG_NET_DEVMEM)
+int mp_dmabuf_devmem_init(struct page_pool *pool);
+
+netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp);
+
+void mp_dmabuf_devmem_destroy(struct page_pool *pool);
+
+bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem);
+#else
+static inline int mp_dmabuf_devmem_init(struct page_pool *pool)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline netmem_ref
+mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
+{
+ return 0;
+}
+
+static inline void mp_dmabuf_devmem_destroy(struct page_pool *pool)
+{
+}
+
+static inline bool
+mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
+{
+ return false;
+}
+#endif
+
+#endif /* _NET_MP_DMABUF_DEVMEM_H */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a6fe88eca939..77b819cd995b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3530,8 +3530,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
void neigh_app_ns(struct neighbour *n)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 0e2084ce7b75..05cf5347f25e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -32,6 +32,7 @@
#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
+static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
@@ -235,7 +236,7 @@ static ssize_t speed_show(struct device *dev,
if (!rtnl_trylock())
return restart_syscall();
- if (netif_running(netdev) && netif_device_present(netdev)) {
+ if (netif_running(netdev)) {
struct ethtool_link_ksettings cmd;
if (!__ethtool_get_link_ksettings(netdev, &cmd))
@@ -425,6 +426,9 @@ NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
+ if (val > S32_MAX)
+ return -ERANGE;
+
WRITE_ONCE(dev->napi_defer_hard_irqs, val);
return 0;
}
@@ -438,7 +442,7 @@ static ssize_t napi_defer_hard_irqs_store(struct device *dev,
return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
}
-NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec);
+NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);
static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
@@ -1056,7 +1060,7 @@ static const void *rx_queue_namespace(const struct kobject *kobj)
struct device *dev = &queue->dev->dev;
const void *ns = NULL;
- if (dev->class && dev->class->ns_type)
+ if (dev->class && dev->class->namespace)
ns = dev->class->namespace(dev);
return ns;
@@ -1524,7 +1528,7 @@ static const struct attribute_group dql_group = {
};
#else
/* Fake declaration, all the code using it should be dead */
-extern const struct attribute_group dql_group;
+static const struct attribute_group dql_group = {};
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
@@ -1740,7 +1744,7 @@ static const void *netdev_queue_namespace(const struct kobject *kobj)
struct device *dev = &queue->dev->dev;
const void *ns = NULL;
- if (dev->class && dev->class->ns_type)
+ if (dev->class && dev->class->namespace)
ns = dev->class->namespace(dev);
return ns;
@@ -1764,8 +1768,7 @@ static const struct kobj_type netdev_queue_ktype = {
static bool netdev_uses_bql(const struct net_device *dev)
{
- if (dev->features & NETIF_F_LLTX ||
- dev->priv_flags & IFF_NO_QUEUE)
+ if (dev->lltx || (dev->priv_flags & IFF_NO_QUEUE))
return false;
return IS_ENABLED(CONFIG_BQL);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6a823ba906c6..11e4dd4f09ed 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -125,7 +125,7 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
int err = -ENOMEM;
void *data = NULL;
- if (ops->id && ops->size) {
+ if (ops->id) {
data = kzalloc(ops->size, GFP_KERNEL);
if (!data)
goto out;
@@ -140,7 +140,7 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
if (!err)
return 0;
- if (ops->id && ops->size) {
+ if (ops->id) {
ng = rcu_dereference_protected(net->gen,
lockdep_is_held(&pernet_ops_rwsem));
ng->ptr[*ops->id] = NULL;
@@ -182,7 +182,8 @@ static void ops_free_list(const struct pernet_operations *ops,
struct list_head *net_exit_list)
{
struct net *net;
- if (ops->size && ops->id) {
+
+ if (ops->id) {
list_for_each_entry(net, net_exit_list, exit_list)
kfree(net_generic(net, *ops->id));
}
@@ -308,16 +309,38 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_id);
+static __net_init void preinit_net_sysctl(struct net *net)
+{
+ net->core.sysctl_somaxconn = SOMAXCONN;
+ /* Limits per socket sk_omem_alloc usage.
+ * TCP zerocopy regular usage needs 128 KB.
+ */
+ net->core.sysctl_optmem_max = 128 * 1024;
+ net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
+}
+
/* init code that must occur even if setup_net() is not called. */
-static __net_init void preinit_net(struct net *net)
+static __net_init void preinit_net(struct net *net, struct user_namespace *user_ns)
{
+ refcount_set(&net->passive, 1);
+ refcount_set(&net->ns.count, 1);
+ ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
+
+ get_random_bytes(&net->hash_mix, sizeof(u32));
+ net->dev_base_seq = 1;
+ net->user_ns = user_ns;
+
+ idr_init(&net->netns_ids);
+ spin_lock_init(&net->nsid_lock);
+ mutex_init(&net->ipv4.ra_mutex);
+ preinit_net_sysctl(net);
}
/*
* setup_net runs the initializers for the network namespace object.
*/
-static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
+static __net_init int setup_net(struct net *net)
{
/* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
@@ -325,19 +348,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
LIST_HEAD(dev_kill_list);
int error = 0;
- refcount_set(&net->ns.count, 1);
- ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
-
- refcount_set(&net->passive, 1);
- get_random_bytes(&net->hash_mix, sizeof(u32));
preempt_disable();
net->net_cookie = gen_cookie_next(&net_cookie);
preempt_enable();
- net->dev_base_seq = 1;
- net->user_ns = user_ns;
- idr_init(&net->netns_ids);
- spin_lock_init(&net->nsid_lock);
- mutex_init(&net->ipv4.ra_mutex);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
@@ -382,32 +395,6 @@ out_undo:
goto out;
}
-static int __net_init net_defaults_init_net(struct net *net)
-{
- net->core.sysctl_somaxconn = SOMAXCONN;
- /* Limits per socket sk_omem_alloc usage.
- * TCP zerocopy regular usage needs 128 KB.
- */
- net->core.sysctl_optmem_max = 128 * 1024;
- net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
-
- return 0;
-}
-
-static struct pernet_operations net_defaults_ops = {
- .init = net_defaults_init_net,
-};
-
-static __init int net_defaults_init(void)
-{
- if (register_pernet_subsys(&net_defaults_ops))
- panic("Cannot initialize net default settings");
-
- return 0;
-}
-
-core_initcall(net_defaults_init);
-
#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
@@ -496,8 +483,7 @@ struct net *copy_net_ns(unsigned long flags,
goto dec_ucounts;
}
- preinit_net(net);
- refcount_set(&net->passive, 1);
+ preinit_net(net, user_ns);
net->ucounts = ucounts;
get_user_ns(user_ns);
@@ -505,7 +491,7 @@ struct net *copy_net_ns(unsigned long flags,
if (rv < 0)
goto put_userns;
- rv = setup_net(net, user_ns);
+ rv = setup_net(net);
up_read(&pernet_ops_rwsem);
@@ -1199,9 +1185,10 @@ void __init net_ns_init(void)
#ifdef CONFIG_KEYS
init_net.key_domain = &init_net_key_domain;
#endif
+ preinit_net(&init_net, &init_user_ns);
+
down_write(&pernet_ops_rwsem);
- preinit_net(&init_net);
- if (setup_net(&init_net, &init_user_ns))
+ if (setup_net(&init_net))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
@@ -1244,7 +1231,7 @@ static int __register_pernet_operations(struct list_head *list,
LIST_HEAD(net_exit_list);
list_add_tail(&ops->list, list);
- if (ops->init || (ops->id && ops->size)) {
+ if (ops->init || ops->id) {
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
@@ -1310,6 +1297,9 @@ static int register_pernet_operations(struct list_head *list,
{
int error;
+ if (WARN_ON(!!ops->id ^ !!ops->size))
+ return -EINVAL;
+
if (ops->id) {
error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
GFP_KERNEL);
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 8350a0afa9ec..b28424ae06d5 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -9,6 +9,7 @@
#include "netdev-genl-gen.h"
#include <uapi/linux/netdev.h>
+#include <linux/list.h>
/* Integer value ranges */
static const struct netlink_range_validation netdev_a_page_pool_id_range = {
@@ -27,6 +28,11 @@ const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFIND
[NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range),
};
+const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = {
+ [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, },
+ [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1),
+};
+
/* NETDEV_CMD_DEV_GET - do */
static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
[NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
@@ -74,6 +80,13 @@ static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE
[NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1),
};
+/* NETDEV_CMD_BIND_RX - do */
+static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] = {
+ [NETDEV_A_DMABUF_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+ [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, },
+ [NETDEV_A_DMABUF_QUEUES] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy),
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -151,6 +164,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.maxattr = NETDEV_A_QSTATS_SCOPE,
.flags = GENL_CMD_CAP_DUMP,
},
+ {
+ .cmd = NETDEV_CMD_BIND_RX,
+ .doit = netdev_nl_bind_rx_doit,
+ .policy = netdev_bind_rx_nl_policy,
+ .maxattr = NETDEV_A_DMABUF_FD,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
@@ -168,4 +188,7 @@ struct genl_family netdev_nl_family __ro_after_init = {
.n_split_ops = ARRAY_SIZE(netdev_nl_ops),
.mcgrps = netdev_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps),
+ .sock_priv_size = sizeof(struct list_head),
+ .sock_priv_init = (void *)netdev_nl_sock_priv_init,
+ .sock_priv_destroy = (void *)netdev_nl_sock_priv_destroy,
};
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 4db40fd5b4a9..8cda334fd042 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -10,9 +10,11 @@
#include <net/genetlink.h>
#include <uapi/linux/netdev.h>
+#include <linux/list.h>
/* Common nested types */
extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1];
+extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1];
int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
@@ -30,6 +32,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
enum {
NETDEV_NLGRP_MGMT,
@@ -38,4 +41,7 @@ enum {
extern struct genl_family netdev_nl_family;
+void netdev_nl_sock_priv_init(struct list_head *priv);
+void netdev_nl_sock_priv_destroy(struct list_head *priv);
+
#endif /* _LINUX_NETDEV_GEN_H */
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 05f9515d2c05..1cb954f2d39e 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -3,16 +3,17 @@
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
+#include <net/busy_poll.h>
#include <net/net_namespace.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
-#include <net/netdev_rx_queue.h>
-#include <net/netdev_queues.h>
-#include <net/busy_poll.h>
-#include "netdev-genl-gen.h"
#include "dev.h"
+#include "devmem.h"
+#include "netdev-genl-gen.h"
struct netdev_nl_dump_ctx {
unsigned long ifindex;
@@ -216,10 +217,12 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
napi = napi_by_id(napi_id);
- if (napi)
+ if (napi) {
err = netdev_nl_napi_fill_one(rsp, napi, info);
- else
- err = -EINVAL;
+ } else {
+ NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
+ err = -ENOENT;
+ }
rtnl_unlock();
@@ -292,6 +295,7 @@ static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
+ struct net_devmem_dmabuf_binding *binding;
struct netdev_rx_queue *rxq;
struct netdev_queue *txq;
void *hdr;
@@ -311,6 +315,12 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
rxq->napi->napi_id))
goto nla_put_failure;
+
+ binding = rxq->mp_params.mp_priv;
+ if (binding &&
+ nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id))
+ goto nla_put_failure;
+
break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
@@ -721,6 +731,129 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
return err;
}
+int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
+ struct net_devmem_dmabuf_binding *binding;
+ struct list_head *sock_binding_list;
+ u32 ifindex, dmabuf_fd, rxq_idx;
+ struct net_device *netdev;
+ struct sk_buff *rsp;
+ struct nlattr *attr;
+ int rem, err = 0;
+ void *hdr;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES))
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
+ dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
+
+ sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
+ NETLINK_CB(skb).sk);
+ if (IS_ERR(sock_binding_list))
+ return PTR_ERR(sock_binding_list);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_genlmsg_free;
+ }
+
+ rtnl_lock();
+
+ netdev = __dev_get_by_index(genl_info_net(info), ifindex);
+ if (!netdev || !netif_device_present(netdev)) {
+ err = -ENODEV;
+ goto err_unlock;
+ }
+
+ if (dev_xdp_prog_count(netdev)) {
+ NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached");
+ err = -EEXIST;
+ goto err_unlock;
+ }
+
+ binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack);
+ if (IS_ERR(binding)) {
+ err = PTR_ERR(binding);
+ goto err_unlock;
+ }
+
+ nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
+ genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ err = nla_parse_nested(
+ tb, ARRAY_SIZE(netdev_queue_id_nl_policy) - 1, attr,
+ netdev_queue_id_nl_policy, info->extack);
+ if (err < 0)
+ goto err_unbind;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
+ NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) {
+ err = -EINVAL;
+ goto err_unbind;
+ }
+
+ if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
+ NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
+ err = -EINVAL;
+ goto err_unbind;
+ }
+
+ rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
+
+ err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding,
+ info->extack);
+ if (err)
+ goto err_unbind;
+ }
+
+ list_add(&binding->list, sock_binding_list);
+
+ nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
+ genlmsg_end(rsp, hdr);
+
+ err = genlmsg_reply(rsp, info);
+ if (err)
+ goto err_unbind;
+
+ rtnl_unlock();
+
+ return 0;
+
+err_unbind:
+ net_devmem_unbind_dmabuf(binding);
+err_unlock:
+ rtnl_unlock();
+err_genlmsg_free:
+ nlmsg_free(rsp);
+ return err;
+}
+
+void netdev_nl_sock_priv_init(struct list_head *priv)
+{
+ INIT_LIST_HEAD(priv);
+}
+
+void netdev_nl_sock_priv_destroy(struct list_head *priv)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ struct net_devmem_dmabuf_binding *temp;
+
+ list_for_each_entry_safe(binding, temp, priv, list) {
+ rtnl_lock();
+ net_devmem_unbind_dmabuf(binding);
+ rtnl_unlock();
+ }
+}
+
static int netdev_genl_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
new file mode 100644
index 000000000000..e217a5838c87
--- /dev/null
+++ b/net/core/netdev_rx_queue.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/netdevice.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+
+#include "page_pool_priv.h"
+
+int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
+{
+ struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx);
+ void *new_mem, *old_mem;
+ int err;
+
+ if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop ||
+ !dev->queue_mgmt_ops->ndo_queue_mem_free ||
+ !dev->queue_mgmt_ops->ndo_queue_mem_alloc ||
+ !dev->queue_mgmt_ops->ndo_queue_start)
+ return -EOPNOTSUPP;
+
+ ASSERT_RTNL();
+
+ new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+ if (!new_mem)
+ return -ENOMEM;
+
+ old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+ if (!old_mem) {
+ err = -ENOMEM;
+ goto err_free_new_mem;
+ }
+
+ err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
+ if (err)
+ goto err_free_old_mem;
+
+ err = page_pool_check_memory_provider(dev, rxq);
+ if (err)
+ goto err_free_new_queue_mem;
+
+ err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx);
+ if (err)
+ goto err_free_new_queue_mem;
+
+ err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx);
+ if (err)
+ goto err_start_queue;
+
+ dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+
+ kvfree(old_mem);
+ kvfree(new_mem);
+
+ return 0;
+
+err_start_queue:
+ /* Restarting the queue with old_mem should be successful as we haven't
+ * changed any of the queue configuration, and there is not much we can
+ * do to recover from a failure here.
+ *
+ * WARN if we fail to recover the old rx queue, and at least free
+ * old_mem so we don't also leak that.
+ */
+ if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) {
+ WARN(1,
+ "Failed to restart old queue in error path. RX queue %d may be unhealthy.",
+ rxq_idx);
+ dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+ }
+
+err_free_new_queue_mem:
+ dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem);
+
+err_free_old_mem:
+ kvfree(old_mem);
+
+err_free_new_mem:
+ kvfree(new_mem);
+
+ return err;
+}
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
new file mode 100644
index 000000000000..7eadb8393e00
--- /dev/null
+++ b/net/core/netmem_priv.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NETMEM_PRIV_H
+#define __NETMEM_PRIV_H
+
+static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
+{
+ return __netmem_clear_lsb(netmem)->pp_magic;
+}
+
+static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
+{
+ __netmem_clear_lsb(netmem)->pp_magic |= pp_magic;
+}
+
+static inline void netmem_clear_pp_magic(netmem_ref netmem)
+{
+ __netmem_clear_lsb(netmem)->pp_magic = 0;
+}
+
+static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
+{
+ __netmem_clear_lsb(netmem)->pp = pool;
+}
+
+static inline void netmem_set_dma_addr(netmem_ref netmem,
+ unsigned long dma_addr)
+{
+ __netmem_clear_lsb(netmem)->dma_addr = dma_addr;
+}
+#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 55bcacf67df3..ca52cbe0f63c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -48,8 +48,6 @@
static struct sk_buff_head skb_pool;
-DEFINE_STATIC_SRCU(netpoll_srcu);
-
#define USEC_PER_POLL 50
#define MAX_SKB_SIZE \
@@ -162,7 +160,7 @@ static void poll_one_napi(struct napi_struct *napi)
if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
return;
- /* We explicilty pass the polling call a budget of 0 to
+ /* We explicitly pass the polling call a budget of 0 to
* indicate that we are clearing the Tx path only.
*/
work = napi->poll(napi, 0);
@@ -220,26 +218,21 @@ EXPORT_SYMBOL(netpoll_poll_dev);
void netpoll_poll_disable(struct net_device *dev)
{
struct netpoll_info *ni;
- int idx;
+
might_sleep();
- idx = srcu_read_lock(&netpoll_srcu);
- ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
+ ni = rtnl_dereference(dev->npinfo);
if (ni)
down(&ni->dev_lock);
- srcu_read_unlock(&netpoll_srcu, idx);
}
-EXPORT_SYMBOL(netpoll_poll_disable);
void netpoll_poll_enable(struct net_device *dev)
{
struct netpoll_info *ni;
- rcu_read_lock();
- ni = rcu_dereference(dev->npinfo);
+
+ ni = rtnl_dereference(dev->npinfo);
if (ni)
up(&ni->dev_lock);
- rcu_read_unlock();
}
-EXPORT_SYMBOL(netpoll_poll_enable);
static void refill_skbs(void)
{
@@ -626,12 +619,9 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
const struct net_device_ops *ops;
int err;
- np->dev = ndev;
- strscpy(np->dev_name, ndev->name, IFNAMSIZ);
-
if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
np_err(np, "%s doesn't support polling, aborting\n",
- np->dev_name);
+ ndev->name);
err = -ENOTSUPP;
goto out;
}
@@ -649,7 +639,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
refcount_set(&npinfo->refcnt, 1);
- ops = np->dev->netdev_ops;
+ ops = ndev->netdev_ops;
if (ops->ndo_netpoll_setup) {
err = ops->ndo_netpoll_setup(ndev, npinfo);
if (err)
@@ -660,6 +650,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
refcount_inc(&npinfo->refcnt);
}
+ np->dev = ndev;
+ strscpy(np->dev_name, ndev->name, IFNAMSIZ);
npinfo->netpoll = np;
/* last thing to do is link it to the net device structure */
@@ -677,6 +669,7 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
int netpoll_setup(struct netpoll *np)
{
struct net_device *ndev = NULL;
+ bool ip_overwritten = false;
struct in_device *in_dev;
int err;
@@ -741,6 +734,7 @@ put_noaddr:
}
np->local_ip.ip = ifa->ifa_local;
+ ip_overwritten = true;
np_info(np, "local IP %pI4\n", &np->local_ip.ip);
} else {
#if IS_ENABLED(CONFIG_IPV6)
@@ -757,6 +751,7 @@ put_noaddr:
!!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
continue;
np->local_ip.in6 = ifp->addr;
+ ip_overwritten = true;
err = 0;
break;
}
@@ -787,6 +782,9 @@ put_noaddr:
return 0;
put:
+ DEBUG_NET_WARN_ON_ONCE(np->dev);
+ if (ip_overwritten)
+ memset(&np->local_ip, 0, sizeof(np->local_ip));
netdev_put(ndev, &np->dev_tracker);
unlock:
rtnl_unlock();
@@ -826,8 +824,6 @@ void __netpoll_cleanup(struct netpoll *np)
if (!npinfo)
return;
- synchronize_srcu(&netpoll_srcu);
-
if (refcount_dec_and_test(&npinfo->refcnt)) {
const struct net_device_ops *ops;
@@ -853,14 +849,20 @@ void __netpoll_free(struct netpoll *np)
}
EXPORT_SYMBOL_GPL(__netpoll_free);
+void do_netpoll_cleanup(struct netpoll *np)
+{
+ __netpoll_cleanup(np);
+ netdev_put(np->dev, &np->dev_tracker);
+ np->dev = NULL;
+}
+EXPORT_SYMBOL(do_netpoll_cleanup);
+
void netpoll_cleanup(struct netpoll *np)
{
rtnl_lock();
if (!np->dev)
goto out;
- __netpoll_cleanup(np);
- netdev_put(np->dev, &np->dev_tracker);
- np->dev = NULL;
+ do_netpoll_cleanup(np);
out:
rtnl_unlock();
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 2abe6e919224..a813d30d2135 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/device.h>
+#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/xdp.h>
@@ -24,8 +25,12 @@
#include <trace/events/page_pool.h>
+#include "mp_dmabuf_devmem.h"
+#include "netmem_priv.h"
#include "page_pool_priv.h"
+DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
+
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
@@ -187,6 +192,8 @@ static int page_pool_init(struct page_pool *pool,
int cpuid)
{
unsigned int ring_qsize = 1024; /* Default */
+ struct netdev_rx_queue *rxq;
+ int err;
page_pool_struct_check();
@@ -268,7 +275,37 @@ static int page_pool_init(struct page_pool *pool,
if (pool->dma_map)
get_device(pool->p.dev);
+ if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
+ /* We rely on rtnl_lock()ing to make sure netdev_rx_queue
+ * configuration doesn't change while we're initializing
+ * the page_pool.
+ */
+ ASSERT_RTNL();
+ rxq = __netif_get_rx_queue(pool->slow.netdev,
+ pool->slow.queue_idx);
+ pool->mp_priv = rxq->mp_params.mp_priv;
+ }
+
+ if (pool->mp_priv) {
+ err = mp_dmabuf_devmem_init(pool);
+ if (err) {
+ pr_warn("%s() mem-provider init failed %d\n", __func__,
+ err);
+ goto free_ptr_ring;
+ }
+
+ static_branch_inc(&page_pool_mem_providers);
+ }
+
return 0;
+
+free_ptr_ring:
+ ptr_ring_cleanup(&pool->ring, NULL);
+#ifdef CONFIG_PAGE_POOL_STATS
+ if (!pool->system)
+ free_percpu(pool->recycle_stats);
+#endif
+ return err;
}
static void page_pool_uninit(struct page_pool *pool)
@@ -358,7 +395,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
if (unlikely(!netmem))
break;
- if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) {
+ if (likely(netmem_is_pref_nid(netmem, pref_nid))) {
pool->alloc.cache[pool->alloc.count++] = netmem;
} else {
/* NUMA mismatch;
@@ -452,32 +489,6 @@ unmap_failed:
return false;
}
-static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
-{
- struct page *page = netmem_to_page(netmem);
-
- page->pp = pool;
- page->pp_magic |= PP_SIGNATURE;
-
- /* Ensuring all pages have been split into one fragment initially:
- * page_pool_set_pp_info() is only called once for every page when it
- * is allocated from the page allocator and page_pool_fragment_page()
- * is dirtying the same cache line as the page->pp_magic above, so
- * the overhead is negligible.
- */
- page_pool_fragment_netmem(netmem, 1);
- if (pool->has_init_callback)
- pool->slow.init_callback(netmem, pool->slow.init_arg);
-}
-
-static void page_pool_clear_pp_info(netmem_ref netmem)
-{
- struct page *page = netmem_to_page(netmem);
-
- page->pp_magic = 0;
- page->pp = NULL;
-}
-
static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
gfp_t gfp)
{
@@ -573,7 +584,10 @@ netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp)
return netmem;
/* Slow-path: cache empty, do real allocation */
- netmem = __page_pool_alloc_pages_slow(pool, gfp);
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
+ netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp);
+ else
+ netmem = __page_pool_alloc_pages_slow(pool, gfp);
return netmem;
}
EXPORT_SYMBOL(page_pool_alloc_netmem);
@@ -609,6 +623,28 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
return inflight;
}
+void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
+{
+ netmem_set_pp(netmem, pool);
+ netmem_or_pp_magic(netmem, PP_SIGNATURE);
+
+ /* Ensuring all pages have been split into one fragment initially:
+ * page_pool_set_pp_info() is only called once for every page when it
+ * is allocated from the page allocator and page_pool_fragment_page()
+ * is dirtying the same cache line as the page->pp_magic above, so
+ * the overhead is negligible.
+ */
+ page_pool_fragment_netmem(netmem, 1);
+ if (pool->has_init_callback)
+ pool->slow.init_callback(netmem, pool->slow.init_arg);
+}
+
+void page_pool_clear_pp_info(netmem_ref netmem)
+{
+ netmem_clear_pp_magic(netmem);
+ netmem_set_pp(netmem, NULL);
+}
+
static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
netmem_ref netmem)
{
@@ -637,8 +673,13 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
{
int count;
+ bool put;
- __page_pool_release_page_dma(pool, netmem);
+ put = true;
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
+ put = mp_dmabuf_devmem_release_page(pool, netmem);
+ else
+ __page_pool_release_page_dma(pool, netmem);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
@@ -646,8 +687,10 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, netmem, count);
- page_pool_clear_pp_info(netmem);
- put_page(netmem_to_page(netmem));
+ if (put) {
+ page_pool_clear_pp_info(netmem);
+ put_page(netmem_to_page(netmem));
+ }
/* An optimization would be to call __free_pages(page, pool->p.order)
* knowing page is not part of page-cache (thus avoiding a
* __page_cache_release() call).
@@ -692,8 +735,9 @@ static bool page_pool_recycle_in_cache(netmem_ref netmem,
static bool __page_pool_page_can_be_recycled(netmem_ref netmem)
{
- return page_ref_count(netmem_to_page(netmem)) == 1 &&
- !page_is_pfmemalloc(netmem_to_page(netmem));
+ return netmem_is_net_iov(netmem) ||
+ (page_ref_count(netmem_to_page(netmem)) == 1 &&
+ !page_is_pfmemalloc(netmem_to_page(netmem)));
}
/* If the page refcnt == 1, this will try to recycle the page.
@@ -728,6 +772,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
/* Page found as candidate for recycling */
return netmem;
}
+
/* Fallback/non-XDP mode: API user have elevated refcnt.
*
* Many drivers split up the page into fragments, and some
@@ -949,7 +994,7 @@ static void page_pool_empty_ring(struct page_pool *pool)
/* Empty recycle ring */
while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) {
/* Verify the refcnt invariant of cached pages */
- if (!(page_ref_count(netmem_to_page(netmem)) == 1))
+ if (!(netmem_ref_count(netmem) == 1))
pr_crit("%s() page_pool refcnt %d violation\n",
__func__, netmem_ref_count(netmem));
@@ -964,6 +1009,12 @@ static void __page_pool_destroy(struct page_pool *pool)
page_pool_unlist(pool);
page_pool_uninit(pool);
+
+ if (pool->mp_priv) {
+ mp_dmabuf_devmem_destroy(pool);
+ static_branch_dec(&page_pool_mem_providers);
+ }
+
kfree(pool);
}
diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h
index 90665d40f1eb..57439787b9c2 100644
--- a/net/core/page_pool_priv.h
+++ b/net/core/page_pool_priv.h
@@ -3,10 +3,56 @@
#ifndef __PAGE_POOL_PRIV_H
#define __PAGE_POOL_PRIV_H
+#include <net/page_pool/helpers.h>
+
+#include "netmem_priv.h"
+
s32 page_pool_inflight(const struct page_pool *pool, bool strict);
int page_pool_list(struct page_pool *pool);
void page_pool_detached(struct page_pool *pool);
void page_pool_unlist(struct page_pool *pool);
+static inline bool
+page_pool_set_dma_addr_netmem(netmem_ref netmem, dma_addr_t addr)
+{
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
+ netmem_set_dma_addr(netmem, addr >> PAGE_SHIFT);
+
+ /* We assume page alignment to shave off bottom bits,
+ * if this "compression" doesn't work we need to drop.
+ */
+ return addr != (dma_addr_t)netmem_get_dma_addr(netmem)
+ << PAGE_SHIFT;
+ }
+
+ netmem_set_dma_addr(netmem, addr);
+ return false;
+}
+
+static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+ return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr);
+}
+
+#if defined(CONFIG_PAGE_POOL)
+void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem);
+void page_pool_clear_pp_info(netmem_ref netmem);
+int page_pool_check_memory_provider(struct net_device *dev,
+ struct netdev_rx_queue *rxq);
+#else
+static inline void page_pool_set_pp_info(struct page_pool *pool,
+ netmem_ref netmem)
+{
+}
+static inline void page_pool_clear_pp_info(netmem_ref netmem)
+{
+}
+static inline int page_pool_check_memory_provider(struct net_device *dev,
+ struct netdev_rx_queue *rxq)
+{
+ return 0;
+}
+#endif
+
#endif
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index 3a3277ba167b..48335766c1bf 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -4,10 +4,12 @@
#include <linux/netdevice.h>
#include <linux/xarray.h>
#include <net/net_debug.h>
-#include <net/page_pool/types.h>
+#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/types.h>
#include <net/sock.h>
+#include "devmem.h"
#include "page_pool_priv.h"
#include "netdev-genl-gen.h"
@@ -212,6 +214,7 @@ static int
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
const struct genl_info *info)
{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
size_t inflight, refsz;
void *hdr;
@@ -241,6 +244,9 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
pool->user.detach_time))
goto err_cancel;
+ if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id))
+ goto err_cancel;
+
genlmsg_end(rsp, hdr);
return 0;
@@ -344,6 +350,30 @@ void page_pool_unlist(struct page_pool *pool)
mutex_unlock(&page_pools_lock);
}
+int page_pool_check_memory_provider(struct net_device *dev,
+ struct netdev_rx_queue *rxq)
+{
+ struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv;
+ struct page_pool *pool;
+ struct hlist_node *n;
+
+ if (!binding)
+ return 0;
+
+ mutex_lock(&page_pools_lock);
+ hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) {
+ if (pool->mp_priv != binding)
+ continue;
+
+ if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) {
+ mutex_unlock(&page_pools_lock);
+ return 0;
+ }
+ }
+ mutex_unlock(&page_pools_lock);
+ return -ENODATA;
+}
+
static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
{
struct page_pool *pool;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index ea55a758a475..34f68ef74b8f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -69,7 +69,7 @@
*
* By design there should only be *one* "controlling" process. In practice
* multiple write accesses gives unpredictable result. Understood by "write"
- * to /proc gives result code thats should be read be the "writer".
+ * to /proc gives result code that should be read be the "writer".
* For practical use this should be no problem.
*
* Note when adding devices to a specific CPU there good idea to also assign
@@ -2371,11 +2371,11 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
if (pkt_dev->spi) {
/* We need as quick as possible to find the right SA
- * Searching with minimum criteria to archieve this.
+ * Searching with minimum criteria to achieve, this.
*/
x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
} else {
- /* slow path: we dont already have xfrm_state */
+ /* slow path: we don't already have xfrm_state */
x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0,
(xfrm_address_t *)&pkt_dev->cur_daddr,
(xfrm_address_t *)&pkt_dev->cur_saddr,
@@ -3654,7 +3654,7 @@ static int pktgen_thread_worker(void *arg)
struct pktgen_dev *pkt_dev = NULL;
int cpu = t->cpu;
- WARN_ON(smp_processor_id() != cpu);
+ WARN_ON_ONCE(smp_processor_id() != cpu);
init_waitqueue_head(&t->queue);
complete(&t->start_done);
@@ -3838,8 +3838,8 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->ipsmode = XFRM_MODE_TRANSPORT;
pkt_dev->ipsproto = IPPROTO_ESP;
- /* xfrm tunnel mode needs additional dst to extract outter
- * ip header protocol/ttl/id field, here creat a phony one.
+ /* xfrm tunnel mode needs additional dst to extract outer
+ * ip header protocol/ttl/id field, here create a phony one.
* instead of looking for a valid rt, which definitely hurting
* performance under such circumstance.
*/
@@ -3989,6 +3989,7 @@ static int __net_init pg_net_init(struct net *net)
goto remove;
}
+ cpus_read_lock();
for_each_online_cpu(cpu) {
int err;
@@ -3997,6 +3998,7 @@ static int __net_init pg_net_init(struct net *net)
pr_warn("Cannot create thread for cpu %d (%d)\n",
cpu, err);
}
+ cpus_read_unlock();
if (list_empty(&pn->pktgen_threads)) {
pr_err("Initialization failed for all threads\n");
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 73fd7f543fd0..f0a520987085 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2724,7 +2724,7 @@ static int do_set_proto_down(struct net_device *dev,
bool proto_down;
int err;
- if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN)) {
+ if (!dev->change_proto_down) {
NL_SET_ERR_MSG(extack, "Protodown not supported by device");
return -EOPNOTSUPP;
}
@@ -4087,8 +4087,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
}
return skb;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
return NULL;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 83f8cd8aa2d1..74149dc4ee31 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -88,6 +88,7 @@
#include <linux/textsearch.h>
#include "dev.h"
+#include "netmem_priv.h"
#include "sock_destructor.h"
#ifdef CONFIG_SKB_EXTENSIONS
@@ -314,8 +315,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
fragsz = SKB_DATA_ALIGN(fragsz);
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
- data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
- align_mask);
+ data = __page_frag_alloc_align(&nc->page, fragsz,
+ GFP_ATOMIC | __GFP_NOWARN, align_mask);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
return data;
@@ -330,7 +331,8 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
fragsz = SKB_DATA_ALIGN(fragsz);
- data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC,
+ data = __page_frag_alloc_align(nc, fragsz,
+ GFP_ATOMIC | __GFP_NOWARN,
align_mask);
} else {
local_bh_disable();
@@ -349,7 +351,7 @@ static struct sk_buff *napi_skb_cache_get(void)
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!nc->skb_count)) {
nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
- GFP_ATOMIC,
+ GFP_ATOMIC | __GFP_NOWARN,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
if (unlikely(!nc->skb_count)) {
@@ -418,7 +420,8 @@ struct sk_buff *slab_build_skb(void *data)
struct sk_buff *skb;
unsigned int size;
- skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
@@ -469,7 +472,8 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
@@ -917,9 +921,9 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
-static bool is_pp_page(struct page *page)
+static bool is_pp_netmem(netmem_ref netmem)
{
- return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+ return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE;
}
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
@@ -1017,9 +1021,7 @@ EXPORT_SYMBOL(skb_cow_data_for_xdp);
#if IS_ENABLED(CONFIG_PAGE_POOL)
bool napi_pp_put_page(netmem_ref netmem)
{
- struct page *page = netmem_to_page(netmem);
-
- page = compound_head(page);
+ netmem = netmem_compound_head(netmem);
/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
* in order to preserve any existing bits, such as bit 0 for the
@@ -1028,10 +1030,10 @@ bool napi_pp_put_page(netmem_ref netmem)
* and page_is_pfmemalloc() is checked in __page_pool_put_page()
* to avoid recycling the pfmemalloc page.
*/
- if (unlikely(!is_pp_page(page)))
+ if (unlikely(!is_pp_netmem(netmem)))
return false;
- page_pool_put_full_netmem(page->pp, page_to_netmem(page), false);
+ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
return true;
}
@@ -1058,7 +1060,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
static int skb_pp_frag_ref(struct sk_buff *skb)
{
struct skb_shared_info *shinfo;
- struct page *head_page;
+ netmem_ref head_netmem;
int i;
if (!skb->pp_recycle)
@@ -1067,11 +1069,11 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
shinfo = skb_shinfo(skb);
for (i = 0; i < shinfo->nr_frags; i++) {
- head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
- if (likely(is_pp_page(head_page)))
- page_pool_ref_page(head_page);
+ head_netmem = netmem_compound_head(shinfo->frags[i].netmem);
+ if (likely(is_pp_netmem(head_netmem)))
+ page_pool_ref_netmem(head_netmem);
else
- page_ref_inc(head_page);
+ page_ref_inc(netmem_to_page(head_netmem));
}
return 0;
}
@@ -1369,6 +1371,14 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
struct page *p;
u8 *vaddr;
+ if (skb_frag_is_net_iov(frag)) {
+ printk("%sskb frag %d: not readable\n", level, i);
+ len -= skb_frag_size(frag);
+ if (!len)
+ break;
+ continue;
+ }
+
skb_frag_foreach_page(frag, skb_frag_off(frag),
skb_frag_size(frag), p, p_off, p_len,
copied) {
@@ -1962,6 +1972,9 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
return -EINVAL;
+ if (!skb_frags_readable(skb))
+ return -EFAULT;
+
if (!num_frags)
goto release;
@@ -2135,6 +2148,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
unsigned int size;
int headerlen;
+ if (!skb_frags_readable(skb))
+ return NULL;
+
if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
return NULL;
@@ -2473,6 +2489,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
struct sk_buff *n;
int oldheadroom;
+ if (!skb_frags_readable(skb))
+ return NULL;
+
if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
return NULL;
@@ -2817,6 +2836,9 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
*/
int i, k, eat = (skb->tail + delta) - skb->end;
+ if (!skb_frags_readable(skb))
+ return NULL;
+
if (eat > 0 || skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
GFP_ATOMIC))
@@ -2970,6 +2992,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
to += copy;
}
+ if (!skb_frags_readable(skb))
+ goto fault;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
skb_frag_t *f = &skb_shinfo(skb)->frags[i];
@@ -3158,9 +3183,15 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
/*
* then map the fragments
*/
+ if (!skb_frags_readable(skb))
+ return false;
+
for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
+ if (WARN_ON_ONCE(!skb_frag_page(f)))
+ return false;
+
if (__splice_segment(skb_frag_page(f),
skb_frag_off(f), skb_frag_size(f),
offset, len, spd, false, sk, pipe))
@@ -3378,6 +3409,9 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
from += copy;
}
+ if (!skb_frags_readable(skb))
+ goto fault;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
int end;
@@ -3457,6 +3491,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
pos = copy;
}
+ if (WARN_ON_ONCE(!skb_frags_readable(skb)))
+ return 0;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -3557,6 +3594,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
pos = copy;
}
+ if (!skb_frags_readable(skb))
+ return 0;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
@@ -4048,6 +4088,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb,
skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
+ skb1->unreadable = skb->unreadable;
skb_shinfo(skb)->nr_frags = 0;
skb1->data_len = skb->data_len;
skb1->len += skb1->data_len;
@@ -4095,6 +4136,8 @@ static inline void skb_split_no_header(struct sk_buff *skb,
pos += size;
}
skb_shinfo(skb1)->nr_frags = k;
+
+ skb1->unreadable = skb->unreadable;
}
/**
@@ -4169,8 +4212,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
/* Actual merge is delayed until the point when we know we can
* commit all, so that we don't have to undo partial changes
*/
- if (!to ||
- !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
+ if (!skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
skb_frag_off(fragfrom))) {
merge = -1;
} else {
@@ -4333,6 +4375,9 @@ next_skb:
return block_limit - abs_offset;
}
+ if (!skb_frags_readable(st->cur_skb))
+ return 0;
+
if (st->frag_idx == 0 && !st->frag_data)
st->stepped_offset += skb_headlen(st->cur_skb);
@@ -4409,6 +4454,41 @@ void skb_abort_seq_read(struct skb_seq_state *st)
}
EXPORT_SYMBOL(skb_abort_seq_read);
+/**
+ * skb_copy_seq_read() - copy from a skb_seq_state to a buffer
+ * @st: source skb_seq_state
+ * @offset: offset in source
+ * @to: destination buffer
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @offset bytes into the source @st to the destination
+ * buffer @to. `offset` should increase (or be unchanged) with each subsequent
+ * call to this function. If offset needs to decrease from the previous use `st`
+ * should be reset first.
+ *
+ * Return: 0 on success or -EINVAL if the copy ended early
+ */
+int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len)
+{
+ const u8 *data;
+ u32 sqlen;
+
+ for (;;) {
+ sqlen = skb_seq_read(offset, &data, st);
+ if (sqlen == 0)
+ return -EINVAL;
+ if (sqlen >= len) {
+ memcpy(to, data, len);
+ return 0;
+ }
+ memcpy(to, data, sqlen);
+ to += sqlen;
+ offset += sqlen;
+ len -= sqlen;
+ }
+}
+EXPORT_SYMBOL(skb_copy_seq_read);
+
#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
@@ -5161,7 +5241,7 @@ EXPORT_SYMBOL_GPL(skb_to_sgvec);
* 3. sg_unmark_end
* 4. skb_to_sgvec(payload2)
*
- * When mapping mutilple payload conditionally, skb_to_sgvec_nomark
+ * When mapping multiple payload conditionally, skb_to_sgvec_nomark
* is more preferable.
*/
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
@@ -5945,7 +6025,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (to->pp_recycle != from->pp_recycle)
return false;
- if (len <= skb_tailroom(to)) {
+ if (skb_frags_readable(from) != skb_frags_readable(to))
+ return false;
+
+ if (len <= skb_tailroom(to) && skb_frags_readable(from)) {
if (len)
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
*delta_truesize = 0;
@@ -6019,7 +6102,7 @@ EXPORT_SYMBOL(skb_try_coalesce);
* @skb: buffer to clean
* @xnet: packet is crossing netns
*
- * skb_scrub_packet can be used after encapsulating or decapsulting a packet
+ * skb_scrub_packet can be used after encapsulating or decapsulating a packet
* into/from a tunnel. Some information have to be cleared during these
* operations.
* skb_scrub_packet can also be used to clean a skb before injecting it in
@@ -6122,6 +6205,9 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
if (!pskb_may_pull(skb, write_len))
return -ENOMEM;
+ if (!skb_frags_readable(skb))
+ return -EFAULT;
+
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;
@@ -6241,7 +6327,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
return err;
skb->protocol = skb->vlan_proto;
- skb->mac_len += VLAN_HLEN;
+ skb->network_header -= VLAN_HLEN;
skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
}
@@ -6801,7 +6887,7 @@ void skb_condense(struct sk_buff *skb)
{
if (skb->data_len) {
if (skb->data_len > skb->end - skb->tail ||
- skb_cloned(skb))
+ skb_cloned(skb) || !skb_frags_readable(skb))
return;
/* Nice, we can free page frag(s) right now */
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index bbf40b999713..b1dcbd3be89e 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -293,7 +293,7 @@ out:
/* If we trim data a full sg elem before curr pointer update
* copybreak and current so that any future copy operations
* start at new copy location.
- * However trimed data that has not yet been used in a copy op
+ * However trimmed data that has not yet been used in a copy op
* does not require an update.
*/
if (!msg->sg.size) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 9abc4fe25953..fe87f9bd8f16 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -124,6 +124,7 @@
#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
+#include <linux/skbuff_ref.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
@@ -1049,6 +1050,69 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
return 0;
}
+#ifdef CONFIG_PAGE_POOL
+
+/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in
+ * 1 syscall. The limit exists to limit the amount of memory the kernel
+ * allocates to copy these tokens.
+ */
+#define MAX_DONTNEED_TOKENS 128
+
+static noinline_for_stack int
+sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+ unsigned int num_tokens, i, j, k, netmem_num = 0;
+ struct dmabuf_token *tokens;
+ netmem_ref netmems[16];
+ int ret = 0;
+
+ if (!sk_is_tcp(sk))
+ return -EBADF;
+
+ if (optlen % sizeof(struct dmabuf_token) ||
+ optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
+ return -EINVAL;
+
+ tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL);
+ if (!tokens)
+ return -ENOMEM;
+
+ num_tokens = optlen / sizeof(struct dmabuf_token);
+ if (copy_from_sockptr(tokens, optval, optlen)) {
+ kvfree(tokens);
+ return -EFAULT;
+ }
+
+ xa_lock_bh(&sk->sk_user_frags);
+ for (i = 0; i < num_tokens; i++) {
+ for (j = 0; j < tokens[i].token_count; j++) {
+ netmem_ref netmem = (__force netmem_ref)__xa_erase(
+ &sk->sk_user_frags, tokens[i].token_start + j);
+
+ if (netmem &&
+ !WARN_ON_ONCE(!netmem_is_net_iov(netmem))) {
+ netmems[netmem_num++] = netmem;
+ if (netmem_num == ARRAY_SIZE(netmems)) {
+ xa_unlock_bh(&sk->sk_user_frags);
+ for (k = 0; k < netmem_num; k++)
+ WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
+ netmem_num = 0;
+ xa_lock_bh(&sk->sk_user_frags);
+ }
+ ret++;
+ }
+ }
+ }
+
+ xa_unlock_bh(&sk->sk_user_frags);
+ for (k = 0; k < netmem_num; k++)
+ WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
+
+ kvfree(tokens);
+ return ret;
+}
+#endif
+
void sockopt_lock_sock(struct sock *sk)
{
/* When current->bpf_ctx is set, the setsockopt is called from
@@ -1211,6 +1275,10 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
ret = -EOPNOTSUPP;
return ret;
}
+#ifdef CONFIG_PAGE_POOL
+ case SO_DEVMEM_DONTNEED:
+ return sock_devmem_dontneed(sk, optval, optlen);
+#endif
}
sockopt_lock_sock(sk);
@@ -2048,7 +2116,7 @@ static inline void sock_lock_init(struct sock *sk)
/*
* Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
- * even temporarly, because of RCU lookups. sk_node should also be left as is.
+ * even temporarily, because of RCU lookups. sk_node should also be left as is.
* We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
*/
static void sock_copy(struct sock *nsk, const struct sock *osk)
@@ -2538,7 +2606,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
skb_set_hash_from_sk(skb, sk);
/*
* We used to take a refcount on sk, but following operation
- * is enough to guarantee sk_free() wont free this sock until
+ * is enough to guarantee sk_free() won't free this sock until
* all in-flight packets are completed
*/
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
@@ -3429,7 +3497,7 @@ static void sock_def_destruct(struct sock *sk)
void sk_send_sigurg(struct sock *sk)
{
if (sk->sk_socket && sk->sk_socket->file)
- if (send_sigurg(&sk->sk_socket->file->f_owner))
+ if (send_sigurg(sk->sk_socket->file))
sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);
@@ -3697,7 +3765,7 @@ EXPORT_SYMBOL(sock_recv_errqueue);
*
* FIX: POSIX 1003.1g is very ambiguous here. It states that
* asynchronous errors should be reported by getsockopt. We assume
- * this means if you specify SO_ERROR (otherwise whats the point of it).
+ * this means if you specify SO_ERROR (otherwise what is the point of it).
*/
int sock_common_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index d3dbb92153f2..724b6856fcc3 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1183,6 +1183,7 @@ static void sock_hash_free(struct bpf_map *map)
sock_put(elem->sk);
sock_hash_free_elem(htab, elem);
}
+ cond_resched();
}
/* wait for psock readers accessing its map link */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5a165286e4d8..4211710393a8 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -173,10 +173,9 @@ static bool __reuseport_detach_closed_sock(struct sock *sk,
static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
- unsigned int size = sizeof(struct sock_reuseport) +
- sizeof(struct sock *) * max_socks;
- struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
+ struct sock_reuseport *reuse;
+ reuse = kzalloc(struct_size(reuse, socks, max_socks), GFP_ATOMIC);
if (!reuse)
return NULL;
diff --git a/net/core/utils.c b/net/core/utils.c
index c994e95172ac..27f4cffaae05 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Generic address resultion entity
+ * Generic address resolution entity
*
* Authors:
* net_random Alan Cox
diff --git a/net/dsa/tag.c b/net/dsa/tag.c
index 6e402d49afd3..79ad105902d9 100644
--- a/net/dsa/tag.c
+++ b/net/dsa/tag.c
@@ -105,8 +105,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
p = netdev_priv(skb->dev);
- if (unlikely(cpu_dp->ds->untag_bridge_pvid)) {
- nskb = dsa_untag_bridge_pvid(skb);
+ if (unlikely(cpu_dp->ds->untag_bridge_pvid ||
+ cpu_dp->ds->untag_vlan_aware_bridge_pvid)) {
+ nskb = dsa_software_vlan_untag(skb);
if (!nskb) {
kfree_skb(skb);
return 0;
diff --git a/net/dsa/tag.h b/net/dsa/tag.h
index f6b9c73718df..d5707870906b 100644
--- a/net/dsa/tag.h
+++ b/net/dsa/tag.h
@@ -44,46 +44,81 @@ static inline struct net_device *dsa_conduit_find_user(struct net_device *dev,
return NULL;
}
-/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged
- * frames as untagged, since the bridge will not untag them.
+/**
+ * dsa_software_untag_vlan_aware_bridge: Software untagging for VLAN-aware bridge
+ * @skb: Pointer to received socket buffer (packet)
+ * @br: Pointer to bridge upper interface of ingress port
+ * @vid: Parsed VID from packet
+ *
+ * The bridge can process tagged packets. Software like STP/PTP may not. The
+ * bridge can also process untagged packets, to the same effect as if they were
+ * tagged with the PVID of the ingress port. So packets tagged with the PVID of
+ * the bridge port must be software-untagged, to support both use cases.
*/
-static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
+static inline void dsa_software_untag_vlan_aware_bridge(struct sk_buff *skb,
+ struct net_device *br,
+ u16 vid)
{
- struct dsa_port *dp = dsa_user_to_port(skb->dev);
- struct net_device *br = dsa_port_bridge_dev_get(dp);
- struct net_device *dev = skb->dev;
- struct net_device *upper_dev;
- u16 vid, pvid, proto;
+ u16 pvid, proto;
int err;
- if (!br || br_vlan_enabled(br))
- return skb;
-
err = br_vlan_get_proto(br, &proto);
if (err)
- return skb;
+ return;
- /* Move VLAN tag from data to hwaccel */
- if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) {
- skb = skb_vlan_untag(skb);
- if (!skb)
- return NULL;
- }
+ err = br_vlan_get_pvid_rcu(skb->dev, &pvid);
+ if (err)
+ return;
- if (!skb_vlan_tag_present(skb))
- return skb;
+ if (vid == pvid && skb->vlan_proto == htons(proto))
+ __vlan_hwaccel_clear_tag(skb);
+}
- vid = skb_vlan_tag_get_id(skb);
+/**
+ * dsa_software_untag_vlan_unaware_bridge: Software untagging for VLAN-unaware bridge
+ * @skb: Pointer to received socket buffer (packet)
+ * @br: Pointer to bridge upper interface of ingress port
+ * @vid: Parsed VID from packet
+ *
+ * The bridge ignores all VLAN tags. Software like STP/PTP may not (it may run
+ * on the plain port, or on a VLAN upper interface). Maybe packets are coming
+ * to software as tagged with a driver-defined VID which is NOT equal to the
+ * PVID of the bridge port (since the bridge is VLAN-unaware, its configuration
+ * should NOT be committed to hardware). DSA needs a method for this private
+ * VID to be communicated by software to it, and if packets are tagged with it,
+ * software-untag them. Note: the private VID may be different per bridge, to
+ * support the FDB isolation use case.
+ *
+ * FIXME: this is currently implemented based on the broken assumption that
+ * the "private VID" used by the driver in VLAN-unaware mode is equal to the
+ * bridge PVID. It should not be, except for a coincidence; the bridge PVID is
+ * irrelevant to the data path in the VLAN-unaware mode. Thus, the VID that
+ * this function removes is wrong.
+ *
+ * All users of ds->untag_bridge_pvid should fix their drivers, if necessary,
+ * to make the two independent. Only then, if there still remains a need to
+ * strip the private VID from packets, then a new ds->ops->get_private_vid()
+ * API shall be introduced to communicate to DSA what this VID is, which needs
+ * to be stripped here.
+ */
+static inline void dsa_software_untag_vlan_unaware_bridge(struct sk_buff *skb,
+ struct net_device *br,
+ u16 vid)
+{
+ struct net_device *upper_dev;
+ u16 pvid, proto;
+ int err;
- /* We already run under an RCU read-side critical section since
- * we are called from netif_receive_skb_list_internal().
- */
- err = br_vlan_get_pvid_rcu(dev, &pvid);
+ err = br_vlan_get_proto(br, &proto);
if (err)
- return skb;
+ return;
- if (vid != pvid)
- return skb;
+ err = br_vlan_get_pvid_rcu(skb->dev, &pvid);
+ if (err)
+ return;
+
+ if (vid != pvid || skb->vlan_proto != htons(proto))
+ return;
/* The sad part about attempting to untag from DSA is that we
* don't know, unless we check, if the skb will end up in
@@ -95,10 +130,50 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
* definitely keep the tag, to make sure it keeps working.
*/
upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid);
- if (upper_dev)
+ if (!upper_dev)
+ __vlan_hwaccel_clear_tag(skb);
+}
+
+/**
+ * dsa_software_vlan_untag: Software VLAN untagging in DSA receive path
+ * @skb: Pointer to socket buffer (packet)
+ *
+ * Receive path method for switches which cannot avoid tagging all packets
+ * towards the CPU port. Called when ds->untag_bridge_pvid (legacy) or
+ * ds->untag_vlan_aware_bridge_pvid is set to true.
+ *
+ * As a side effect of this method, any VLAN tag from the skb head is moved
+ * to hwaccel.
+ */
+static inline struct sk_buff *dsa_software_vlan_untag(struct sk_buff *skb)
+{
+ struct dsa_port *dp = dsa_user_to_port(skb->dev);
+ struct net_device *br = dsa_port_bridge_dev_get(dp);
+ u16 vid;
+
+ /* software untagging for standalone ports not yet necessary */
+ if (!br)
return skb;
- __vlan_hwaccel_clear_tag(skb);
+ /* Move VLAN tag from data to hwaccel */
+ if (!skb_vlan_tag_present(skb)) {
+ skb = skb_vlan_untag(skb);
+ if (!skb)
+ return NULL;
+ }
+
+ if (!skb_vlan_tag_present(skb))
+ return skb;
+
+ vid = skb_vlan_tag_get_id(skb);
+
+ if (br_vlan_enabled(br)) {
+ if (dp->ds->untag_vlan_aware_bridge_pvid)
+ dsa_software_untag_vlan_aware_bridge(skb, br, vid);
+ } else {
+ if (dp->ds->untag_bridge_pvid)
+ dsa_software_untag_vlan_unaware_bridge(skb, br, vid);
+ }
return skb;
}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index ee7b272ab715..281bbac5539d 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -111,9 +111,10 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
* DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
* ---------------------------------------------------------------------------
* tag0 : zero-based value represents port
- * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ * (eg, 0x0=port1, 0x2=port3, 0x3=port4)
*/
+#define KSZ8795_TAIL_TAG_EG_PORT_M GENMASK(1, 0)
#define KSZ8795_TAIL_TAG_OVERRIDE BIT(6)
#define KSZ8795_TAIL_TAG_LOOKUP BIT(7)
@@ -141,7 +142,8 @@ static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
- return ksz_common_rcv(skb, dev, tag[0] & 7, KSZ_EGRESS_TAG_LEN);
+ return ksz_common_rcv(skb, dev, tag[0] & KSZ8795_TAIL_TAG_EG_PORT_M,
+ KSZ_EGRESS_TAG_LEN);
}
static const struct dsa_device_ops ksz8795_netdev_ops = {
@@ -176,8 +178,9 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795, KSZ8795_NAME);
#define KSZ9477_INGRESS_TAG_LEN 2
#define KSZ9477_PTP_TAG_LEN 4
-#define KSZ9477_PTP_TAG_INDICATION 0x80
+#define KSZ9477_PTP_TAG_INDICATION BIT(7)
+#define KSZ9477_TAIL_TAG_EG_PORT_M GENMASK(2, 0)
#define KSZ9477_TAIL_TAG_PRIO GENMASK(8, 7)
#define KSZ9477_TAIL_TAG_OVERRIDE BIT(9)
#define KSZ9477_TAIL_TAG_LOOKUP BIT(10)
@@ -310,7 +313,7 @@ static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
{
/* Tag decoding */
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
- unsigned int port = tag[0] & 7;
+ unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M;
unsigned int len = KSZ_EGRESS_TAG_LEN;
/* Extra 4-bytes PTP timestamp */
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index e0e4300bfbd3..bf6608fc6be7 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -8,40 +8,6 @@
#define OCELOT_NAME "ocelot"
#define SEVILLE_NAME "seville"
-/* If the port is under a VLAN-aware bridge, remove the VLAN header from the
- * payload and move it into the DSA tag, which will make the switch classify
- * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero,
- * which is the pvid of standalone and VLAN-unaware bridge ports.
- */
-static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp,
- u64 *vlan_tci, u64 *tag_type)
-{
- struct net_device *br = dsa_port_bridge_dev_get(dp);
- struct vlan_ethhdr *hdr;
- u16 proto, tci;
-
- if (!br || !br_vlan_enabled(br)) {
- *vlan_tci = 0;
- *tag_type = IFH_TAG_TYPE_C;
- return;
- }
-
- hdr = skb_vlan_eth_hdr(skb);
- br_vlan_get_proto(br, &proto);
-
- if (ntohs(hdr->h_vlan_proto) == proto) {
- vlan_remove_tag(skb, &tci);
- *vlan_tci = tci;
- } else {
- rcu_read_lock();
- br_vlan_get_pvid_rcu(br, &tci);
- rcu_read_unlock();
- *vlan_tci = tci;
- }
-
- *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C;
-}
-
static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
__be32 ifh_prefix, void **ifh)
{
@@ -53,7 +19,8 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
u32 rew_op = 0;
u64 qos_class;
- ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type);
+ ocelot_xmit_get_vlan_info(skb, dsa_port_bridge_dev_get(dp), &vlan_tci,
+ &tag_type);
qos_class = netdev_get_num_tc(netdev) ?
netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority;
diff --git a/net/dsa/user.c b/net/dsa/user.c
index f5adfa1d978a..74eda9b30608 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -2642,11 +2642,12 @@ void dsa_user_setup_tagger(struct net_device *user)
user->features = conduit->vlan_features | NETIF_F_HW_TC;
user->hw_features |= NETIF_F_HW_TC;
- user->features |= NETIF_F_LLTX;
if (user->needed_tailroom)
user->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST);
if (ds->needs_standalone_vlan_filtering)
user->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ user->lltx = true;
}
int dsa_user_suspend(struct net_device *user_dev)
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 9a190635fe95..9b540644ba31 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -8,4 +8,5 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \
- module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o
+ module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o \
+ phy.o
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index f6f136ec7ddf..f22051f33868 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -13,7 +13,7 @@
const struct nla_policy ethnl_cable_test_act_policy[] = {
[ETHTOOL_A_CABLE_TEST_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
static int ethnl_cable_test_started(struct phy_device *phydev, u8 cmd)
@@ -58,6 +58,7 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
struct ethnl_req_info req_info = {};
const struct ethtool_phy_ops *ops;
struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
struct net_device *dev;
int ret;
@@ -69,12 +70,16 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
return ret;
dev = req_info.dev;
- if (!dev->phydev) {
+
+ rtnl_lock();
+ phydev = ethnl_req_get_phydev(&req_info,
+ tb[ETHTOOL_A_CABLE_TEST_HEADER],
+ info->extack);
+ if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
- goto out_dev_put;
+ goto out_rtnl;
}
- rtnl_lock();
ops = ethtool_phy_ops;
if (!ops || !ops->start_cable_test) {
ret = -EOPNOTSUPP;
@@ -85,17 +90,15 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto out_rtnl;
- ret = ops->start_cable_test(dev->phydev, info->extack);
+ ret = ops->start_cable_test(phydev, info->extack);
ethnl_ops_complete(dev);
if (!ret)
- ethnl_cable_test_started(dev->phydev,
- ETHTOOL_MSG_CABLE_TEST_NTF);
+ ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_NTF);
out_rtnl:
rtnl_unlock();
-out_dev_put:
ethnl_parse_header_dev_put(&req_info);
return ret;
}
@@ -160,7 +163,8 @@ void ethnl_cable_test_finished(struct phy_device *phydev)
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_finished);
-int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result)
+int ethnl_cable_test_result_with_src(struct phy_device *phydev, u8 pair,
+ u8 result, u32 src)
{
struct nlattr *nest;
int ret = -EMSGSIZE;
@@ -173,6 +177,10 @@ int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result)
goto err;
if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_RESULT_CODE, result))
goto err;
+ if (src != ETHTOOL_A_CABLE_INF_SRC_UNSPEC) {
+ if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_RESULT_SRC, src))
+ goto err;
+ }
nla_nest_end(phydev->skb, nest);
return 0;
@@ -181,9 +189,10 @@ err:
nla_nest_cancel(phydev->skb, nest);
return ret;
}
-EXPORT_SYMBOL_GPL(ethnl_cable_test_result);
+EXPORT_SYMBOL_GPL(ethnl_cable_test_result_with_src);
-int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm)
+int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, u8 pair,
+ u32 cm, u32 src)
{
struct nlattr *nest;
int ret = -EMSGSIZE;
@@ -197,6 +206,11 @@ int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm)
goto err;
if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_CM, cm))
goto err;
+ if (src != ETHTOOL_A_CABLE_INF_SRC_UNSPEC) {
+ if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,
+ src))
+ goto err;
+ }
nla_nest_end(phydev->skb, nest);
return 0;
@@ -205,7 +219,7 @@ err:
nla_nest_cancel(phydev->skb, nest);
return ret;
}
-EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length);
+EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length_with_src);
static const struct nla_policy cable_test_tdr_act_cfg_policy[] = {
[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .type = NLA_U32 },
@@ -216,7 +230,7 @@ static const struct nla_policy cable_test_tdr_act_cfg_policy[] = {
const struct nla_policy ethnl_cable_test_tdr_act_policy[] = {
[ETHTOOL_A_CABLE_TEST_TDR_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_CABLE_TEST_TDR_CFG] = { .type = NLA_NESTED },
};
@@ -305,6 +319,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
struct ethnl_req_info req_info = {};
const struct ethtool_phy_ops *ops;
struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
struct phy_tdr_config cfg;
struct net_device *dev;
int ret;
@@ -317,10 +332,6 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
return ret;
dev = req_info.dev;
- if (!dev->phydev) {
- ret = -EOPNOTSUPP;
- goto out_dev_put;
- }
ret = ethnl_act_cable_test_tdr_cfg(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG],
info, &cfg);
@@ -328,6 +339,14 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
goto out_dev_put;
rtnl_lock();
+ phydev = ethnl_req_get_phydev(&req_info,
+ tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER],
+ info->extack);
+ if (IS_ERR_OR_NULL(phydev)) {
+ ret = -EOPNOTSUPP;
+ goto out_rtnl;
+ }
+
ops = ethtool_phy_ops;
if (!ops || !ops->start_cable_test_tdr) {
ret = -EOPNOTSUPP;
@@ -338,12 +357,12 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto out_rtnl;
- ret = ops->start_cable_test_tdr(dev->phydev, info->extack, &cfg);
+ ret = ops->start_cable_test_tdr(phydev, info->extack, &cfg);
ethnl_ops_complete(dev);
if (!ret)
- ethnl_cable_test_started(dev->phydev,
+ ethnl_cable_test_started(phydev,
ETHTOOL_MSG_CABLE_TEST_TDR_NTF);
out_rtnl:
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index cee188da54f8..ca4f80282448 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -114,8 +114,7 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
struct net_device *dev = req_info->dev;
struct ethtool_channels channels = {};
struct nlattr **tb = info->attrs;
- u32 err_attr, max_rxfh_in_use;
- u64 max_rxnfc_in_use;
+ u32 err_attr;
int ret;
dev->ethtool_ops->get_channels(dev, &channels);
@@ -166,20 +165,9 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
return -EINVAL;
}
- /* ensure the new Rx count fits within the configured Rx flow
- * indirection table/rxnfc settings
- */
- if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
- max_rxnfc_in_use = 0;
- max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
- if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
- GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use);
- return -EINVAL;
- }
- if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
- GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing ntuple filter settings");
- return -EINVAL;
- }
+ ret = ethtool_check_max_channel(dev, channels, info);
+ if (ret)
+ return ret;
/* Disabling channels, query zero-copy AF_XDP sockets */
from_channel = channels.combined_count +
diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h
index e71cc3e1b7eb..3e7c293af78c 100644
--- a/net/ethtool/cmis.h
+++ b/net/ethtool/cmis.h
@@ -108,7 +108,6 @@ void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags);
void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data,
u8 page, u32 offset, u32 length);
-void ethtool_cmis_page_fini(struct ethtool_module_eeprom *page_data);
struct ethtool_cmis_cdb *
ethtool_cmis_cdb_init(struct net_device *dev,
diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c
index 1bb08783b60d..4d5581147952 100644
--- a/net/ethtool/cmis_cdb.c
+++ b/net/ethtool/cmis_cdb.c
@@ -100,7 +100,8 @@ static u8 cmis_cdb_advert_rpl_inst_supported(struct cmis_cdb_advert_rpl *rpl)
}
static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb,
- struct net_device *dev)
+ struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_module_eeprom page_data = {};
@@ -119,8 +120,12 @@ static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb,
return err;
}
- if (!cmis_cdb_advert_rpl_inst_supported(&rpl))
+ if (!cmis_cdb_advert_rpl_inst_supported(&rpl)) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "CDB functionality is not supported",
+ NULL);
return -EOPNOTSUPP;
+ }
cdb->read_write_len_ext = rpl.read_write_len_ext;
@@ -282,7 +287,7 @@ ethtool_cmis_cdb_init(struct net_device *dev,
goto err;
}
- err = cmis_cdb_advertisement_get(cdb, dev);
+ err = cmis_cdb_advertisement_get(cdb, dev, ntf_params);
if (err < 0)
goto err;
@@ -444,6 +449,9 @@ static void cmis_cdb_status_fail_msg_get(u8 status, char **err_msg)
case 0b01000101:
*err_msg = "CDB status failed: CdbChkCode error";
break;
+ case 0b01000110:
+ *err_msg = "CDB status failed: Password error";
+ break;
default:
*err_msg = "Unknown failure reason";
}
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 07032babd1b6..dd345efa114b 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -6,6 +6,7 @@
#include <linux/rtnetlink.h>
#include <linux/ptp_clock_kernel.h>
+#include "netlink.h"
#include "common.h"
const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
@@ -24,8 +25,6 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
[NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
[NETIF_F_GSO_BIT] = "tx-generic-segmentation",
- [NETIF_F_LLTX_BIT] = "tx-lockless",
- [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
[NETIF_F_GRO_BIT] = "rx-gro",
[NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
[NETIF_F_LRO_BIT] = "rx-lro",
@@ -51,7 +50,6 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
- [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
[NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
[NETIF_F_RXHASH_BIT] = "rx-hashing",
[NETIF_F_RXCSUM_BIT] = "rx-checksum",
@@ -429,6 +427,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = {
[const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw",
[const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc",
[const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp",
+ [const_ilog2(SOF_TIMESTAMPING_OPT_RX_FILTER)] = "option-rx-filter",
};
static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT);
@@ -539,7 +538,7 @@ static int ethtool_get_rxnfc_rule_count(struct net_device *dev)
return info.rule_cnt;
}
-int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max)
+static int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_rxnfc *info;
@@ -609,7 +608,7 @@ static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev)
return max_ring;
}
-u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
+static u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
{
struct ethtool_rxfh_param rxfh = {};
u32 dev_size, current_max;
@@ -650,10 +649,47 @@ out_free:
return current_max;
}
+int ethtool_check_max_channel(struct net_device *dev,
+ struct ethtool_channels channels,
+ struct genl_info *info)
+{
+ u64 max_rxnfc_in_use;
+ u32 max_rxfh_in_use;
+ int max_mp_in_use;
+
+ /* ensure the new Rx count fits within the configured Rx flow
+ * indirection table/rxnfc settings
+ */
+ if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
+ max_rxnfc_in_use = 0;
+ max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
+ if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
+ if (info)
+ GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use);
+ return -EINVAL;
+ }
+ if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
+ if (info)
+ GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing ntuple filter settings");
+ return -EINVAL;
+ }
+
+ max_mp_in_use = dev_get_min_mp_channel_count(dev);
+ if (channels.combined_count + channels.rx_count <= max_mp_in_use) {
+ if (info)
+ GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing memory provider setting (%d)", max_mp_in_use);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int ethtool_check_ops(const struct ethtool_ops *ops)
{
if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params))
return -EINVAL;
+ if (WARN_ON(ops->rxfh_max_num_contexts == 1))
+ return -EINVAL;
/* NOTE: sufficiently insane drivers may swap ethtool_ops at runtime,
* the fact that ops are checked at registration time does not
* mean the ops attached to a netdev later on are sane.
@@ -665,20 +701,21 @@ int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ int err = 0;
memset(info, 0, sizeof(*info));
info->cmd = ETHTOOL_GET_TS_INFO;
+ info->phc_index = -1;
if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev))
- return phy_ts_info(phydev, info);
- if (ops->get_ts_info)
- return ops->get_ts_info(dev, info);
+ err = phy_ts_info(phydev, info);
+ else if (ops->get_ts_info)
+ err = ops->get_ts_info(dev, info);
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
+ info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
- return 0;
+ return err;
}
int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index 863806fcf01a..d55d5201b085 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -20,6 +20,8 @@ struct link_mode_info {
u8 duplex;
};
+struct genl_info;
+
extern const char
netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN];
extern const char
@@ -42,8 +44,9 @@ int __ethtool_get_link(struct net_device *dev);
bool convert_legacy_settings_to_link_ksettings(
struct ethtool_link_ksettings *link_ksettings,
const struct ethtool_cmd *legacy_settings);
-u32 ethtool_get_max_rxfh_channel(struct net_device *dev);
-int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max);
+int ethtool_check_max_channel(struct net_device *dev,
+ struct ethtool_channels channels,
+ struct genl_info *info);
int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info);
extern const struct ethtool_phy_ops *ethtool_phy_ops;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index e18823bf2330..65cfe76dafbe 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -442,6 +442,9 @@ int __ethtool_get_link_ksettings(struct net_device *dev,
if (!dev->ethtool_ops->get_link_ksettings)
return -EOPNOTSUPP;
+ if (!netif_device_present(dev))
+ return -ENODEV;
+
memset(link_ksettings, 0, sizeof(*link_ksettings));
return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings);
}
@@ -1227,7 +1230,8 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
+ if (rxfh.rss_context && !(ops->cap_rss_ctx_supported ||
+ ops->create_rxfh_context))
return -EOPNOTSUPP;
rxfh.indir_size = rxfh_dev.indir_size;
@@ -1260,10 +1264,15 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (rxfh_dev.indir)
memcpy(rxfh_dev.indir, ethtool_rxfh_context_indir(ctx),
indir_bytes);
- if (rxfh_dev.key)
- memcpy(rxfh_dev.key, ethtool_rxfh_context_key(ctx),
- user_key_size);
- rxfh_dev.hfunc = ctx->hfunc;
+ if (!ops->rxfh_per_ctx_key) {
+ rxfh_dev.key_size = 0;
+ } else {
+ if (rxfh_dev.key)
+ memcpy(rxfh_dev.key,
+ ethtool_rxfh_context_key(ctx),
+ user_key_size);
+ rxfh_dev.hfunc = ctx->hfunc;
+ }
rxfh_dev.input_xfrm = ctx->input_xfrm;
ret = 0;
} else {
@@ -1281,6 +1290,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
sizeof(rxfh.input_xfrm))) {
ret = -EFAULT;
} else if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh, key_size),
+ &rxfh_dev.key_size,
+ sizeof(rxfh.key_size))) {
+ ret = -EFAULT;
+ } else if (copy_to_user(useraddr +
offsetof(struct ethtool_rxfh, rss_config[0]),
rss_config, total_size)) {
ret = -EFAULT;
@@ -1357,7 +1371,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
+ if (rxfh.rss_context && !(ops->cap_rss_ctx_supported ||
+ ops->create_rxfh_context))
return -EOPNOTSUPP;
/* Check input data transformation capabilities */
if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
@@ -1387,6 +1402,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]);
+ /* Check settings which may be global rather than per RSS-context */
+ if (rxfh.rss_context && !ops->rxfh_per_ctx_key)
+ if (rxfh.key_size ||
+ (rxfh.hfunc && rxfh.hfunc != ETH_RSS_HASH_NO_CHANGE) ||
+ (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_NO_CHANGE))
+ return -EOPNOTSUPP;
+
rss_config = kzalloc(indir_bytes + dev_key_size, GFP_USER);
if (!rss_config)
return -ENOMEM;
@@ -2069,8 +2091,6 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
{
struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS };
u16 from_channel, to_channel;
- u64 max_rxnfc_in_use;
- u32 max_rxfh_in_use;
unsigned int i;
int ret;
@@ -2100,14 +2120,9 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
(!channels.rx_count || !channels.tx_count))
return -EINVAL;
- /* ensure the new Rx count fits within the configured Rx flow
- * indirection table/rxnfc settings */
- if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
- max_rxnfc_in_use = 0;
- max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
- if (channels.combined_count + channels.rx_count <=
- max_t(u64, max_rxnfc_in_use, max_rxfh_in_use))
- return -EINVAL;
+ ret = ethtool_check_max_channel(dev, channels, NULL);
+ if (ret)
+ return ret;
/* Disabling channels, query zero-copy AF_XDP sockets */
from_channel = channels.combined_count +
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 5c317d23787b..30b8ce275159 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -35,7 +35,7 @@ static int linkinfo_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
ret = __ethtool_get_link_ksettings(dev, &data->ksettings);
- if (ret < 0 && info)
+ if (ret < 0)
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
ethnl_ops_complete(dev);
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index b2591db49f7d..259cd9ef1f2a 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -40,7 +40,7 @@ static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
return ret;
ret = __ethtool_get_link_ksettings(dev, &data->ksettings);
- if (ret < 0 && info) {
+ if (ret < 0) {
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
goto out;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index cb1eea00e349..e3f0ef6b851b 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -2,6 +2,7 @@
#include <net/sock.h>
#include <linux/ethtool_netlink.h>
+#include <linux/phy_link_topology.h>
#include <linux/pm_runtime.h>
#include "netlink.h"
#include "module_fw.h"
@@ -31,6 +32,24 @@ const struct nla_policy ethnl_header_policy_stats[] = {
ETHTOOL_FLAGS_STATS),
};
+const struct nla_policy ethnl_header_policy_phy[] = {
+ [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = ALTIFNAMSIZ - 1 },
+ [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ ETHTOOL_FLAGS_BASIC),
+ [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+const struct nla_policy ethnl_header_policy_phy_stats[] = {
+ [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = ALTIFNAMSIZ - 1 },
+ [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ ETHTOOL_FLAGS_STATS),
+ [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid,
enum ethnl_sock_type type)
{
@@ -119,7 +138,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
const struct nlattr *header, struct net *net,
struct netlink_ext_ack *extack, bool require_dev)
{
- struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)];
+ struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy_phy)];
const struct nlattr *devname_attr;
struct net_device *dev = NULL;
u32 flags = 0;
@@ -134,7 +153,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
/* No validation here, command policy should have a nested policy set
* for the header, therefore validation should have already been done.
*/
- ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header,
+ ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy_phy) - 1, header,
NULL, extack);
if (ret < 0)
return ret;
@@ -175,11 +194,45 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
return -EINVAL;
}
+ if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) {
+ if (dev) {
+ req_info->phy_index = nla_get_u32(tb[ETHTOOL_A_HEADER_PHY_INDEX]);
+ } else {
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "phy_index set without a netdev");
+ return -EINVAL;
+ }
+ }
+
req_info->dev = dev;
req_info->flags = flags;
return 0;
}
+struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info,
+ const struct nlattr *header,
+ struct netlink_ext_ack *extack)
+{
+ struct phy_device *phydev;
+
+ ASSERT_RTNL();
+
+ if (!req_info->dev)
+ return NULL;
+
+ if (!req_info->phy_index)
+ return req_info->dev->phydev;
+
+ phydev = phy_link_topo_get_phy(req_info->dev, req_info->phy_index);
+ if (!phydev) {
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "no phy matching phyindex");
+ return ERR_PTR(-ENODEV);
+ }
+
+ return phydev;
+}
+
/**
* ethnl_fill_reply_header() - Put common header into a reply message
* @skb: skb with the message
@@ -1128,6 +1181,8 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_RSS_GET,
.doit = ethnl_default_doit,
+ .start = ethnl_rss_dump_start,
+ .dumpit = ethnl_rss_dumpit,
.policy = ethnl_rss_get_policy,
.maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1,
},
@@ -1179,6 +1234,15 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_module_fw_flash_act_policy,
.maxattr = ARRAY_SIZE(ethnl_module_fw_flash_act_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_PHY_GET,
+ .doit = ethnl_phy_doit,
+ .start = ethnl_phy_start,
+ .dumpit = ethnl_phy_dumpit,
+ .done = ethnl_phy_done,
+ .policy = ethnl_phy_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 46ec273a87c5..203b08eb6c6f 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -251,6 +251,9 @@ static inline unsigned int ethnl_reply_header_size(void)
* @dev: network device the request is for (may be null)
* @dev_tracker: refcount tracker for @dev reference
* @flags: request flags common for all request types
+ * @phy_index: phy_device index connected to @dev this request is for. Can be
+ * 0 if the request doesn't target a phy, or if the @dev's attached
+ * phy is targeted.
*
* This is a common base for request specific structures holding data from
* parsed userspace request. These always embed struct ethnl_req_info at
@@ -260,6 +263,7 @@ struct ethnl_req_info {
struct net_device *dev;
netdevice_tracker dev_tracker;
u32 flags;
+ u32 phy_index;
};
static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
@@ -268,6 +272,27 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
}
/**
+ * ethnl_req_get_phydev() - Gets the phy_device targeted by this request,
+ * if any. Must be called under rntl_lock().
+ * @req_info: The ethnl request to get the phy from.
+ * @header: The netlink header, used for error reporting.
+ * @extack: The netlink extended ACK, for error reporting.
+ *
+ * The caller must hold RTNL, until it's done interacting with the returned
+ * phy_device.
+ *
+ * Return: A phy_device pointer corresponding either to the passed phy_index
+ * if one is provided. If not, the phy_device attached to the
+ * net_device targeted by this request is returned. If there's no
+ * targeted net_device, or no phy_device is attached, NULL is
+ * returned. If the provided phy_index is invalid, an error pointer
+ * is returned.
+ */
+struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info,
+ const struct nlattr *header,
+ struct netlink_ext_ack *extack);
+
+/**
* struct ethnl_reply_data - base type of reply data for GET requests
* @dev: device for current reply message; in single shot requests it is
* equal to &ethnl_req_info.dev; in dumps it's different for each
@@ -409,9 +434,12 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops;
extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops;
extern const struct ethnl_request_ops ethnl_plca_status_request_ops;
extern const struct ethnl_request_ops ethnl_mm_request_ops;
+extern const struct ethnl_request_ops ethnl_phy_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
+extern const struct nla_policy ethnl_header_policy_phy[ETHTOOL_A_HEADER_PHY_INDEX + 1];
+extern const struct nla_policy ethnl_header_policy_phy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1];
extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1];
extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1];
extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1];
@@ -449,13 +477,14 @@ extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER +
extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
-extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1];
+extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_START_CONTEXT + 1];
extern const struct nla_policy ethnl_plca_get_cfg_policy[ETHTOOL_A_PLCA_HEADER + 1];
extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1];
extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1];
extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1];
extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1];
extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1];
+extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1];
int ethnl_set_features(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
@@ -464,6 +493,12 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_start(struct netlink_callback *cb);
int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info);
+int ethnl_rss_dump_start(struct netlink_callback *cb);
+int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_phy_start(struct netlink_callback *cb);
+int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info);
+int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_phy_done(struct netlink_callback *cb);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c
new file mode 100644
index 000000000000..ed8f690f6bac
--- /dev/null
+++ b/net/ethtool/phy.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Bootlin
+ *
+ */
+#include "common.h"
+#include "netlink.h"
+
+#include <linux/phy.h>
+#include <linux/phy_link_topology.h>
+#include <linux/sfp.h>
+
+struct phy_req_info {
+ struct ethnl_req_info base;
+ struct phy_device_node *pdn;
+};
+
+#define PHY_REQINFO(__req_base) \
+ container_of(__req_base, struct phy_req_info, base)
+
+const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1] = {
+ [ETHTOOL_A_PHY_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+/* Caller holds rtnl */
+static ssize_t
+ethnl_phy_reply_size(const struct ethnl_req_info *req_base,
+ struct netlink_ext_ack *extack)
+{
+ struct phy_req_info *req_info = PHY_REQINFO(req_base);
+ struct phy_device_node *pdn = req_info->pdn;
+ struct phy_device *phydev = pdn->phy;
+ size_t size = 0;
+
+ ASSERT_RTNL();
+
+ /* ETHTOOL_A_PHY_INDEX */
+ size += nla_total_size(sizeof(u32));
+
+ /* ETHTOOL_A_DRVNAME */
+ if (phydev->drv)
+ size += nla_total_size(strlen(phydev->drv->name) + 1);
+
+ /* ETHTOOL_A_NAME */
+ size += nla_total_size(strlen(dev_name(&phydev->mdio.dev)) + 1);
+
+ /* ETHTOOL_A_PHY_UPSTREAM_TYPE */
+ size += nla_total_size(sizeof(u32));
+
+ if (phy_on_sfp(phydev)) {
+ const char *upstream_sfp_name = sfp_get_name(pdn->parent_sfp_bus);
+
+ /* ETHTOOL_A_PHY_UPSTREAM_SFP_NAME */
+ if (upstream_sfp_name)
+ size += nla_total_size(strlen(upstream_sfp_name) + 1);
+
+ /* ETHTOOL_A_PHY_UPSTREAM_INDEX */
+ size += nla_total_size(sizeof(u32));
+ }
+
+ /* ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME */
+ if (phydev->sfp_bus) {
+ const char *sfp_name = sfp_get_name(phydev->sfp_bus);
+
+ if (sfp_name)
+ size += nla_total_size(strlen(sfp_name) + 1);
+ }
+
+ return size;
+}
+
+static int
+ethnl_phy_fill_reply(const struct ethnl_req_info *req_base, struct sk_buff *skb)
+{
+ struct phy_req_info *req_info = PHY_REQINFO(req_base);
+ struct phy_device_node *pdn = req_info->pdn;
+ struct phy_device *phydev = pdn->phy;
+ enum phy_upstream ptype;
+
+ ptype = pdn->upstream_type;
+
+ if (nla_put_u32(skb, ETHTOOL_A_PHY_INDEX, phydev->phyindex) ||
+ nla_put_string(skb, ETHTOOL_A_PHY_NAME, dev_name(&phydev->mdio.dev)) ||
+ nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_TYPE, ptype))
+ return -EMSGSIZE;
+
+ if (phydev->drv &&
+ nla_put_string(skb, ETHTOOL_A_PHY_DRVNAME, phydev->drv->name))
+ return -EMSGSIZE;
+
+ if (ptype == PHY_UPSTREAM_PHY) {
+ struct phy_device *upstream = pdn->upstream.phydev;
+ const char *sfp_upstream_name;
+
+ /* Parent index */
+ if (nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_INDEX, upstream->phyindex))
+ return -EMSGSIZE;
+
+ if (pdn->parent_sfp_bus) {
+ sfp_upstream_name = sfp_get_name(pdn->parent_sfp_bus);
+ if (sfp_upstream_name &&
+ nla_put_string(skb, ETHTOOL_A_PHY_UPSTREAM_SFP_NAME,
+ sfp_upstream_name))
+ return -EMSGSIZE;
+ }
+ }
+
+ if (phydev->sfp_bus) {
+ const char *sfp_name = sfp_get_name(phydev->sfp_bus);
+
+ if (sfp_name &&
+ nla_put_string(skb, ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME,
+ sfp_name))
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+static int ethnl_phy_parse_request(struct ethnl_req_info *req_base,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct phy_link_topology *topo = req_base->dev->link_topo;
+ struct phy_req_info *req_info = PHY_REQINFO(req_base);
+ struct phy_device *phydev;
+
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PHY_HEADER],
+ extack);
+ if (!phydev)
+ return 0;
+
+ if (IS_ERR(phydev))
+ return PTR_ERR(phydev);
+
+ if (!topo)
+ return 0;
+
+ req_info->pdn = xa_load(&topo->phys, phydev->phyindex);
+
+ return 0;
+}
+
+int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct phy_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ struct sk_buff *rskb;
+ void *reply_payload;
+ int reply_len;
+ int ret;
+
+ ret = ethnl_parse_header_dev_get(&req_info.base,
+ tb[ETHTOOL_A_PHY_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+
+ rtnl_lock();
+
+ ret = ethnl_phy_parse_request(&req_info.base, tb, info->extack);
+ if (ret < 0)
+ goto err_unlock_rtnl;
+
+ /* No PHY, return early */
+ if (!req_info.pdn)
+ goto err_unlock_rtnl;
+
+ ret = ethnl_phy_reply_size(&req_info.base, info->extack);
+ if (ret < 0)
+ goto err_unlock_rtnl;
+ reply_len = ret + ethnl_reply_header_size();
+
+ rskb = ethnl_reply_init(reply_len, req_info.base.dev,
+ ETHTOOL_MSG_PHY_GET_REPLY,
+ ETHTOOL_A_PHY_HEADER,
+ info, &reply_payload);
+ if (!rskb) {
+ ret = -ENOMEM;
+ goto err_unlock_rtnl;
+ }
+
+ ret = ethnl_phy_fill_reply(&req_info.base, rskb);
+ if (ret)
+ goto err_free_msg;
+
+ rtnl_unlock();
+ ethnl_parse_header_dev_put(&req_info.base);
+ genlmsg_end(rskb, reply_payload);
+
+ return genlmsg_reply(rskb, info);
+
+err_free_msg:
+ nlmsg_free(rskb);
+err_unlock_rtnl:
+ rtnl_unlock();
+ ethnl_parse_header_dev_put(&req_info.base);
+ return ret;
+}
+
+struct ethnl_phy_dump_ctx {
+ struct phy_req_info *phy_req_info;
+ unsigned long ifindex;
+ unsigned long phy_index;
+};
+
+int ethnl_phy_start(struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+ int ret;
+
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+ ctx->phy_req_info = kzalloc(sizeof(*ctx->phy_req_info), GFP_KERNEL);
+ if (!ctx->phy_req_info)
+ return -ENOMEM;
+
+ ret = ethnl_parse_header_dev_get(&ctx->phy_req_info->base,
+ info->attrs[ETHTOOL_A_PHY_HEADER],
+ sock_net(cb->skb->sk), cb->extack,
+ false);
+ ctx->ifindex = 0;
+ ctx->phy_index = 0;
+
+ if (ret)
+ kfree(ctx->phy_req_info);
+
+ return ret;
+}
+
+int ethnl_phy_done(struct netlink_callback *cb)
+{
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+
+ if (ctx->phy_req_info->base.dev)
+ ethnl_parse_header_dev_put(&ctx->phy_req_info->base);
+
+ kfree(ctx->phy_req_info);
+
+ return 0;
+}
+
+static int ethnl_phy_dump_one_dev(struct sk_buff *skb, struct net_device *dev,
+ struct netlink_callback *cb)
+{
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+ struct phy_req_info *pri = ctx->phy_req_info;
+ struct phy_device_node *pdn;
+ int ret = 0;
+ void *ehdr;
+
+ if (!dev->link_topo)
+ return 0;
+
+ xa_for_each_start(&dev->link_topo->phys, ctx->phy_index, pdn, ctx->phy_index) {
+ ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_PHY_GET_REPLY);
+ if (!ehdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+
+ ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_PHY_HEADER);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
+ }
+
+ pri->pdn = pdn;
+ ret = ethnl_phy_fill_reply(&pri->base, skb);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
+ }
+
+ genlmsg_end(skb, ehdr);
+ }
+
+ return ret;
+}
+
+int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int ret = 0;
+
+ rtnl_lock();
+
+ if (ctx->phy_req_info->base.dev) {
+ ret = ethnl_phy_dump_one_dev(skb, ctx->phy_req_info->base.dev, cb);
+ } else {
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ ret = ethnl_phy_dump_one_dev(skb, dev, cb);
+ if (ret)
+ break;
+
+ ctx->phy_index = 0;
+ }
+ }
+ rtnl_unlock();
+
+ return ret;
+}
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
index b1e2e3b5027f..d95d92f173a6 100644
--- a/net/ethtool/plca.c
+++ b/net/ethtool/plca.c
@@ -25,7 +25,7 @@ struct plca_reply_data {
const struct nla_policy ethnl_plca_get_cfg_policy[] = {
[ETHTOOL_A_PLCA_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
static void plca_update_sint(int *dst, struct nlattr **tb, u32 attrid,
@@ -58,10 +58,14 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
const struct ethtool_phy_ops *ops;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
+ info->extack);
// check that the PHY device is available and connected
- if (!dev->phydev) {
+ if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -80,7 +84,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
memset(&data->plca_cfg, 0xff,
sizeof_field(struct plca_reply_data, plca_cfg));
- ret = ops->get_plca_cfg(dev->phydev, &data->plca_cfg);
+ ret = ops->get_plca_cfg(phydev, &data->plca_cfg);
ethnl_ops_complete(dev);
out:
@@ -129,7 +133,7 @@ static int plca_get_cfg_fill_reply(struct sk_buff *skb,
const struct nla_policy ethnl_plca_set_cfg_policy[] = {
[ETHTOOL_A_PLCA_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_PLCA_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
[ETHTOOL_A_PLCA_NODE_ID] = NLA_POLICY_MAX(NLA_U32, 255),
[ETHTOOL_A_PLCA_NODE_CNT] = NLA_POLICY_RANGE(NLA_U32, 1, 255),
@@ -141,15 +145,17 @@ const struct nla_policy ethnl_plca_set_cfg_policy[] = {
static int
ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct net_device *dev = req_info->dev;
const struct ethtool_phy_ops *ops;
struct nlattr **tb = info->attrs;
struct phy_plca_cfg plca_cfg;
+ struct phy_device *phydev;
bool mod = false;
int ret;
+ phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER],
+ info->extack);
// check that the PHY device is available and connected
- if (!dev->phydev)
+ if (IS_ERR_OR_NULL(phydev))
return -EOPNOTSUPP;
ops = ethtool_phy_ops;
@@ -168,7 +174,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
if (!mod)
return 0;
- ret = ops->set_plca_cfg(dev->phydev, &plca_cfg, info->extack);
+ ret = ops->set_plca_cfg(phydev, &plca_cfg, info->extack);
return ret < 0 ? ret : 1;
}
@@ -191,7 +197,7 @@ const struct ethnl_request_ops ethnl_plca_cfg_request_ops = {
const struct nla_policy ethnl_plca_get_status_policy[] = {
[ETHTOOL_A_PLCA_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
@@ -201,10 +207,14 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
const struct ethtool_phy_ops *ops;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
+ info->extack);
// check that the PHY device is available and connected
- if (!dev->phydev) {
+ if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -223,7 +233,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
memset(&data->plca_st, 0xff,
sizeof_field(struct plca_reply_data, plca_st));
- ret = ops->get_plca_status(dev->phydev, &data->plca_st);
+ ret = ops->get_plca_status(phydev, &data->plca_st);
ethnl_ops_complete(dev);
out:
return ret;
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index ff81aa749784..a0705edca22a 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -28,17 +28,15 @@ struct pse_reply_data {
/* PSE_GET */
const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1] = {
- [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
-static int pse_get_pse_attributes(struct net_device *dev,
+static int pse_get_pse_attributes(struct phy_device *phydev,
struct netlink_ext_ack *extack,
struct pse_reply_data *data)
{
- struct phy_device *phydev = dev->phydev;
-
if (!phydev) {
- NL_SET_ERR_MSG(extack, "No PHY is attached");
+ NL_SET_ERR_MSG(extack, "No PHY found");
return -EOPNOTSUPP;
}
@@ -58,13 +56,20 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base,
{
struct pse_reply_data *data = PSE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
- ret = pse_get_pse_attributes(dev, info->extack, data);
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PSE_HEADER],
+ info->extack);
+ if (IS_ERR(phydev))
+ return -ENODEV;
+
+ ret = pse_get_pse_attributes(phydev, info->extack, data);
ethnl_ops_complete(dev);
@@ -206,7 +211,7 @@ static void pse_cleanup_data(struct ethnl_reply_data *reply_base)
/* PSE_SET */
const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
- [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] =
NLA_POLICY_RANGE(NLA_U32, ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
@@ -217,14 +222,11 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
};
static int
-ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+ethnl_set_pse_validate(struct phy_device *phydev, struct genl_info *info)
{
- struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct phy_device *phydev;
- phydev = dev->phydev;
- if (!phydev) {
+ if (IS_ERR_OR_NULL(phydev)) {
NL_SET_ERR_MSG(info->extack, "No PHY is attached");
return -EOPNOTSUPP;
}
@@ -249,18 +251,21 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
return -EOPNOTSUPP;
}
- return 1;
+ return 0;
}
static int
ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
struct phy_device *phydev;
- int ret = 0;
+ int ret;
- phydev = dev->phydev;
+ phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
+ info->extack);
+ ret = ethnl_set_pse_validate(phydev, info);
+ if (ret)
+ return ret;
if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) {
unsigned int pw_limit;
@@ -307,7 +312,6 @@ const struct ethnl_request_ops ethnl_pse_request_ops = {
.fill_reply = pse_fill_reply,
.cleanup_data = pse_cleanup_data,
- .set_validate = ethnl_set_pse_validate,
.set = ethnl_set_pse,
/* PSE has no notification */
};
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 5c4c4505ab9a..e07386275e14 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -10,6 +10,7 @@ struct rss_req_info {
struct rss_reply_data {
struct ethnl_reply_data base;
+ bool no_key_fields;
u32 indir_size;
u32 hkey_size;
u32 hfunc;
@@ -27,6 +28,7 @@ struct rss_reply_data {
const struct nla_policy ethnl_rss_get_policy[] = {
[ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
[ETHTOOL_A_RSS_CONTEXT] = { .type = NLA_U32 },
+ [ETHTOOL_A_RSS_START_CONTEXT] = { .type = NLA_U32 },
};
static int
@@ -37,18 +39,18 @@ rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
if (tb[ETHTOOL_A_RSS_CONTEXT])
request->rss_context = nla_get_u32(tb[ETHTOOL_A_RSS_CONTEXT]);
+ if (tb[ETHTOOL_A_RSS_START_CONTEXT]) {
+ NL_SET_BAD_ATTR(extack, tb[ETHTOOL_A_RSS_START_CONTEXT]);
+ return -EINVAL;
+ }
return 0;
}
static int
-rss_prepare_data(const struct ethnl_req_info *req_base,
- struct ethnl_reply_data *reply_base,
- const struct genl_info *info)
+rss_prepare_get(const struct rss_req_info *request, struct net_device *dev,
+ struct rss_reply_data *data, const struct genl_info *info)
{
- struct rss_reply_data *data = RSS_REPDATA(reply_base);
- struct rss_req_info *request = RSS_REQINFO(req_base);
- struct net_device *dev = reply_base->dev;
struct ethtool_rxfh_param rxfh = {};
const struct ethtool_ops *ops;
u32 total_size, indir_bytes;
@@ -56,12 +58,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
int ret;
ops = dev->ethtool_ops;
- if (!ops->get_rxfh)
- return -EOPNOTSUPP;
-
- /* Some drivers don't handle rss_context */
- if (request->rss_context && !ops->cap_rss_ctx_supported)
- return -EOPNOTSUPP;
ret = ethnl_ops_begin(dev);
if (ret < 0)
@@ -91,7 +87,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
rxfh.indir = data->indir_table;
rxfh.key_size = data->hkey_size;
rxfh.key = data->hkey;
- rxfh.rss_context = request->rss_context;
ret = ops->get_rxfh(dev, &rxfh);
if (ret)
@@ -105,6 +100,67 @@ out_ops:
}
static int
+rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
+ struct rss_reply_data *data, const struct genl_info *info)
+{
+ struct ethtool_rxfh_context *ctx;
+ u32 total_size, indir_bytes;
+ u8 *rss_config;
+
+ ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
+ if (!ctx)
+ return -ENOENT;
+
+ data->indir_size = ctx->indir_size;
+ data->hkey_size = ctx->key_size;
+ data->hfunc = ctx->hfunc;
+ data->input_xfrm = ctx->input_xfrm;
+
+ indir_bytes = data->indir_size * sizeof(u32);
+ total_size = indir_bytes + data->hkey_size;
+ rss_config = kzalloc(total_size, GFP_KERNEL);
+ if (!rss_config)
+ return -ENOMEM;
+
+ data->indir_table = (u32 *)rss_config;
+ memcpy(data->indir_table, ethtool_rxfh_context_indir(ctx), indir_bytes);
+
+ if (data->hkey_size) {
+ data->hkey = rss_config + indir_bytes;
+ memcpy(data->hkey, ethtool_rxfh_context_key(ctx),
+ data->hkey_size);
+ }
+
+ return 0;
+}
+
+static int
+rss_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ const struct genl_info *info)
+{
+ struct rss_reply_data *data = RSS_REPDATA(reply_base);
+ struct rss_req_info *request = RSS_REQINFO(req_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_ops *ops;
+
+ ops = dev->ethtool_ops;
+ if (!ops->get_rxfh)
+ return -EOPNOTSUPP;
+
+ /* Some drivers don't handle rss_context */
+ if (request->rss_context) {
+ if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context)
+ return -EOPNOTSUPP;
+
+ data->no_key_fields = !ops->rxfh_per_ctx_key;
+ return rss_prepare_ctx(request, dev, data, info);
+ }
+
+ return rss_prepare_get(request, dev, data, info);
+}
+
+static int
rss_reply_size(const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
{
@@ -131,13 +187,18 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context))
return -EMSGSIZE;
+ if ((data->indir_size &&
+ nla_put(skb, ETHTOOL_A_RSS_INDIR,
+ sizeof(u32) * data->indir_size, data->indir_table)))
+ return -EMSGSIZE;
+
+ if (data->no_key_fields)
+ return 0;
+
if ((data->hfunc &&
nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
(data->input_xfrm &&
nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) ||
- (data->indir_size &&
- nla_put(skb, ETHTOOL_A_RSS_INDIR,
- sizeof(u32) * data->indir_size, data->indir_table)) ||
(data->hkey_size &&
nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))
return -EMSGSIZE;
@@ -152,6 +213,146 @@ static void rss_cleanup_data(struct ethnl_reply_data *reply_base)
kfree(data->indir_table);
}
+struct rss_nl_dump_ctx {
+ unsigned long ifindex;
+ unsigned long ctx_idx;
+
+ /* User wants to only dump contexts from given ifindex */
+ unsigned int match_ifindex;
+ unsigned int start_ctx;
+};
+
+static struct rss_nl_dump_ctx *rss_dump_ctx(struct netlink_callback *cb)
+{
+ NL_ASSERT_DUMP_CTX_FITS(struct rss_nl_dump_ctx);
+
+ return (struct rss_nl_dump_ctx *)cb->ctx;
+}
+
+int ethnl_rss_dump_start(struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
+ struct ethnl_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ int ret;
+
+ /* Filtering by context not supported */
+ if (tb[ETHTOOL_A_RSS_CONTEXT]) {
+ NL_SET_BAD_ATTR(info->extack, tb[ETHTOOL_A_RSS_CONTEXT]);
+ return -EINVAL;
+ }
+ if (tb[ETHTOOL_A_RSS_START_CONTEXT]) {
+ ctx->start_ctx = nla_get_u32(tb[ETHTOOL_A_RSS_START_CONTEXT]);
+ ctx->ctx_idx = ctx->start_ctx;
+ }
+
+ ret = ethnl_parse_header_dev_get(&req_info,
+ tb[ETHTOOL_A_RSS_HEADER],
+ sock_net(cb->skb->sk), cb->extack,
+ false);
+ if (req_info.dev) {
+ ctx->match_ifindex = req_info.dev->ifindex;
+ ctx->ifindex = ctx->match_ifindex;
+ ethnl_parse_header_dev_put(&req_info);
+ req_info.dev = NULL;
+ }
+
+ return ret;
+}
+
+static int
+rss_dump_one_ctx(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev, u32 rss_context)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct rss_reply_data data = {};
+ struct rss_req_info req = {};
+ void *ehdr;
+ int ret;
+
+ req.rss_context = rss_context;
+
+ ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_RSS_GET_REPLY);
+ if (!ehdr)
+ return -EMSGSIZE;
+
+ ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_RSS_HEADER);
+ if (ret < 0)
+ goto err_cancel;
+
+ /* Context 0 is not currently storred or cached in the XArray */
+ if (!rss_context)
+ ret = rss_prepare_get(&req, dev, &data, info);
+ else
+ ret = rss_prepare_ctx(&req, dev, &data, info);
+ if (ret)
+ goto err_cancel;
+
+ ret = rss_fill_reply(skb, &req.base, &data.base);
+ if (ret)
+ goto err_cleanup;
+ genlmsg_end(skb, ehdr);
+
+ rss_cleanup_data(&data.base);
+ return 0;
+
+err_cleanup:
+ rss_cleanup_data(&data.base);
+err_cancel:
+ genlmsg_cancel(skb, ehdr);
+ return ret;
+}
+
+static int
+rss_dump_one_dev(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev)
+{
+ struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
+ int ret;
+
+ if (!dev->ethtool_ops->get_rxfh)
+ return 0;
+
+ if (!ctx->ctx_idx) {
+ ret = rss_dump_one_ctx(skb, cb, dev, 0);
+ if (ret)
+ return ret;
+ ctx->ctx_idx++;
+ }
+
+ for (; xa_find(&dev->ethtool->rss_ctx, &ctx->ctx_idx,
+ ULONG_MAX, XA_PRESENT); ctx->ctx_idx++) {
+ ret = rss_dump_one_ctx(skb, cb, dev, ctx->ctx_idx);
+ if (ret)
+ return ret;
+ }
+ ctx->ctx_idx = ctx->start_ctx;
+
+ return 0;
+}
+
+int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int ret = 0;
+
+ rtnl_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ if (ctx->match_ifindex && ctx->match_ifindex != ctx->ifindex)
+ break;
+
+ ret = rss_dump_one_dev(skb, cb, dev);
+ if (ret)
+ break;
+ }
+ rtnl_unlock();
+
+ return ret;
+}
+
const struct ethnl_request_ops ethnl_rss_request_ops = {
.request_cmd = ETHTOOL_MSG_RSS_GET,
.reply_cmd = ETHTOOL_MSG_RSS_GET_REPLY,
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index c678b484a079..b3382b3cf325 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -126,7 +126,7 @@ struct strset_reply_data {
const struct nla_policy ethnl_strset_get_policy[] = {
[ETHTOOL_A_STRSET_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_STRSET_STRINGSETS] = { .type = NLA_NESTED },
[ETHTOOL_A_STRSET_COUNTS_ONLY] = { .type = NLA_FLAG },
};
@@ -233,17 +233,18 @@ static void strset_cleanup_data(struct ethnl_reply_data *reply_base)
}
static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
- unsigned int id, bool counts_only)
+ struct phy_device *phydev, unsigned int id,
+ bool counts_only)
{
const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
const struct ethtool_ops *ops = dev->ethtool_ops;
void *strings;
int count, ret;
- if (id == ETH_SS_PHY_STATS && dev->phydev &&
+ if (id == ETH_SS_PHY_STATS && phydev &&
!ops->get_ethtool_phy_stats && phy_ops &&
phy_ops->get_sset_count)
- ret = phy_ops->get_sset_count(dev->phydev);
+ ret = phy_ops->get_sset_count(phydev);
else if (ops->get_sset_count && ops->get_strings)
ret = ops->get_sset_count(dev, id);
else
@@ -258,10 +259,10 @@ static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
strings = kcalloc(count, ETH_GSTRING_LEN, GFP_KERNEL);
if (!strings)
return -ENOMEM;
- if (id == ETH_SS_PHY_STATS && dev->phydev &&
+ if (id == ETH_SS_PHY_STATS && phydev &&
!ops->get_ethtool_phy_stats && phy_ops &&
phy_ops->get_strings)
- phy_ops->get_strings(dev->phydev, strings);
+ phy_ops->get_strings(phydev, strings);
else
ops->get_strings(dev, id, strings);
info->strings = strings;
@@ -279,6 +280,8 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
struct strset_reply_data *data = STRSET_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
unsigned int i;
int ret;
@@ -289,14 +292,20 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
for (i = 0; i < ETH_SS_COUNT; i++) {
if ((req_info->req_ids & (1U << i)) &&
data->sets[i].per_dev) {
- if (info)
- GENL_SET_ERR_MSG(info, "requested per device strings without dev");
+ GENL_SET_ERR_MSG(info, "requested per device strings without dev");
return -EINVAL;
}
}
return 0;
}
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_HEADER_FLAGS],
+ info->extack);
+
+ /* phydev can be NULL, check for errors only */
+ if (IS_ERR(phydev))
+ return PTR_ERR(phydev);
+
ret = ethnl_ops_begin(dev);
if (ret < 0)
goto err_strset;
@@ -305,7 +314,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
!data->sets[i].per_dev)
continue;
- ret = strset_prepare_set(&data->sets[i], dev, i,
+ ret = strset_prepare_set(&data->sets[i], dev, phydev, i,
req_info->counts_only);
if (ret < 0)
goto err_ops;
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
index 89637e732866..7e46d130dce2 100644
--- a/net/handshake/netlink.c
+++ b/net/handshake/netlink.c
@@ -153,7 +153,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
if (!req) {
err = -EBUSY;
trace_handshake_cmd_done_err(net, req, sock->sk, err);
- fput(sock->file);
+ sockfd_put(sock);
return err;
}
@@ -164,7 +164,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]);
handshake_complete(req, status, info);
- fput(sock->file);
+ sockfd_put(sock);
return 0;
}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e4cc6b78dcfc..ebdfd5b64e17 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -427,6 +427,9 @@ static void hsr_proxy_announce(struct timer_list *t)
* of SAN nodes stored in ProxyNodeTable.
*/
interlink = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
+ if (!interlink)
+ goto done;
+
list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
if (hsr_addr_is_redbox(hsr, node->macaddress_A))
continue;
@@ -441,6 +444,7 @@ static void hsr_proxy_announce(struct timer_list *t)
mod_timer(&hsr->announce_proxy_timer, jiffies + interval);
}
+done:
rcu_read_unlock();
}
@@ -554,6 +558,12 @@ void hsr_dev_setup(struct net_device *dev)
dev->netdev_ops = &hsr_device_ops;
SET_NETDEV_DEVTYPE(dev, &hsr_type);
dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL;
+ /* Prevent recursive tx locking */
+ dev->lltx = true;
+ /* Not sure about this. Taken from bridge code. netdevice.h says
+ * it means "Does not change network namespaces".
+ */
+ dev->netns_local = true;
dev->needs_free_netdev = true;
@@ -563,16 +573,10 @@ void hsr_dev_setup(struct net_device *dev)
dev->features = dev->hw_features;
- /* Prevent recursive tx locking */
- dev->features |= NETIF_F_LLTX;
/* VLAN on top of HSR needs testing and probably some work on
* hsr_header_create() etc.
*/
dev->features |= NETIF_F_VLAN_CHALLENGED;
- /* Not sure about this. Taken from bridge code. netdev_features.h says
- * it means "Does not change network namespaces".
- */
- dev->features |= NETIF_F_NETNS_LOCAL;
}
/* Return true if dev is a HSR master; return false otherwise.
@@ -625,7 +629,6 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
/* Overflow soon to find bugs easier: */
hsr->sequence_nr = HSR_SEQNR_START;
hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
- hsr->interlink_sequence_nr = HSR_SEQNR_START;
timer_setup(&hsr->announce_timer, hsr_announce, 0);
timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index ab1f8d35d9dc..fcfeb79bb040 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -203,7 +203,6 @@ struct hsr_priv {
struct timer_list prune_proxy_timer;
int announce_count;
u16 sequence_nr;
- u16 interlink_sequence_nr; /* Interlink port seq_nr */
u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */
spinlock_t seqnr_lock; /* locking for sequence_nr */
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index af6cf64a00e0..464f683e016d 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -67,7 +67,16 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
skb_set_network_header(skb, ETH_HLEN + HSR_HLEN);
skb_reset_mac_len(skb);
- hsr_forward_skb(skb, port);
+ /* Only the frames received over the interlink port will assign a
+ * sequence number and require synchronisation vs other sender.
+ */
+ if (port->type == HSR_PT_INTERLINK) {
+ spin_lock_bh(&hsr->seqnr_lock);
+ hsr_forward_skb(skb, port);
+ spin_unlock_bh(&hsr->seqnr_lock);
+ } else {
+ hsr_forward_skb(skb, port);
+ }
finish_consume:
return RX_HANDLER_CONSUMED;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 77b4e92027c5..175efd860f7b 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -116,7 +116,7 @@ static void lowpan_setup(struct net_device *ldev)
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
ldev->needs_free_netdev = true;
- ldev->features |= NETIF_F_NETNS_LOCAL;
+ ldev->netns_local = true;
}
static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 60e8fff1347e..88adb04e4072 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -226,11 +226,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) {
if (!wpan_dev->netdev)
continue;
- wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = false;
err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d");
if (err)
break;
- wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = true;
}
if (err) {
@@ -242,11 +242,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
list) {
if (!wpan_dev->netdev)
continue;
- wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = false;
err = dev_change_net_namespace(wpan_dev->netdev, net,
"wpan%d");
WARN_ON(err);
- wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = true;
}
return err;
@@ -291,7 +291,7 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb,
switch (state) {
/* TODO NETDEV_DEVTYPE */
case NETDEV_REGISTER:
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
wpan_dev->identifier = ++rdev->wpan_dev_id;
list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list);
rdev->devlist_generation++;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8e94ed7c56a0..6d2c97f8e9ef 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -661,7 +661,8 @@ config TCP_CONG_CDG
For further details see:
D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
- delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg
+ delay gradients." In Networking 2011. Preprint:
+ http://caia.swin.edu.au/cv/dahayes/content/networking2011-cdg-preprint.pdf
config TCP_CONG_BBR
tristate "BBR TCP"
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d96f3e452fef..ab76744383cf 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -216,17 +216,27 @@ static void devinet_sysctl_unregister(struct in_device *idev)
/* Locks all the inet devices. */
-static struct in_ifaddr *inet_alloc_ifa(void)
+static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
{
- return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
+ struct in_ifaddr *ifa;
+
+ ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
+ if (!ifa)
+ return NULL;
+
+ in_dev_hold(in_dev);
+ ifa->ifa_dev = in_dev;
+
+ INIT_HLIST_NODE(&ifa->hash);
+
+ return ifa;
}
static void inet_rcu_free_ifa(struct rcu_head *head)
{
struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
- if (ifa->ifa_dev)
- in_dev_put(ifa->ifa_dev);
+ in_dev_put(ifa->ifa_dev);
kfree(ifa);
}
@@ -574,17 +584,9 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
ASSERT_RTNL();
- if (!in_dev) {
- inet_free_ifa(ifa);
- return -ENOBUFS;
- }
ipv4_devconf_setall(in_dev);
neigh_parms_data_state_setall(in_dev->arp_parms);
- if (ifa->ifa_dev != in_dev) {
- WARN_ON(ifa->ifa_dev);
- in_dev_hold(in_dev);
- ifa->ifa_dev = in_dev;
- }
+
if (ipv4_is_loopback(ifa->ifa_local))
ifa->ifa_scope = RT_SCOPE_HOST;
return inet_insert_ifa(ifa);
@@ -701,8 +703,6 @@ errout:
return err;
}
-#define INFINITY_LIFE_TIME 0xFFFFFFFF
-
static void check_lifetime(struct work_struct *work)
{
unsigned long now, next, next_sec, next_sched;
@@ -875,7 +875,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
if (!in_dev)
goto errout;
- ifa = inet_alloc_ifa();
+ ifa = inet_alloc_ifa(in_dev);
if (!ifa)
/*
* A potential indev allocation can be left alive, it stays
@@ -885,19 +885,15 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ipv4_devconf_setall(in_dev);
neigh_parms_data_state_setall(in_dev->arp_parms);
- in_dev_hold(in_dev);
if (!tb[IFA_ADDRESS])
tb[IFA_ADDRESS] = tb[IFA_LOCAL];
- INIT_HLIST_NODE(&ifa->hash);
ifa->ifa_prefixlen = ifm->ifa_prefixlen;
ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
ifm->ifa_flags;
ifa->ifa_scope = ifm->ifa_scope;
- ifa->ifa_dev = in_dev;
-
ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
@@ -1184,10 +1180,12 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
if (!ifa) {
ret = -ENOBUFS;
- ifa = inet_alloc_ifa();
+ if (!in_dev)
+ break;
+ ifa = inet_alloc_ifa(in_dev);
if (!ifa)
break;
- INIT_HLIST_NODE(&ifa->hash);
+
if (colon)
memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
else
@@ -1586,16 +1584,13 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (!inetdev_valid_mtu(dev->mtu))
break;
if (dev->flags & IFF_LOOPBACK) {
- struct in_ifaddr *ifa = inet_alloc_ifa();
+ struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);
if (ifa) {
- INIT_HLIST_NODE(&ifa->hash);
ifa->ifa_local =
ifa->ifa_address = htonl(INADDR_LOOPBACK);
ifa->ifa_prefixlen = 8;
ifa->ifa_mask = inet_make_mask(8);
- in_dev_hold(in_dev);
- ifa->ifa_dev = in_dev;
ifa->ifa_scope = RT_SCOPE_HOST;
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
@@ -1948,8 +1943,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
static size_t inet_get_link_af_size(const struct net_device *dev,
@@ -2145,8 +2139,7 @@ void inet_netconf_notify_devconf(struct net *net, int event, int type,
rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 47378ca41904..f3281312eb5e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -115,7 +115,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(sg_page(sg), skb->pp_recycle);
+ skb_page_unref(page_to_netmem(sg_page(sg)),
+ skb->pp_recycle);
}
#ifdef CONFIG_INET_ESPINTCP
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7ad2cafb9276..793e6781399a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -293,7 +293,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
- .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
+ .flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK,
.flowi4_scope = scope,
.flowi4_mark = vmark ? skb->mark : 0,
};
@@ -1343,7 +1343,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
struct flowi4 fl4 = {
.flowi4_mark = frn->fl_mark,
.daddr = frn->fl_addr,
- .flowi4_tos = frn->fl_tos,
+ .flowi4_tos = frn->fl_tos & INET_DSCP_MASK,
.flowi4_scope = frn->fl_scope,
};
struct fib_table *tb;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 5bdd1c016009..b07292d50ee7 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -37,6 +37,7 @@ struct fib4_rule {
u8 dst_len;
u8 src_len;
dscp_t dscp;
+ u8 dscp_full:1; /* DSCP or TOS selector */
__be32 src;
__be32 srcmask;
__be32 dst;
@@ -186,7 +187,15 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
((daddr ^ r->dst) & r->dstmask))
return 0;
- if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
+ /* When DSCP selector is used we need to match on the entire DSCP field
+ * in the flow information structure. When TOS selector is used we need
+ * to mask the upper three DSCP bits prior to matching to maintain
+ * legacy behavior.
+ */
+ if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
+ return 0;
+ else if (!r->dscp_full && r->dscp &&
+ !fib_dscp_masked_match(r->dscp, fl4))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
@@ -217,6 +226,20 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
+static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
+ struct netlink_ext_ack *extack)
+{
+ if (rule4->dscp) {
+ NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
+ return -EINVAL;
+ }
+
+ rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule4->dscp_full = true;
+
+ return 0;
+}
+
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -238,6 +261,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
rule4->dscp = inet_dsfield_to_dscp(frh->tos);
+ if (tb[FRA_DSCP] &&
+ fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
+ goto errout;
+
/* split local/main if they are not already split */
err = fib_unmerge(net);
if (err)
@@ -320,9 +347,19 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
return 0;
- if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
+ if (frh->tos &&
+ (rule4->dscp_full ||
+ inet_dscp_to_dsfield(rule4->dscp) != frh->tos))
return 0;
+ if (tb[FRA_DSCP]) {
+ dscp_t dscp;
+
+ dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
+ if (!rule4->dscp_full || rule4->dscp != dscp)
+ return 0;
+ }
+
#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
@@ -344,7 +381,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule4->dst_len;
frh->src_len = rule4->src_len;
- frh->tos = inet_dscp_to_dsfield(rule4->dscp);
+
+ if (rule4->dscp_full) {
+ frh->tos = 0;
+ if (nla_put_u8(skb, FRA_DSCP,
+ inet_dscp_to_dsfield(rule4->dscp) >> 2))
+ goto nla_put_failure;
+ } else {
+ frh->tos = inet_dscp_to_dsfield(rule4->dscp);
+ }
if ((rule4->dst_len &&
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
@@ -366,7 +411,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(4) /* dst */
+ nla_total_size(4) /* src */
- + nla_total_size(4); /* flow */
+ + nla_total_size(4) /* flow */
+ + nla_total_size(1); /* dscp */
}
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2b57cd2b96e2..ba2df3d2ac15 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -543,8 +543,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
info->nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
+ rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
}
static int fib_detect_death(struct fib_info *fi, int order,
@@ -2066,8 +2065,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (fa->fa_slen != slen)
continue;
- if (fa->fa_dscp &&
- fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos))
+ if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
if (fa->tb_id != tb->tb_id)
continue;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8f30e3f00b7f..09e31757e96c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1580,8 +1580,7 @@ found:
if (index >= (1ul << fa->fa_slen))
continue;
}
- if (fa->fa_dscp &&
- inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
+ if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
/* Paired with WRITE_ONCE() in fib_release_info() */
if (READ_ONCE(fi->fib_dead))
diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
index 0abbc413e0fe..3e30745e2c09 100644
--- a/net/ipv4/fou_core.c
+++ b/net/ipv4/fou_core.c
@@ -50,7 +50,7 @@ struct fou_net {
static inline struct fou *fou_from_sock(struct sock *sk)
{
- return sk->sk_user_data;
+ return rcu_dereference_sk_user_data(sk);
}
static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len)
@@ -233,9 +233,15 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
struct sk_buff *skb)
{
const struct net_offload __rcu **offloads;
- u8 proto = fou_from_sock(sk)->protocol;
+ struct fou *fou = fou_from_sock(sk);
const struct net_offload *ops;
struct sk_buff *pp = NULL;
+ u8 proto;
+
+ if (!fou)
+ goto out;
+
+ proto = fou->protocol;
/* We can clear the encap_mark for FOU as we are essentially doing
* one of two possible things. We are either adding an L4 tunnel
@@ -263,14 +269,24 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
int nhoff)
{
const struct net_offload __rcu **offloads;
- u8 proto = fou_from_sock(sk)->protocol;
+ struct fou *fou = fou_from_sock(sk);
const struct net_offload *ops;
- int err = -ENOSYS;
+ u8 proto;
+ int err;
+
+ if (!fou) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ proto = fou->protocol;
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
ops = rcu_dereference(offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_complete))
+ if (WARN_ON(!ops || !ops->callbacks.gro_complete)) {
+ err = -ENOSYS;
goto out;
+ }
err = ops->callbacks.gro_complete(skb, nhoff);
@@ -322,6 +338,9 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
skb_gro_remcsum_init(&grc);
+ if (!fou)
+ goto out;
+
off = skb_gro_offset(skb);
len = off + sizeof(*guehdr);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ab6d0d98dbc3..e1384e7331d8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,7 @@
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
+#include <net/inet_dscp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/icmp.h>
@@ -220,61 +221,56 @@ static inline void icmp_xmit_unlock(struct sock *sk)
spin_unlock(&sk->sk_lock.slock);
}
-int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
-int sysctl_icmp_msgs_burst __read_mostly = 50;
-
-static struct {
- spinlock_t lock;
- u32 credit;
- u32 stamp;
-} icmp_global = {
- .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
-};
-
/**
* icmp_global_allow - Are we allowed to send one more ICMP message ?
+ * @net: network namespace
*
* Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet.
- * Note: called with BH disabled
+ * Works in tandem with icmp_global_consume().
*/
-bool icmp_global_allow(void)
+bool icmp_global_allow(struct net *net)
{
- u32 credit, delta, incr = 0, now = (u32)jiffies;
- bool rc = false;
+ u32 delta, now, oldstamp;
+ int incr, new, old;
- /* Check if token bucket is empty and cannot be refilled
- * without taking the spinlock. The READ_ONCE() are paired
- * with the following WRITE_ONCE() in this same function.
+ /* Note: many cpus could find this condition true.
+ * Then later icmp_global_consume() could consume more credits,
+ * this is an acceptable race.
*/
- if (!READ_ONCE(icmp_global.credit)) {
- delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
- if (delta < HZ / 50)
- return false;
- }
+ if (atomic_read(&net->ipv4.icmp_global_credit) > 0)
+ return true;
- spin_lock(&icmp_global.lock);
- delta = min_t(u32, now - icmp_global.stamp, HZ);
- if (delta >= HZ / 50) {
- incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
- if (incr)
- WRITE_ONCE(icmp_global.stamp, now);
- }
- credit = min_t(u32, icmp_global.credit + incr,
- READ_ONCE(sysctl_icmp_msgs_burst));
- if (credit) {
- /* We want to use a credit of one in average, but need to randomize
- * it for security reasons.
- */
- credit = max_t(int, credit - get_random_u32_below(3), 0);
- rc = true;
+ now = jiffies;
+ oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp);
+ delta = min_t(u32, now - oldstamp, HZ);
+ if (delta < HZ / 50)
+ return false;
+
+ incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
+ if (!incr)
+ return false;
+
+ if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) {
+ old = atomic_read(&net->ipv4.icmp_global_credit);
+ do {
+ new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst));
+ } while (!atomic_try_cmpxchg(&net->ipv4.icmp_global_credit, &old, new));
}
- WRITE_ONCE(icmp_global.credit, credit);
- spin_unlock(&icmp_global.lock);
- return rc;
+ return true;
}
EXPORT_SYMBOL(icmp_global_allow);
+void icmp_global_consume(struct net *net)
+{
+ int credits = get_random_u32_below(3);
+
+ /* Note: this might make icmp_global.credit negative. */
+ if (credits)
+ atomic_sub(credits, &net->ipv4.icmp_global_credit);
+}
+EXPORT_SYMBOL(icmp_global_consume);
+
static bool icmpv4_mask_allow(struct net *net, int type, int code)
{
if (type > NR_ICMP_TYPES)
@@ -291,14 +287,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
return false;
}
-static bool icmpv4_global_allow(struct net *net, int type, int code)
+static bool icmpv4_global_allow(struct net *net, int type, int code,
+ bool *apply_ratelimit)
{
if (icmpv4_mask_allow(net, type, code))
return true;
- if (icmp_global_allow())
+ if (icmp_global_allow(net)) {
+ *apply_ratelimit = true;
return true;
-
+ }
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -308,15 +306,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
*/
static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
- struct flowi4 *fl4, int type, int code)
+ struct flowi4 *fl4, int type, int code,
+ bool apply_ratelimit)
{
struct dst_entry *dst = &rt->dst;
struct inet_peer *peer;
bool rc = true;
int vif;
- if (icmpv4_mask_allow(net, type, code))
- goto out;
+ if (!apply_ratelimit)
+ return true;
/* No rate limit on loopback */
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
@@ -331,6 +330,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
out:
if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
+ else
+ icmp_global_consume(net);
return rc;
}
@@ -402,6 +403,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct ipcm_cookie ipc;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
+ bool apply_ratelimit = false;
struct flowi4 fl4;
struct sock *sk;
struct inet_sock *inet;
@@ -413,11 +415,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
return;
- /* Needed by both icmp_global_allow and icmp_xmit_lock */
+ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
- /* global icmp_msgs_per_sec */
- if (!icmpv4_global_allow(net, type, code))
+ /* is global icmp_msgs_per_sec exhausted ? */
+ if (!icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
@@ -443,14 +445,14 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.saddr = saddr;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = sock_net_uid(net, NULL);
- fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
+ fl4.flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK;
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
goto out_unlock;
- if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+ if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
ip_rt_put(rt);
out_unlock:
@@ -496,7 +498,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->saddr = saddr;
fl4->flowi4_mark = mark;
fl4->flowi4_uid = sock_net_uid(net, NULL);
- fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_tos = tos & INET_DSCP_MASK;
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
@@ -545,7 +547,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
orefdst = skb_in->_skb_refdst; /* save old refdst */
skb_dst_set(skb_in, NULL);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
- RT_TOS(tos), rt2->dst.dev);
+ tos, rt2->dst.dev);
dst_release(&rt2->dst);
rt2 = skb_rtable(skb_in);
@@ -596,6 +598,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
int room;
struct icmp_bxm icmp_param;
struct rtable *rt = skb_rtable(skb_in);
+ bool apply_ratelimit = false;
struct ipcm_cookie ipc;
struct flowi4 fl4;
__be32 saddr;
@@ -677,7 +680,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
}
}
- /* Needed by both icmp_global_allow and icmp_xmit_lock */
+ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
@@ -685,7 +688,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
* loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
*/
if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
- !icmpv4_global_allow(net, type, code))
+ !icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
@@ -744,7 +747,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
goto out_unlock;
/* peer icmp_ratelimit */
- if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+ if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
goto ende;
/* RFC says return as much as we can without exceeding 576 bytes. */
@@ -1487,6 +1490,8 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
net->ipv4.sysctl_icmp_ratemask = 0x1818;
net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
+ net->ipv4.sysctl_icmp_msgs_per_sec = 1000;
+ net->ipv4.sysctl_icmp_msgs_burst = 50;
return 0;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 64d07b842e73..2c5632d4fddb 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -236,7 +236,7 @@ static bool inet_bhash2_conflict(const struct sock *sk,
#define sk_for_each_bound_bhash(__sk, __tb2, __tb) \
hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node) \
- sk_for_each_bound(sk2, &(__tb2)->owners)
+ sk_for_each_bound((__sk), &(__tb2)->owners)
/* This should be called only when the tb and tb2 hashbuckets' locks are held */
static int inet_csk_bind_conflict(const struct sock *sk,
@@ -714,6 +714,7 @@ struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg)
out:
release_sock(sk);
if (newsk && mem_cgroup_sockets_enabled) {
+ gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
int amt = 0;
/* atomically get the memory usage, set and charge the
@@ -731,8 +732,8 @@ out:
}
if (amt)
- mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
- GFP_KERNEL | __GFP_NOFAIL);
+ mem_cgroup_charge_skmem(newsk->sk_memcg, amt, gfp);
+ kmem_cache_charge(newsk, gfp);
release_sock(newsk);
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9712cdb8087c..67639309163d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -442,7 +442,7 @@ static int inet_twsk_diag_fill(struct sock *sk,
inet_diag_msg_common_fill(r, sk);
r->idiag_retrans = 0;
- r->idiag_state = tw->tw_substate;
+ r->idiag_state = READ_ONCE(tw->tw_substate);
r->idiag_timer = 3;
tmo = tw->tw_timer.expires - jiffies;
r->idiag_expires = jiffies_delta_to_msecs(tmo);
@@ -1209,7 +1209,7 @@ next_chunk:
if (num < s_num)
goto next_normal;
state = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_substate : sk->sk_state;
+ READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
if (!(idiag_states & (1 << state)))
goto next_normal;
if (r->sdiag_family != AF_UNSPEC &&
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 48d0d494185b..9bfcfd016e18 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -310,7 +310,7 @@ inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
return inet_lhash2_bucket(h, hash);
}
-static inline int compute_score(struct sock *sk, struct net *net,
+static inline int compute_score(struct sock *sk, const struct net *net,
const unsigned short hnum, const __be32 daddr,
const int dif, const int sdif)
{
@@ -348,7 +348,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
* Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
* the selected sock or an error.
*/
-struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+struct sock *inet_lookup_reuseport(const struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum,
@@ -374,7 +374,7 @@ EXPORT_SYMBOL_GPL(inet_lookup_reuseport);
*/
/* called with rcu_read_lock() : No refcount taken on the socket */
-static struct sock *inet_lhash2_lookup(struct net *net,
+static struct sock *inet_lhash2_lookup(const struct net *net,
struct inet_listen_hashbucket *ilb2,
struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
@@ -401,7 +401,7 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
-struct sock *inet_lookup_run_sk_lookup(struct net *net,
+struct sock *inet_lookup_run_sk_lookup(const struct net *net,
int protocol,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
@@ -423,7 +423,7 @@ struct sock *inet_lookup_run_sk_lookup(struct net *net,
return sk;
}
-struct sock *__inet_lookup_listener(struct net *net,
+struct sock *__inet_lookup_listener(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
@@ -488,7 +488,7 @@ void sock_edemux(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_edemux);
-struct sock *__inet_lookup_established(struct net *net,
+struct sock *__inet_lookup_established(const struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ba205473522e..5f6fd382af38 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>
+#include <net/inet_dscp.h>
/*
Problems & solutions
@@ -930,7 +931,7 @@ static int ipgre_open(struct net_device *dev)
t->parms.iph.daddr,
t->parms.iph.saddr,
t->parms.o_key,
- RT_TOS(t->parms.iph.tos),
+ t->parms.iph.tos & INET_DSCP_MASK,
t->parms.link);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
@@ -996,7 +997,7 @@ static void __gre_tunnel_init(struct net_device *dev)
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
- dev->features |= GRE_FEATURES | NETIF_F_LLTX;
+ dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
/* TCP offload with GRE SEQ is not supported, nor can we support 2
@@ -1010,6 +1011,8 @@ static void __gre_tunnel_init(struct net_device *dev)
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+ dev->lltx = true;
}
static int ipgre_tunnel_init(struct net_device *dev)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d6fbcbd2358a..b6e7d4921309 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -596,9 +596,8 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
{
struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct dst_entry *dst;
@@ -646,9 +645,8 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *curr_dev = NULL;
struct net *curr_net = NULL;
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b90d0f78ac80..49811c9281d4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -77,6 +77,7 @@
#include <net/inetpeer.h>
#include <net/inet_ecn.h>
#include <net/lwtunnel.h>
+#include <net/inet_dscp.h>
#include <linux/bpf-cgroup.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
@@ -493,7 +494,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
- RT_TOS(tos),
+ tos & INET_DSCP_MASK,
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
@@ -1621,7 +1622,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
- RT_TOS(arg->tos),
+ arg->tos & INET_DSCP_MASK,
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
daddr, saddr,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5cffad42fe8c..d591c73e2c0e 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -43,6 +43,7 @@
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -293,7 +294,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), dev_net(dev),
+ iph->tos & INET_DSCP_MASK, dev_net(dev),
tunnel->parms.link, tunnel->fwmark, 0, 0);
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -609,9 +610,9 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
- tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
- dev_net(dev), 0, skb->mark, skb_get_hash(skb),
- key->flow_flags);
+ tunnel_id_to_key32(key->tun_id),
+ tos & INET_DSCP_MASK, dev_net(dev), 0, skb->mark,
+ skb_get_hash(skb), key->flow_flags);
if (!tunnel_hlen)
tunnel_hlen = ip_encap_hlen(&tun_info->encap);
@@ -772,7 +773,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos),
+ tunnel->parms.o_key, tos & INET_DSCP_MASK,
dev_net(dev), READ_ONCE(tunnel->parms.link),
tunnel->fwmark, skb_get_hash(skb), 0);
@@ -1161,7 +1162,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
* Allowing to move it to another netns is clearly unsafe.
*/
if (!IS_ERR(itn->fb_tunnel_dev)) {
- itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ itn->fb_tunnel_dev->netns_local = true;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
itn->type = itn->fb_tunnel_dev->type;
@@ -1326,7 +1327,7 @@ int ip_tunnel_init(struct net_device *dev)
tunnel->dev = dev;
tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
+ strscpy(tunnel->parms.name, dev->name);
iph->version = 4;
iph->ihl = 5;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 14536da9f5dc..f0b4419cef34 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -443,7 +443,7 @@ static int vti_tunnel_init(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
netif_keep_dst(dev);
return ip_tunnel_init(dev);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 923a2ef68c2f..dc0db5895e0e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -378,7 +378,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL;
dev->flags = IFF_NOARP;
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
netif_keep_dst(dev);
dev->features |= IPIP_FEATURES;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6c750bd13dd8..089864c6a35e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -62,6 +62,7 @@
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/rtnh.h>
+#include <net/inet_dscp.h>
#include <linux/nospec.h>
@@ -536,7 +537,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops;
dev->needs_free_netdev = true;
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
}
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
@@ -1868,7 +1869,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
vif->remote, vif->local,
0, 0,
IPPROTO_IPIP,
- RT_TOS(iph->tos), vif->link);
+ iph->tos & INET_DSCP_MASK, vif->link);
if (IS_ERR(rt))
goto out_free;
encap = sizeof(struct iphdr);
@@ -1876,7 +1877,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
0, 0,
IPPROTO_IPIP,
- RT_TOS(iph->tos), vif->link);
+ iph->tos & INET_DSCP_MASK, vif->link);
if (IS_ERR(rt))
goto out_free;
}
@@ -2080,7 +2081,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = RT_TOS(iph->tos),
+ .flowi4_tos = iph->tos & INET_DSCP_MASK,
.flowi4_oif = (rt_is_output_route(rt) ?
skb->dev->ifindex : 0),
.flowi4_iif = (rt_is_output_route(rt) ?
@@ -2406,8 +2407,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
errout:
kfree_skb(skb);
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
}
static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 591a2737808e..e0aab66cd925 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -14,6 +14,7 @@
#include <net/route.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <net/inet_dscp.h>
#include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
@@ -43,7 +44,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
*/
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
- fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 14365b20f1c5..1cdd9c28ab2d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -826,7 +826,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
sizeof(info.underflow));
info.num_entries = private->number;
info.size = private->size;
- strcpy(info.name, name);
+ strscpy(info.name, name);
if (copy_to_user(user, &info, *len) != 0)
ret = -EFAULT;
@@ -1547,7 +1547,7 @@ int arpt_register_table(struct net *net,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index fe89a056eb06..3d101613f27f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -981,7 +981,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
sizeof(info.underflow));
info.num_entries = private->number;
info.size = private->size;
- strcpy(info.name, name);
+ strscpy(info.name, name);
if (copy_to_user(user, &info, *len) != 0)
ret = -EFAULT;
@@ -1767,7 +1767,7 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index ded5bef02f77..1ce7a1655b97 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <net/inet_dscp.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/ip_fib.h>
@@ -75,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
+ flow.flowi4_tos = iph->tos & INET_DSCP_MASK;
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par));
flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index 6cc5743c553a..f4aed0789d69 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -15,6 +15,7 @@
#include <net/icmp.h>
#include <net/ip.h>
#include <net/route.h>
+#include <net/inet_dscp.h>
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
@@ -32,7 +33,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
fl4.flowi4_oif = oif;
fl4.daddr = gw->s_addr;
- fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index a522c3a3be52..ef5dd88107dd 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -40,13 +40,13 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
sizeof(struct in_addr));
if (err < 0)
return err;
if (tb[NFTA_DUP_SREG_DEV])
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV],
&priv->sreg_dev, sizeof(int));
return err;
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 9eee535c64dd..00da1332bbf1 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -10,6 +10,7 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>
+#include <net/inet_dscp.h>
#include <net/ip_fib.h>
#include <net/route.h>
@@ -22,8 +23,6 @@ static __be32 get_saddr(__be32 addr)
return addr;
}
-#define DSCP_BITS 0xfc
-
void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
@@ -110,7 +109,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (priv->flags & NFTA_FIB_F_MARK)
fl4.flowi4_mark = pkt->skb->mark;
- fl4.flowi4_tos = iph->tos & DSCP_BITS;
+ fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
if (priv->flags & NFTA_FIB_F_DADDR) {
fl4.daddr = iph->daddr;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 6b9787ee8601..93aaea0006ba 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -865,15 +865,18 @@ out:
}
static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
- u32 op_flags)
+ u32 op_flags, u32 *resp_op_flags)
{
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
struct nexthop_grp *p;
size_t len = nhg->num_nh * sizeof(*p);
struct nlattr *nla;
u16 group_type = 0;
+ u16 weight;
int i;
+ *resp_op_flags |= NHA_OP_FLAG_RESP_GRP_RESVD_0;
+
if (nhg->hash_threshold)
group_type = NEXTHOP_GRP_TYPE_MPATH;
else if (nhg->resilient)
@@ -888,9 +891,12 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
p = nla_data(nla);
for (i = 0; i < nhg->num_nh; ++i) {
+ weight = nhg->nh_entries[i].weight - 1;
+
*p++ = (struct nexthop_grp) {
.id = nhg->nh_entries[i].nh->id,
- .weight = nhg->nh_entries[i].weight - 1,
+ .weight = weight,
+ .weight_high = weight >> 8,
};
}
@@ -934,10 +940,12 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ u32 resp_op_flags = 0;
if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
goto nla_put_failure;
- if (nla_put_nh_group(skb, nh, op_flags))
+ if (nla_put_nh_group(skb, nh, op_flags, &resp_op_flags) ||
+ nla_put_u32(skb, NHA_OP_FLAGS, resp_op_flags))
goto nla_put_failure;
goto out;
}
@@ -1050,7 +1058,9 @@ static size_t nh_nlmsg_size(struct nexthop *nh)
sz += nla_total_size(4); /* NHA_ID */
if (nh->is_group)
- sz += nh_nlmsg_size_grp(nh);
+ sz += nh_nlmsg_size_grp(nh) +
+ nla_total_size(4) + /* NHA_OP_FLAGS */
+ 0;
else
sz += nh_nlmsg_size_single(nh);
@@ -1080,8 +1090,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
+ rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
}
static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket)
@@ -1201,8 +1210,7 @@ static void nexthop_bucket_notify(struct nh_res_table *res_table,
rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
+ rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
}
static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
@@ -1280,11 +1288,14 @@ static int nh_check_attr_group(struct net *net,
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
- if (nhg[i].resvd1 || nhg[i].resvd2) {
- NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
+ if (nhg[i].resvd2) {
+ NL_SET_ERR_MSG(extack, "Reserved field in nexthop_grp must be 0");
return -EINVAL;
}
- if (nhg[i].weight > 254) {
+ if (nexthop_grp_weight(&nhg[i]) == 0) {
+ /* 0xffff got passed in, representing weight of 0x10000,
+ * which is too heavy.
+ */
NL_SET_ERR_MSG(extack, "Invalid value for weight");
return -EINVAL;
}
@@ -1880,9 +1891,9 @@ static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
static void nh_res_group_rebalance(struct nh_group *nhg,
struct nh_res_table *res_table)
{
- int prev_upper_bound = 0;
- int total = 0;
- int w = 0;
+ u16 prev_upper_bound = 0;
+ u32 total = 0;
+ u32 w = 0;
int i;
INIT_LIST_HEAD(&res_table->uw_nh_entries);
@@ -1892,11 +1903,12 @@ static void nh_res_group_rebalance(struct nh_group *nhg,
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- int upper_bound;
+ u16 upper_bound;
+ u64 btw;
w += nhge->weight;
- upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w,
- total);
+ btw = ((u64)res_table->num_nh_buckets) * w;
+ upper_bound = DIV_ROUND_CLOSEST_ULL(btw, total);
nhge->res.wants_buckets = upper_bound - prev_upper_bound;
prev_upper_bound = upper_bound;
@@ -1962,8 +1974,8 @@ static void replace_nexthop_grp_res(struct nh_group *oldg,
static void nh_hthr_group_rebalance(struct nh_group *nhg)
{
- int total = 0;
- int w = 0;
+ u32 total = 0;
+ u32 w = 0;
int i;
for (i = 0; i < nhg->num_nh; ++i)
@@ -1971,7 +1983,7 @@ static void nh_hthr_group_rebalance(struct nh_group *nhg)
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- int upper_bound;
+ u32 upper_bound;
w += nhge->weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
@@ -2713,7 +2725,8 @@ static struct nexthop *nexthop_create_group(struct net *net,
goto out_no_nh;
}
nhg->nh_entries[i].nh = nhe;
- nhg->nh_entries[i].weight = entry[i].weight + 1;
+ nhg->nh_entries[i].weight = nexthop_grp_weight(&entry[i]);
+
list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
nhg->nh_entries[i].nh_parent = nh;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 13c0f1d455f3..723ac9181558 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -512,7 +512,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
sk->sk_protocol;
}
- flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
+ flowi4_init_output(fl4, oif, mark, tos & INET_DSCP_MASK, scope,
prot, flow_flags, iph->daddr, iph->saddr, 0, 0,
sock_net_uid(net, sk));
}
@@ -541,7 +541,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
- ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
+ ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk),
inet_test_bit(HDRINCL, sk) ?
IPPROTO_RAW : sk->sk_protocol,
@@ -1263,7 +1263,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = iph->tos & IPTOS_RT_MASK,
+ .flowi4_tos = iph->tos & INET_DSCP_MASK,
.flowi4_oif = rt->dst.dev->ifindex,
.flowi4_iif = skb->dev->ifindex,
.flowi4_mark = skb->mark,
@@ -2160,7 +2160,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
- tos &= IPTOS_RT_MASK;
+ tos &= INET_DSCP_MASK;
err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
if (err < 0)
goto martian_source;
@@ -2470,7 +2470,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result res;
int err;
- tos &= IPTOS_RT_MASK;
+ tos &= INET_DSCP_MASK;
rcu_read_lock();
err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
rcu_read_unlock();
@@ -2618,7 +2618,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- fl4->flowi4_tos &= IPTOS_RT_MASK;
+ fl4->flowi4_tos &= INET_DSCP_MASK;
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
@@ -3261,7 +3261,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.daddr = dst;
fl4.saddr = src;
- fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK;
+ fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK;
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
@@ -3286,7 +3286,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb->dev = dev;
skb->mark = mark;
err = ip_route_input_rcu(skb, dst, src,
- rtm->rtm_tos & IPTOS_RT_MASK, dev,
+ rtm->rtm_tos & INET_DSCP_MASK, dev,
&res);
rt = skb_rtable(skb);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4af0c234d8d7..a79b2a52ce01 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -601,22 +601,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_tcp_available_ulp,
},
{
- .procname = "icmp_msgs_per_sec",
- .data = &sysctl_icmp_msgs_per_sec,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "icmp_msgs_burst",
- .data = &sysctl_icmp_msgs_burst,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
@@ -702,6 +686,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "icmp_msgs_per_sec",
+ .data = &init_net.ipv4.sysctl_icmp_msgs_per_sec,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "icmp_msgs_burst",
+ .data = &init_net.ipv4.sysctl_icmp_msgs_burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
.procname = "ping_group_range",
.data = &init_net.ipv4.ping_group_range.range,
.maxlen = sizeof(gid_t)*2,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e03a342c9162..4f77bd862e95 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -285,6 +285,8 @@
#include <trace/events/tcp.h>
#include <net/rps.h>
+#include "../core/devmem.h"
+
/* Track pending CMSGs. */
enum {
TCP_CMSG_INQ = 1,
@@ -471,6 +473,7 @@ void tcp_init_sock(struct sock *sk)
set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
sk_sockets_allocated_inc(sk);
+ xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1);
}
EXPORT_SYMBOL(tcp_init_sock);
@@ -2160,6 +2163,9 @@ static int tcp_zerocopy_receive(struct sock *sk,
skb = tcp_recv_skb(sk, seq, &offset);
}
+ if (!skb_frags_readable(skb))
+ break;
+
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, tss);
zc->msg_flags |= TCP_CMSG_TS;
@@ -2177,6 +2183,9 @@ static int tcp_zerocopy_receive(struct sock *sk,
break;
}
page = skb_frag_page(frags);
+ if (WARN_ON_ONCE(!page))
+ break;
+
prefetchw(page);
pages[pages_to_map++] = page;
length += PAGE_SIZE;
@@ -2235,6 +2244,7 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
struct scm_timestamping_internal *tss)
{
int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
+ u32 tsflags = READ_ONCE(sk->sk_tsflags);
bool has_timestamping = false;
if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
@@ -2274,14 +2284,18 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
}
}
- if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
+ if (tsflags & SOF_TIMESTAMPING_SOFTWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))
has_timestamping = true;
else
tss->ts[0] = (struct timespec64) {0};
}
if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
- if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
+ if (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_HARDWARE ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))
has_timestamping = true;
else
tss->ts[2] = (struct timespec64) {0};
@@ -2317,6 +2331,220 @@ static int tcp_inq_hint(struct sock *sk)
return inq;
}
+/* batch __xa_alloc() calls and reduce xa_lock()/xa_unlock() overhead. */
+struct tcp_xa_pool {
+ u8 max; /* max <= MAX_SKB_FRAGS */
+ u8 idx; /* idx <= max */
+ __u32 tokens[MAX_SKB_FRAGS];
+ netmem_ref netmems[MAX_SKB_FRAGS];
+};
+
+static void tcp_xa_pool_commit_locked(struct sock *sk, struct tcp_xa_pool *p)
+{
+ int i;
+
+ /* Commit part that has been copied to user space. */
+ for (i = 0; i < p->idx; i++)
+ __xa_cmpxchg(&sk->sk_user_frags, p->tokens[i], XA_ZERO_ENTRY,
+ (__force void *)p->netmems[i], GFP_KERNEL);
+ /* Rollback what has been pre-allocated and is no longer needed. */
+ for (; i < p->max; i++)
+ __xa_erase(&sk->sk_user_frags, p->tokens[i]);
+
+ p->max = 0;
+ p->idx = 0;
+}
+
+static void tcp_xa_pool_commit(struct sock *sk, struct tcp_xa_pool *p)
+{
+ if (!p->max)
+ return;
+
+ xa_lock_bh(&sk->sk_user_frags);
+
+ tcp_xa_pool_commit_locked(sk, p);
+
+ xa_unlock_bh(&sk->sk_user_frags);
+}
+
+static int tcp_xa_pool_refill(struct sock *sk, struct tcp_xa_pool *p,
+ unsigned int max_frags)
+{
+ int err, k;
+
+ if (p->idx < p->max)
+ return 0;
+
+ xa_lock_bh(&sk->sk_user_frags);
+
+ tcp_xa_pool_commit_locked(sk, p);
+
+ for (k = 0; k < max_frags; k++) {
+ err = __xa_alloc(&sk->sk_user_frags, &p->tokens[k],
+ XA_ZERO_ENTRY, xa_limit_31b, GFP_KERNEL);
+ if (err)
+ break;
+ }
+
+ xa_unlock_bh(&sk->sk_user_frags);
+
+ p->max = k;
+ p->idx = 0;
+ return k ? 0 : err;
+}
+
+/* On error, returns the -errno. On success, returns number of bytes sent to the
+ * user. May not consume all of @remaining_len.
+ */
+static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
+ unsigned int offset, struct msghdr *msg,
+ int remaining_len)
+{
+ struct dmabuf_cmsg dmabuf_cmsg = { 0 };
+ struct tcp_xa_pool tcp_xa_pool;
+ unsigned int start;
+ int i, copy, n;
+ int sent = 0;
+ int err = 0;
+
+ tcp_xa_pool.max = 0;
+ tcp_xa_pool.idx = 0;
+ do {
+ start = skb_headlen(skb);
+
+ if (skb_frags_readable(skb)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* Copy header. */
+ copy = start - offset;
+ if (copy > 0) {
+ copy = min(copy, remaining_len);
+
+ n = copy_to_iter(skb->data + offset, copy,
+ &msg->msg_iter);
+ if (n != copy) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ offset += copy;
+ remaining_len -= copy;
+
+ /* First a dmabuf_cmsg for # bytes copied to user
+ * buffer.
+ */
+ memset(&dmabuf_cmsg, 0, sizeof(dmabuf_cmsg));
+ dmabuf_cmsg.frag_size = copy;
+ err = put_cmsg(msg, SOL_SOCKET, SO_DEVMEM_LINEAR,
+ sizeof(dmabuf_cmsg), &dmabuf_cmsg);
+ if (err || msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~MSG_CTRUNC;
+ if (!err)
+ err = -ETOOSMALL;
+ goto out;
+ }
+
+ sent += copy;
+
+ if (remaining_len == 0)
+ goto out;
+ }
+
+ /* after that, send information of dmabuf pages through a
+ * sequence of cmsg
+ */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct net_iov *niov;
+ u64 frag_offset;
+ int end;
+
+ /* !skb_frags_readable() should indicate that ALL the
+ * frags in this skb are dmabuf net_iovs. We're checking
+ * for that flag above, but also check individual frags
+ * here. If the tcp stack is not setting
+ * skb_frags_readable() correctly, we still don't want
+ * to crash here.
+ */
+ if (!skb_frag_net_iov(frag)) {
+ net_err_ratelimited("Found non-dmabuf skb with net_iov");
+ err = -ENODEV;
+ goto out;
+ }
+
+ niov = skb_frag_net_iov(frag);
+ end = start + skb_frag_size(frag);
+ copy = end - offset;
+
+ if (copy > 0) {
+ copy = min(copy, remaining_len);
+
+ frag_offset = net_iov_virtual_addr(niov) +
+ skb_frag_off(frag) + offset -
+ start;
+ dmabuf_cmsg.frag_offset = frag_offset;
+ dmabuf_cmsg.frag_size = copy;
+ err = tcp_xa_pool_refill(sk, &tcp_xa_pool,
+ skb_shinfo(skb)->nr_frags - i);
+ if (err)
+ goto out;
+
+ /* Will perform the exchange later */
+ dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx];
+ dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov);
+
+ offset += copy;
+ remaining_len -= copy;
+
+ err = put_cmsg(msg, SOL_SOCKET,
+ SO_DEVMEM_DMABUF,
+ sizeof(dmabuf_cmsg),
+ &dmabuf_cmsg);
+ if (err || msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~MSG_CTRUNC;
+ if (!err)
+ err = -ETOOSMALL;
+ goto out;
+ }
+
+ atomic_long_inc(&niov->pp_ref_count);
+ tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag);
+
+ sent += copy;
+
+ if (remaining_len == 0)
+ goto out;
+ }
+ start = end;
+ }
+
+ tcp_xa_pool_commit(sk, &tcp_xa_pool);
+ if (!remaining_len)
+ goto out;
+
+ /* if remaining_len is not satisfied yet, we need to go to the
+ * next frag in the frag_list to satisfy remaining_len.
+ */
+ skb = skb_shinfo(skb)->frag_list ?: skb->next;
+
+ offset = offset - start;
+ } while (skb);
+
+ if (remaining_len) {
+ err = -EFAULT;
+ goto out;
+ }
+
+out:
+ tcp_xa_pool_commit(sk, &tcp_xa_pool);
+ if (!sent)
+ sent = err;
+
+ return sent;
+}
+
/*
* This routine copies from a sock struct into the user buffer.
*
@@ -2330,6 +2558,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int *cmsg_flags)
{
struct tcp_sock *tp = tcp_sk(sk);
+ int last_copied_dmabuf = -1; /* uninitialized */
int copied = 0;
u32 peek_seq;
u32 *seq;
@@ -2509,15 +2738,44 @@ found_ok_skb:
}
if (!(flags & MSG_TRUNC)) {
- err = skb_copy_datagram_msg(skb, offset, msg, used);
- if (err) {
- /* Exception. Bailout! */
- if (!copied)
- copied = -EFAULT;
+ if (last_copied_dmabuf != -1 &&
+ last_copied_dmabuf != !skb_frags_readable(skb))
break;
+
+ if (skb_frags_readable(skb)) {
+ err = skb_copy_datagram_msg(skb, offset, msg,
+ used);
+ if (err) {
+ /* Exception. Bailout! */
+ if (!copied)
+ copied = -EFAULT;
+ break;
+ }
+ } else {
+ if (!(flags & MSG_SOCK_DEVMEM)) {
+ /* dmabuf skbs can only be received
+ * with the MSG_SOCK_DEVMEM flag.
+ */
+ if (!copied)
+ copied = -EFAULT;
+
+ break;
+ }
+
+ err = tcp_recvmsg_dmabuf(sk, skb, offset, msg,
+ used);
+ if (err <= 0) {
+ if (!copied)
+ copied = -EFAULT;
+
+ break;
+ }
+ used = err;
}
}
+ last_copied_dmabuf = !skb_frags_readable(skb);
+
WRITE_ONCE(*seq, *seq + used);
copied += used;
len -= used;
@@ -2833,7 +3091,7 @@ void __tcp_close(struct sock *sk, long timeout)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, sk->sk_allocation,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_CLOSE);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@@ -2908,7 +3166,7 @@ adjudge_to_death:
if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_LINGER);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
@@ -2927,7 +3185,7 @@ adjudge_to_death:
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_MEMORY);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) {
@@ -3025,13 +3283,16 @@ int tcp_disconnect(struct sock *sk, int flags)
inet_csk_listen_stop(sk);
} else if (unlikely(tp->repair)) {
WRITE_ONCE(sk->sk_err, ECONNABORTED);
- } else if (tcp_need_reset(old_state) ||
- (tp->snd_nxt != tp->write_seq &&
- (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
+ } else if (tcp_need_reset(old_state)) {
+ tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_TCP_STATE);
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
+ } else if (tp->snd_nxt != tp->write_seq &&
+ (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
/* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
- tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED);
+ tcp_send_active_reset(sk, gfp_any(),
+ SK_RST_REASON_TCP_DISCONNECT_WITH_DATA);
WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (old_state == TCP_SYN_SENT)
WRITE_ONCE(sk->sk_err, ECONNRESET);
@@ -4637,6 +4898,13 @@ int tcp_abort(struct sock *sk, int err)
/* Don't race with userspace socket closes such as tcp_close. */
lock_sock(sk);
+ /* Avoid closing the same socket twice. */
+ if (sk->sk_state == TCP_CLOSE) {
+ if (!has_current_bpf_ctx())
+ release_sock(sk);
+ return -ENOENT;
+ }
+
if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
inet_csk_listen_stop(sk);
@@ -4646,16 +4914,13 @@ int tcp_abort(struct sock *sk, int err)
local_bh_disable();
bh_lock_sock(sk);
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (tcp_need_reset(sk->sk_state))
- tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
- tcp_done_with_error(sk, err);
- }
+ if (tcp_need_reset(sk->sk_state))
+ tcp_send_active_reset(sk, GFP_ATOMIC,
+ SK_RST_REASON_TCP_STATE);
+ tcp_done_with_error(sk, err);
bh_unlock_sock(sk);
local_bh_enable();
- tcp_write_queue_purge(sk);
if (!has_current_bpf_ctx())
release_sock(sk);
return 0;
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 53b0d62fd2c2..e7658c5d6b79 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -30,7 +30,7 @@ void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
}
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
- struct sk_msg *msg, u32 apply_bytes, int flags)
+ struct sk_msg *msg, u32 apply_bytes)
{
bool apply = apply_bytes;
struct scatterlist *sge;
@@ -167,7 +167,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
if (unlikely(!psock))
return -EPIPE;
- ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
+ ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes) :
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
sk_psock_put(sk, psock);
return ret;
@@ -577,7 +577,7 @@ out_err:
err = sk_stream_error(sk, msg->msg_flags, err);
release_sock(sk);
sk_psock_put(sk, psock);
- return copied ? copied : err;
+ return copied > 0 ? copied : err;
}
enum {
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 52b1f2665dfa..81b96331b2bb 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -185,7 +185,7 @@ static inline void htcp_alpha_update(struct htcp *ca)
u32 scale = (HZ << 3) / (10 * minRTT);
/* clamping ratio to interval [0.5,10]<<3 */
- scale = min(max(scale, 1U << 2), 10U << 3);
+ scale = clamp(scale, 1U << 2, 10U << 3);
factor = (factor << 3) / scale;
if (!factor)
factor = 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e37488d3453f..9f314dfa1490 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5391,6 +5391,9 @@ restart:
for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
n = tcp_skb_next(skb, list);
+ if (!skb_frags_readable(skb))
+ goto skip_this;
+
/* No new bits? It is possible on ofo queue. */
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
skb = tcp_collapse_one(sk, skb, list, root);
@@ -5411,17 +5414,20 @@ restart:
break;
}
- if (n && n != tail && tcp_skb_can_collapse_rx(skb, n) &&
+ if (n && n != tail && skb_frags_readable(n) &&
+ tcp_skb_can_collapse_rx(skb, n) &&
TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
end_of_skbs = false;
break;
}
+skip_this:
/* Decided to skip this, advance start seq. */
start = TCP_SKB_CB(skb)->end_seq;
}
if (end_of_skbs ||
- (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
+ (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) ||
+ !skb_frags_readable(skb))
return;
__skb_queue_head_init(&tmp);
@@ -5463,7 +5469,8 @@ restart:
if (!skb ||
skb == tail ||
!tcp_skb_can_collapse_rx(nskb, skb) ||
- (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
+ (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) ||
+ !skb_frags_readable(skb))
goto end;
}
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fd17f25ff288..5afe5e57c89b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -79,6 +79,7 @@
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
#include <linux/btf_ids.h>
+#include <linux/skbuff_ref.h>
#include <crypto/hash.h>
#include <linux/scatterlist.h>
@@ -97,6 +98,8 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
+static DEFINE_MUTEX(tcp_exit_batch_mutex);
+
static u32 tcp_v4_init_seq(const struct sk_buff *skb)
{
return secure_tcp_seq(ip_hdr(skb)->daddr,
@@ -118,6 +121,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
struct tcp_sock *tp = tcp_sk(sk);
int ts_recent_stamp;
+ if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2)
+ reuse = 0;
+
if (reuse == 2) {
/* Still does not detect *everything* that goes through
* lo, since we require a loopback src or dst address
@@ -1068,7 +1074,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
}
tcp_v4_send_ack(sk, skb,
- tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcptw->tw_snd_nxt, READ_ONCE(tcptw->tw_rcv_nxt),
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
READ_ONCE(tcptw->tw_ts_recent),
@@ -2507,10 +2513,25 @@ static void tcp_md5sig_info_free_rcu(struct rcu_head *head)
}
#endif
+static void tcp_release_user_frags(struct sock *sk)
+{
+#ifdef CONFIG_PAGE_POOL
+ unsigned long index;
+ void *netmem;
+
+ xa_for_each(&sk->sk_user_frags, index, netmem)
+ WARN_ON_ONCE(!napi_pp_put_page((__force netmem_ref)netmem));
+#endif
+}
+
void tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ tcp_release_user_frags(sk);
+
+ xa_destroy(&sk->sk_user_frags);
+
trace_tcp_destroy_sock(sk);
tcp_clear_xmit_timers(sk);
@@ -2943,7 +2964,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
- i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
+ i, src, srcp, dest, destp, READ_ONCE(tw->tw_substate), 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
refcount_read(&tw->tw_refcnt), tw);
}
@@ -3514,6 +3535,16 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
+ /* make sure concurrent calls to tcp_sk_exit_batch from net_cleanup_work
+ * and failed setup_net error unwinding path are serialized.
+ *
+ * tcp_twsk_purge() handles twsk in any dead netns, not just those in
+ * net_exit_list, the thread that dismantles a particular twsk must
+ * do so without other thread progressing to refcount_dec_and_test() of
+ * tcp_death_row.tw_refcount.
+ */
+ mutex_lock(&tcp_exit_batch_mutex);
+
tcp_twsk_purge(net_exit_list);
list_for_each_entry(net, net_exit_list, exit_list) {
@@ -3521,6 +3552,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount));
tcp_fastopen_ctx_destroy(net);
}
+
+ mutex_unlock(&tcp_exit_batch_mutex);
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index b01eb6d94413..95669935494e 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -617,9 +617,13 @@ static struct genl_family tcp_metrics_nl_family;
static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
[TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
- [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
- .len = sizeof(struct in6_addr), },
+ [TCP_METRICS_ATTR_ADDR_IPV6] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+
[TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
+ [TCP_METRICS_ATTR_SADDR_IPV6] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+
/* Following attributes are not received for GET/DEL,
* we keep them for reference
*/
@@ -811,8 +815,6 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
if (a) {
struct in6_addr in6;
- if (nla_len(a) != sizeof(struct in6_addr))
- return -EINVAL;
in6 = nla_get_in6_addr(a);
inetpeer_set_addr_v6(addr, &in6);
if (hash)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a19a9dbd3409..bb1fe1ba867a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -52,16 +52,17 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
return TCP_TW_SUCCESS;
}
-static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
+static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq,
+ u32 rcv_nxt)
{
#ifdef CONFIG_TCP_AO
struct tcp_ao_info *ao;
ao = rcu_dereference(tcptw->ao_info);
- if (unlikely(ao && seq < tcptw->tw_rcv_nxt))
+ if (unlikely(ao && seq < rcv_nxt))
WRITE_ONCE(ao->rcv_sne, ao->rcv_sne + 1);
#endif
- tcptw->tw_rcv_nxt = seq;
+ WRITE_ONCE(tcptw->tw_rcv_nxt, seq);
}
/*
@@ -98,8 +99,9 @@ enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
const struct tcphdr *th, u32 *tw_isn)
{
- struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+ u32 rcv_nxt = READ_ONCE(tcptw->tw_rcv_nxt);
+ struct tcp_options_received tmp_opt;
bool paws_reject = false;
int ts_recent_stamp;
@@ -117,26 +119,26 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
}
}
- if (tw->tw_substate == TCP_FIN_WAIT2) {
+ if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2) {
/* Just repeat all the checks of tcp_rcv_state_process() */
/* Out of window, send ACK */
if (paws_reject ||
!tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcptw->tw_rcv_nxt,
- tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
+ rcv_nxt,
+ rcv_nxt + tcptw->tw_rcv_wnd))
return tcp_timewait_check_oow_rate_limit(
tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2);
if (th->rst)
goto kill;
- if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
+ if (th->syn && !before(TCP_SKB_CB(skb)->seq, rcv_nxt))
return TCP_TW_RST;
/* Dup ACK? */
if (!th->ack ||
- !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
+ !after(TCP_SKB_CB(skb)->end_seq, rcv_nxt) ||
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
inet_twsk_put(tw);
return TCP_TW_SUCCESS;
@@ -146,12 +148,13 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* reset.
*/
if (!th->fin ||
- TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1)
+ TCP_SKB_CB(skb)->end_seq != rcv_nxt + 1)
return TCP_TW_RST;
/* FIN arrived, enter true time-wait state. */
- tw->tw_substate = TCP_TIME_WAIT;
- twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq);
+ WRITE_ONCE(tw->tw_substate, TCP_TIME_WAIT);
+ twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq,
+ rcv_nxt);
if (tmp_opt.saw_tstamp) {
WRITE_ONCE(tcptw->tw_ts_recent_stamp,
@@ -182,7 +185,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
*/
if (!paws_reject &&
- (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
+ (TCP_SKB_CB(skb)->seq == rcv_nxt &&
(TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
/* In window segment, it may be only reset or bare ack. */
@@ -229,7 +232,7 @@ kill:
*/
if (th->syn && !th->rst && !th->ack && !paws_reject &&
- (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
+ (after(TCP_SKB_CB(skb)->seq, rcv_nxt) ||
(tmp_opt.saw_tstamp &&
(s32)(READ_ONCE(tcptw->tw_ts_recent) - tmp_opt.rcv_tsval) < 0))) {
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
@@ -625,6 +628,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
+ xa_init_flags(&newsk->sk_user_frags, XA_FLAGS_ALLOC1);
+
return newsk;
}
EXPORT_SYMBOL(tcp_create_openreq_child);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 16c48df8df4c..4fd746bd4d54 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2344,7 +2344,8 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
if (unlikely(TCP_SKB_CB(skb)->eor) ||
tcp_has_tx_tstamp(skb) ||
- !skb_pure_zcopy_same(skb, next))
+ !skb_pure_zcopy_same(skb, next) ||
+ skb_frags_readable(skb) != skb_frags_readable(next))
return false;
len -= skb->len;
@@ -3264,6 +3265,8 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
return false;
if (skb_cloned(skb))
return false;
+ if (!skb_frags_readable(skb))
+ return false;
/* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false;
@@ -3649,7 +3652,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority,
/* skb of trace_tcp_send_reset() keeps the skb that caused RST,
* skb here is different to the troublesome skb, so use NULL
*/
- trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED);
+ trace_tcp_send_reset(sk, NULL, reason);
}
/* Send a crossed SYN-ACK during socket establishment.
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4d40615dc8fc..79064580c8c0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -125,7 +125,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
do_reset = true;
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_MEMORY);
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
@@ -282,6 +282,7 @@ static int tcp_write_timeout(struct sock *sk)
expired = retransmits_timed_out(sk, retry_until,
READ_ONCE(icsk->icsk_user_timeout));
tcp_fastopen_active_detect_blackhole(sk, expired);
+ mptcp_active_detect_blackhole(sk, expired);
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
@@ -779,7 +780,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
}
- tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED);
+ tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_STATE);
goto death;
}
@@ -807,7 +808,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
(user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT);
tcp_write_err(sk);
goto out;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 49c622e743e8..8accbf4cb295 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -115,6 +115,7 @@
#include <net/addrconf.h>
#include <net/udp_tunnel.h>
#include <net/gro.h>
+#include <net/inet_dscp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -365,7 +366,7 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
-static int compute_score(struct sock *sk, struct net *net,
+static int compute_score(struct sock *sk, const struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum,
int dif, int sdif)
@@ -420,7 +421,7 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
}
/* called with rcu_read_lock() */
-static struct sock *udp4_lib_lookup2(struct net *net,
+static struct sock *udp4_lib_lookup2(const struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum,
int dif, int sdif,
@@ -480,7 +481,7 @@ rescore:
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
+struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
@@ -561,7 +562,7 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
* Does increment socket refcount.
*/
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) || IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
-struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
+struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
struct sock *sk;
@@ -2618,7 +2619,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
if (!inet_sk(sk)->inet_daddr && in_dev)
return ip_mc_validate_source(skb, iph->daddr,
iph->saddr,
- iph->tos & IPTOS_RT_MASK,
+ iph->tos & INET_DSCP_MASK,
skb->dev, in_dev, &itag);
}
return 0;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b254a5dadfcf..d842303587af 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -279,7 +279,8 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
return ERR_PTR(-EINVAL);
if (unlikely(skb_checksum_start(gso_skb) !=
- skb_transport_header(gso_skb)))
+ skb_transport_header(gso_skb) &&
+ !(skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)))
return ERR_PTR(-EINVAL);
/* We don't know if egress device can segment and checksum the packet
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index e4e0fa869fa4..619a53eb672d 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -6,6 +6,7 @@
#include <net/dst_metadata.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
+#include <net/inet_dscp.h>
int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
@@ -232,7 +233,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
fl4.saddr = key->u.ipv4.src;
fl4.fl4_dport = dport;
fl4.fl4_sport = sport;
- fl4.flowi4_tos = RT_TOS(tos);
+ fl4.flowi4_tos = tos & INET_DSCP_MASK;
fl4.flowi4_flags = key->flow_flags;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f70d8757af1a..d680beb91b0a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -92,8 +92,6 @@
#include <linux/export.h>
#include <linux/ioam6.h>
-#define INFINITY_LIFE_TIME 0xFFFFFFFF
-
#define IPV6_MAX_STRLEN \
sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")
@@ -239,6 +237,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
+ .ra_honor_pio_pflag = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -302,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
+ .ra_honor_pio_pflag = 0,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -2762,6 +2762,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
u32 addr_flags = 0;
struct inet6_dev *in6_dev;
struct net *net = dev_net(dev);
+ bool ignore_autoconf = false;
pinfo = (struct prefix_info *) opt;
@@ -2864,7 +2865,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
/* Try to figure out our local address for this prefix */
- if (pinfo->autoconf && in6_dev->cnf.autoconf) {
+ ignore_autoconf = READ_ONCE(in6_dev->cnf.ra_honor_pio_pflag) && pinfo->preferpd;
+ if (pinfo->autoconf && in6_dev->cnf.autoconf && !ignore_autoconf) {
struct in6_addr addr;
bool tokenized = false, dev_addr_generated = false;
@@ -5617,8 +5619,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
static void ipv6_store_devconf(const struct ipv6_devconf *cnf,
@@ -6173,8 +6174,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
}
static inline size_t inet6_prefix_nlmsg_size(void)
@@ -6241,8 +6241,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
}
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -6926,6 +6925,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "ra_honor_pio_pflag",
+ .data = &ipv6_devconf.ra_honor_pio_pflag,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
#ifdef CONFIG_IPV6_ROUTER_PREF
{
.procname = "accept_ra_rtr_pref",
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 90d2c7e3f5e9..ba69b86f1c7d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -708,6 +708,7 @@ const struct proto_ops inet6_stream_ops = {
.splice_eof = inet_splice_eof,
.sendmsg_locked = tcp_sendmsg_locked,
.splice_read = tcp_splice_read,
+ .set_peek_off = sk_set_peek_off,
.read_sock = tcp_read_sock,
.read_skb = tcp_read_skb,
.peek_len = tcp_peek_len,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 3920e8aa1031..b2400c226a32 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -132,7 +132,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(sg_page(sg), skb->pp_recycle);
+ skb_page_unref(page_to_netmem(sg_page(sg)),
+ skb->pp_recycle);
}
#ifdef CONFIG_INET6_ESPINTCP
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 9e254de7462f..04a9ed5e8310 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -27,6 +27,7 @@ struct fib6_rule {
struct rt6key src;
struct rt6key dst;
dscp_t dscp;
+ u8 dscp_full:1; /* DSCP or TOS selector */
};
static bool fib6_rule_matchall(const struct fib_rule *rule)
@@ -345,6 +346,20 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 1;
}
+static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
+ struct netlink_ext_ack *extack)
+{
+ if (rule6->dscp) {
+ NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
+ return -EINVAL;
+ }
+
+ rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule6->dscp_full = true;
+
+ return 0;
+}
+
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -361,6 +376,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+ if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
+ goto errout;
+
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -413,9 +431,19 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
return 0;
- if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
+ if (frh->tos &&
+ (rule6->dscp_full ||
+ inet_dscp_to_dsfield(rule6->dscp) != frh->tos))
return 0;
+ if (tb[FRA_DSCP]) {
+ dscp_t dscp;
+
+ dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
+ if (!rule6->dscp_full || rule6->dscp != dscp)
+ return 0;
+ }
+
if (frh->src_len &&
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
return 0;
@@ -434,7 +462,15 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule6->dst.plen;
frh->src_len = rule6->src.plen;
- frh->tos = inet_dscp_to_dsfield(rule6->dscp);
+
+ if (rule6->dscp_full) {
+ frh->tos = 0;
+ if (nla_put_u8(skb, FRA_DSCP,
+ inet_dscp_to_dsfield(rule6->dscp) >> 2))
+ goto nla_put_failure;
+ } else {
+ frh->tos = inet_dscp_to_dsfield(rule6->dscp);
+ }
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
@@ -450,7 +486,8 @@ nla_put_failure:
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
- + nla_total_size(16); /* src */
+ + nla_total_size(16) /* src */
+ + nla_total_size(1); /* dscp */
}
static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 7b31674644ef..071b0bc1179d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type)
return false;
}
-static bool icmpv6_global_allow(struct net *net, int type)
+static bool icmpv6_global_allow(struct net *net, int type,
+ bool *apply_ratelimit)
{
if (icmpv6_mask_allow(net, type))
return true;
- if (icmp_global_allow())
+ if (icmp_global_allow(net)) {
+ *apply_ratelimit = true;
return true;
-
+ }
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type)
* Check the ICMP output rate limit
*/
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
- struct flowi6 *fl6)
+ struct flowi6 *fl6, bool apply_ratelimit)
{
struct net *net = sock_net(sk);
struct dst_entry *dst;
bool res = false;
- if (icmpv6_mask_allow(net, type))
+ if (!apply_ratelimit)
return true;
/*
@@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
ICMP6_MIB_RATELIMITHOST);
+ else
+ icmp_global_consume(net);
dst_release(dst);
return res;
}
@@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
struct net *net;
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
+ bool apply_ratelimit = false;
struct dst_entry *dst;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
@@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
return;
}
- /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
+ /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
+ if (!(skb->dev->flags & IFF_LOOPBACK) &&
+ !icmpv6_global_allow(net, type, &apply_ratelimit))
goto out_bh_enable;
mip6_addr_swap(skb, parm);
@@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
np = inet6_sk(sk);
- if (!icmpv6_xrlim_allow(sk, type, &fl6))
+ if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
goto out;
tmp_hdr.icmp6_type = type;
@@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
struct icmp6hdr *icmph = icmp6_hdr(skb);
+ bool apply_ratelimit = false;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
struct icmpv6_msg msg;
@@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
goto out;
/* Check the ratelimit */
- if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
- !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
+ if ((!(skb->dev->flags & IFF_LOOPBACK) &&
+ !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
+ !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
goto out_dst_release;
idev = __in6_dev_get(skb->dev);
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index ad5f6f6ba333..85b92917849b 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -108,6 +108,7 @@ int ila_lwt_init(void);
void ila_lwt_fini(void);
int ila_xlat_init_net(struct net *net);
+void ila_xlat_pre_exit_net(struct net *net);
void ila_xlat_exit_net(struct net *net);
int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 69caed07315f..976c78efbae1 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -71,6 +71,11 @@ ila_xlat_init_fail:
return err;
}
+static __net_exit void ila_pre_exit_net(struct net *net)
+{
+ ila_xlat_pre_exit_net(net);
+}
+
static __net_exit void ila_exit_net(struct net *net)
{
ila_xlat_exit_net(net);
@@ -78,6 +83,7 @@ static __net_exit void ila_exit_net(struct net *net)
static struct pernet_operations ila_net_ops = {
.init = ila_init_net,
+ .pre_exit = ila_pre_exit_net,
.exit = ila_exit_net,
.id = &ila_net_id,
.size = sizeof(struct ila_net),
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 67e8c9440977..534a4498e280 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -619,6 +619,15 @@ int ila_xlat_init_net(struct net *net)
return 0;
}
+void ila_xlat_pre_exit_net(struct net *net)
+{
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+
+ if (ilan->xlat.hooks_registered)
+ nf_unregister_net_hooks(net, ila_nf_hook_ops,
+ ARRAY_SIZE(ila_nf_hook_ops));
+}
+
void ila_xlat_exit_net(struct net *net)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
@@ -626,10 +635,6 @@ void ila_xlat_exit_net(struct net *net)
rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL);
free_bucket_spinlocks(ilan->xlat.locks);
-
- if (ilan->xlat.hooks_registered)
- nf_unregister_net_hooks(net, ila_nf_hook_ops,
- ARRAY_SIZE(ila_nf_hook_ops));
}
static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 6db71bb1cd30..9ec05e354baa 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(inet6_ehashfn);
*
* The sockhash lock must be held as a reader here.
*/
-struct sock *__inet6_lookup_established(struct net *net,
+struct sock *__inet6_lookup_established(const struct net *net,
struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
const __be16 sport,
@@ -89,7 +89,7 @@ found:
}
EXPORT_SYMBOL(__inet6_lookup_established);
-static inline int compute_score(struct sock *sk, struct net *net,
+static inline int compute_score(struct sock *sk, const struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
const int dif, const int sdif)
@@ -126,7 +126,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
* Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
* the selected sock or an error.
*/
-struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
__be16 sport,
@@ -147,7 +147,7 @@ struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
EXPORT_SYMBOL_GPL(inet6_lookup_reuseport);
/* called with rcu_read_lock() */
-static struct sock *inet6_lhash2_lookup(struct net *net,
+static struct sock *inet6_lhash2_lookup(const struct net *net,
struct inet_listen_hashbucket *ilb2,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -174,7 +174,7 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
-struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+struct sock *inet6_lookup_run_sk_lookup(const struct net *net,
int protocol,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -199,7 +199,7 @@ struct sock *inet6_lookup_run_sk_lookup(struct net *net,
}
EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
-struct sock *inet6_lookup_listener(struct net *net,
+struct sock *inet6_lookup_listener(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -243,7 +243,8 @@ done:
}
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
-struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+struct sock *inet6_lookup(const struct net *net,
+ struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index bf7120ecea1e..beb6b4cfc551 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -42,8 +42,10 @@ struct ioam6_lwt {
struct ioam6_lwt_freq freq;
atomic_t pkt_cnt;
u8 mode;
+ bool has_tunsrc;
+ struct in6_addr tunsrc;
struct in6_addr tundst;
- struct ioam6_lwt_encap tuninfo;
+ struct ioam6_lwt_encap tuninfo;
};
static const struct netlink_range_validation freq_range = {
@@ -72,8 +74,10 @@ static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
[IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8,
IOAM6_IPTUNNEL_MODE_MIN,
IOAM6_IPTUNNEL_MODE_MAX),
+ [IOAM6_IPTUNNEL_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[IOAM6_IPTUNNEL_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
- [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+ [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(
+ sizeof(struct ioam6_trace_hdr)),
};
static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
@@ -85,7 +89,7 @@ static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
- trace->type.bit21)
+ trace->type.bit21 | trace->type.bit23)
return false;
trace->nodelen = 0;
@@ -143,6 +147,11 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
else
mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
+ if (tb[IOAM6_IPTUNNEL_SRC] && mode == IOAM6_IPTUNNEL_MODE_INLINE) {
+ NL_SET_ERR_MSG(extack, "no tunnel src expected with this mode");
+ return -EINVAL;
+ }
+
if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
return -EINVAL;
@@ -167,19 +176,40 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
ilwt = ioam6_lwt_state(lwt);
err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
- if (err) {
- kfree(lwt);
- return err;
- }
+ if (err)
+ goto free_lwt;
atomic_set(&ilwt->pkt_cnt, 0);
ilwt->freq.k = freq_k;
ilwt->freq.n = freq_n;
ilwt->mode = mode;
- if (tb[IOAM6_IPTUNNEL_DST])
+
+ if (!tb[IOAM6_IPTUNNEL_SRC]) {
+ ilwt->has_tunsrc = false;
+ } else {
+ ilwt->has_tunsrc = true;
+ ilwt->tunsrc = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_SRC]);
+
+ if (ipv6_addr_any(&ilwt->tunsrc)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_SRC],
+ "invalid tunnel source address");
+ err = -EINVAL;
+ goto free_cache;
+ }
+ }
+
+ if (tb[IOAM6_IPTUNNEL_DST]) {
ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
+ if (ipv6_addr_any(&ilwt->tundst)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_DST],
+ "invalid tunnel dest address");
+ err = -EINVAL;
+ goto free_cache;
+ }
+ }
+
tuninfo = ioam6_lwt_info(lwt);
tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
tuninfo->pad[0] = IPV6_TLV_PADN;
@@ -201,6 +231,11 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
*ts = lwt;
return 0;
+free_cache:
+ dst_cache_destroy(&ilwt->cache);
+free_lwt:
+ kfree(lwt);
+ return err;
}
static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
@@ -256,6 +291,8 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
struct ioam6_lwt_encap *tuninfo,
+ bool has_tunsrc,
+ struct in6_addr *tunsrc,
struct in6_addr *tundst)
{
struct dst_entry *dst = skb_dst(skb);
@@ -285,8 +322,12 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
hdr->nexthdr = NEXTHDR_HOP;
hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
hdr->daddr = *tundst;
- ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
- IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
+
+ if (has_tunsrc)
+ memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc));
+ else
+ ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+ IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
skb_postpush_rcsum(skb, hdr, len);
@@ -328,7 +369,9 @@ do_inline:
case IOAM6_IPTUNNEL_MODE_ENCAP:
do_encap:
/* Encapsulation (ip6ip6) */
- err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
+ err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
+ ilwt->has_tunsrc, &ilwt->tunsrc,
+ &ilwt->tundst);
if (unlikely(err))
goto drop;
@@ -414,6 +457,13 @@ static int ioam6_fill_encap_info(struct sk_buff *skb,
goto ret;
if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ if (ilwt->has_tunsrc) {
+ err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_SRC,
+ &ilwt->tunsrc);
+ if (err)
+ goto ret;
+ }
+
err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
if (err)
goto ret;
@@ -435,8 +485,12 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
nla_total_size(sizeof(ilwt->mode)) +
nla_total_size(sizeof(ilwt->tuninfo.traceh));
- if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
+ if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ if (ilwt->has_tunsrc)
+ nlsize += nla_total_size(sizeof(ilwt->tunsrc));
+
nlsize += nla_total_size(sizeof(ilwt->tundst));
+ }
return nlsize;
}
@@ -451,17 +505,21 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
return (ilwt_a->freq.k != ilwt_b->freq.k ||
ilwt_a->freq.n != ilwt_b->freq.n ||
ilwt_a->mode != ilwt_b->mode ||
+ ilwt_a->has_tunsrc != ilwt_b->has_tunsrc ||
(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
!ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
+ (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
+ ilwt_a->has_tunsrc &&
+ !ipv6_addr_equal(&ilwt_a->tunsrc, &ilwt_b->tunsrc)) ||
trace_a->namespace_id != trace_b->namespace_id);
}
static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
.build_state = ioam6_build_state,
.destroy_state = ioam6_destroy_state,
- .output = ioam6_output,
+ .output = ioam6_output,
.fill_encap = ioam6_fill_encap_info,
- .get_encap_size = ioam6_encap_nlsize,
+ .get_encap_size = ioam6_encap_nlsize,
.cmp_encap = ioam6_encap_cmp,
.owner = THIS_MODULE,
};
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3942bd2ade78..235808cfec70 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1471,7 +1471,7 @@ static void ip6gre_tnl_init_features(struct net_device *dev)
{
struct ip6_tnl *nt = netdev_priv(dev);
- dev->features |= GRE6_FEATURES | NETIF_F_LLTX;
+ dev->features |= GRE6_FEATURES;
dev->hw_features |= GRE6_FEATURES;
/* TCP offload with GRE SEQ is not supported, nor can we support 2
@@ -1485,6 +1485,8 @@ static void ip6gre_tnl_init_features(struct net_device *dev)
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+ dev->lltx = true;
}
static int ip6gre_tunnel_init_common(struct net_device *dev)
@@ -1619,8 +1621,7 @@ static int __net_init ip6gre_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
-
+ ign->fb_tunnel_dev->netns_local = true;
ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 133610a49da6..70c0e16c0ae6 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -111,9 +111,8 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
{
struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
@@ -327,9 +326,8 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *curr_dev = NULL;
struct net *curr_net = NULL;
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ab504d31f0cd..f26841f1490f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -70,11 +70,15 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
/* Be paranoid, rather than too clever. */
if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
+ /* Make sure idev stays alive */
+ rcu_read_lock();
skb = skb_expand_head(skb, hh_len);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
return -ENOMEM;
}
+ rcu_read_unlock();
}
hdr = ipv6_hdr(skb);
@@ -283,11 +287,15 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
head_room += opt->opt_nflen + opt->opt_flen;
if (unlikely(head_room > skb_headroom(skb))) {
+ /* Make sure idev stays alive */
+ rcu_read_lock();
skb = skb_expand_head(skb, head_room);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
return -ENOBUFS;
}
+ rcu_read_unlock();
}
if (opt) {
@@ -1956,6 +1964,7 @@ int ip6_send_skb(struct sk_buff *skb)
struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
int err;
+ rcu_read_lock();
err = ip6_local_out(net, skb->sk, skb);
if (err) {
if (err > 0)
@@ -1965,6 +1974,7 @@ int ip6_send_skb(struct sk_buff *skb)
IPSTATS_MIB_OUTDISCARDS);
}
+ rcu_read_unlock();
return err;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9dee0c127955..b60e13c42bca 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -53,6 +53,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
@@ -608,7 +609,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
- 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ 0, 0, 0, IPPROTO_IPIP,
+ eiph->tos & INET_DSCP_MASK, 0);
if (IS_ERR(rt))
goto out;
@@ -619,7 +621,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rt->rt_flags & RTCF_LOCAL) {
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr, 0, 0,
- IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ IPPROTO_IPIP,
+ eiph->tos & INET_DSCP_MASK, 0);
if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
if (!IS_ERR(rt))
ip_rt_put(rt);
@@ -1507,7 +1510,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
tdev = __dev_get_by_index(t->net, p->link);
if (tdev) {
- dev->hard_header_len = tdev->hard_header_len + t_hlen;
+ dev->needed_headroom = tdev->hard_header_len +
+ tdev->needed_headroom + t_hlen;
mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);
mtu = mtu - t_hlen;
@@ -1731,7 +1735,9 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip6_tnl *tnl = netdev_priv(dev);
+ int t_hlen;
+ t_hlen = tnl->hlen + sizeof(struct ipv6hdr);
if (tnl->parms.proto == IPPROTO_IPV6) {
if (new_mtu < IPV6_MIN_MTU)
return -EINVAL;
@@ -1740,10 +1746,10 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
return -EINVAL;
}
if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) {
- if (new_mtu > IP6_MAX_MTU - dev->hard_header_len)
+ if (new_mtu > IP6_MAX_MTU - dev->hard_header_len - t_hlen)
return -EINVAL;
} else {
- if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
+ if (new_mtu > IP_MAX_MTU - dev->hard_header_len - t_hlen)
return -EINVAL;
}
WRITE_ONCE(dev->mtu, new_mtu);
@@ -1846,7 +1852,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL6;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
netif_keep_dst(dev);
@@ -1887,12 +1893,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
t_hlen = t->hlen + sizeof(struct ipv6hdr);
dev->type = ARPHRD_TUNNEL6;
- dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
+ dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len - t_hlen;
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
netdev_lockdep_set_classes(dev);
@@ -2256,7 +2261,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
+ ip6n->fb_tnl_dev->netns_local = true;
err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
if (err < 0)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index dd342e6ecf3f..2ce4ae0d8dc3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -640,7 +640,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops;
dev->needs_free_netdev = true;
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
}
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
@@ -2431,8 +2431,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
errout:
kfree_skb(skb);
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index cd342d5015c6..1e225e6489ea 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -985,7 +985,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
int err;
if (level == SOL_IP && sk->sk_type != SOCK_RAW)
- return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+ return ip_setsockopt(sk, level, optname, optval, optlen);
if (level != SOL_IPV6)
return -ENOPROTOOPT;
@@ -1475,7 +1475,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
int err;
if (level == SOL_IP && sk->sk_type != SOCK_RAW)
- return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ return ip_getsockopt(sk, level, optname, optval, optlen);
if (level != SOL_IPV6)
return -ENOPROTOOPT;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7ba01d8cfbae..b244dbf61d5f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -586,7 +586,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
const struct in6_addr *group;
struct ipv6_mc_socklist *pmc;
struct ip6_sf_socklist *psl;
- int i, count, copycount;
+ unsigned int count;
+ int i, copycount;
group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
@@ -610,7 +611,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
psl = sock_dereference(pmc->sflist, sk);
count = psl ? psl->sl_count : 0;
- copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
+ copycount = min(count, gsf->gf_numsrc);
gsf->gf_numsrc = count;
for (i = 0; i < copycount; i++) {
struct sockaddr_in6 *psin6;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b8eec1b6cc2c..aba94a348673 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -200,9 +200,9 @@ static inline int ndisc_is_useropt(const struct net_device *dev,
return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
+ opt->nd_opt_type == ND_OPT_6CO ||
opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
- opt->nd_opt_type == ND_OPT_PREF64 ||
- ndisc_ops_is_useropt(dev, opt->nd_opt_type);
+ opt->nd_opt_type == ND_OPT_PREF64;
}
static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
@@ -1944,7 +1944,7 @@ static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl,
static char warncomm[TASK_COMM_LEN];
static int warned;
if (strcmp(warncomm, current->comm) && warned < 5) {
- strcpy(warncomm, current->comm);
+ strscpy(warncomm, current->comm);
pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
warncomm, func,
dev_name, ctl->procname,
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 131f7bb2110d..7d5602950ae7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1773,7 +1773,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index c82f3fdd4a65..492a811828a7 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -38,13 +38,13 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
sizeof(struct in6_addr));
if (err < 0)
return err;
if (tb[NFTA_DUP_SREG_DEV])
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV],
&priv->sreg_dev, sizeof(int));
return err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 219701caba1e..b4251915585f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -174,7 +174,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
struct net_device *rt_dev = rt->dst.dev;
bool handled = false;
- if (rt_idev->dev == dev) {
+ if (rt_idev && rt_idev->dev == dev) {
rt->rt6i_idev = in6_dev_get(blackhole_netdev);
in6_dev_put(rt_idev);
handled = true;
@@ -6193,8 +6193,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_rt_update(struct net *net, struct fib6_info *rt,
@@ -6220,8 +6219,7 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index 2c83b7586422..db3c19a42e1c 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -263,10 +263,8 @@ static int rpl_input(struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
err = rpl_do_srh(skb, rlwt);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ if (unlikely(err))
+ goto drop;
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
@@ -286,9 +284,13 @@ static int rpl_input(struct sk_buff *skb)
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
- return err;
+ goto drop;
return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
}
static int nla_put_rpl_srh(struct sk_buff *skb, int attrtype,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 83b195f09561..39bd8951bfca 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -51,6 +51,7 @@
#include <net/dsfield.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/inet_dscp.h>
/*
This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
@@ -935,8 +936,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
- RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
- 0, dst, tiph->saddr, 0, 0,
+ tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE,
+ IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0,
sock_net_uid(tunnel->net, NULL));
rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
@@ -1111,7 +1112,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
iph->daddr, iph->saddr,
0, 0,
IPPROTO_IPV6,
- RT_TOS(iph->tos),
+ iph->tos & INET_DSCP_MASK,
tunnel->parms.link);
if (!IS_ERR(rt)) {
@@ -1435,7 +1436,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
netif_keep_dst(dev);
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->features |= SIT_FEATURES;
dev->hw_features |= SIT_FEATURES;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
@@ -1855,7 +1856,7 @@ static int __net_init sit_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ sitn->fb_tunnel_dev->netns_local = true;
err = register_netdev(sitn->fb_tunnel_dev);
if (err)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 200fea92f12f..d71ab4e1efe1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1193,7 +1193,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
#endif
}
- tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
+ READ_ONCE(tcptw->tw_rcv_nxt),
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
@@ -2258,7 +2259,7 @@ static void get_timewait6_sock(struct seq_file *seq,
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
- tw->tw_substate, 0, 0,
+ READ_ONCE(tw->tw_substate), 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
refcount_read(&tw->tw_refcnt), tw);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6602a2e9cdb5..52dfbb2ff1a8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -114,7 +114,7 @@ void udp_v6_rehash(struct sock *sk)
udp_lib_rehash(sk, new_hash);
}
-static int compute_score(struct sock *sk, struct net *net,
+static int compute_score(struct sock *sk, const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned short hnum,
int dif, int sdif)
@@ -160,7 +160,7 @@ static int compute_score(struct sock *sk, struct net *net,
}
/* called with rcu_read_lock() */
-static struct sock *udp6_lib_lookup2(struct net *net,
+static struct sock *udp6_lib_lookup2(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum,
int dif, int sdif, struct udp_hslot *hslot2,
@@ -217,7 +217,7 @@ rescore:
}
/* rcu_read_lock() must be held */
-struct sock *__udp6_lib_lookup(struct net *net,
+struct sock *__udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *udptable,
@@ -300,7 +300,7 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
* Does increment socket refcount.
*/
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
-struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
+struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
struct sock *sk;
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 1e42e13ad24e..d3e9efab7f4b 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -86,13 +86,15 @@ struct device *iucv_alloc_device(const struct attribute_group **attrs,
{
struct device *dev;
va_list vargs;
+ char buf[20];
int rc;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
goto out_error;
va_start(vargs, fmt);
- rc = dev_set_name(dev, fmt, vargs);
+ vsnprintf(buf, sizeof(buf), fmt, vargs);
+ rc = dev_set_name(dev, "%s", buf);
va_end(vargs);
if (rc)
goto out_error;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 2f191e50d4fc..d4118c796290 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -755,6 +755,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
!(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR);
int err = -EPIPE;
+ mutex_lock(&kcm->tx_mutex);
lock_sock(sk);
/* Per tcp_sendmsg this should be in poll */
@@ -926,6 +927,7 @@ partial_message:
KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
release_sock(sk);
+ mutex_unlock(&kcm->tx_mutex);
return copied;
out_error:
@@ -951,6 +953,7 @@ out_error:
sk->sk_write_space(sk);
release_sock(sk);
+ mutex_unlock(&kcm->tx_mutex);
return err;
}
@@ -1204,6 +1207,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
spin_unlock_bh(&mux->lock);
INIT_WORK(&kcm->tx_work, kcm_tx_work);
+ mutex_init(&kcm->tx_mutex);
spin_lock_bh(&mux->rx_lock);
kcm_rcv_ready(kcm);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 2e86f520f799..3eec23ac5ab1 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -117,12 +117,12 @@ struct l2tp_net {
struct hlist_head l2tp_v3_session_htable[16];
};
-static inline u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id)
+static u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id)
{
return ((u32)tunnel_id) << 16 | session_id;
}
-static inline unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id)
+static unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id)
{
return ((unsigned long)sk) + session_id;
}
@@ -135,63 +135,81 @@ static bool l2tp_sk_is_v6(struct sock *sk)
}
#endif
-static inline struct l2tp_net *l2tp_pernet(const struct net *net)
+static struct l2tp_net *l2tp_pernet(const struct net *net)
{
return net_generic(net, l2tp_net_id);
}
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
{
+ struct sock *sk = tunnel->sock;
+
trace_free_tunnel(tunnel);
- sock_put(tunnel->sock);
- /* the tunnel is freed in the socket destructor */
+
+ if (sk) {
+ /* Disable udp encapsulation */
+ switch (tunnel->encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ /* No longer an encapsulation socket. See net/ipv4/udp.c */
+ WRITE_ONCE(udp_sk(sk)->encap_type, 0);
+ udp_sk(sk)->encap_rcv = NULL;
+ udp_sk(sk)->encap_destroy = NULL;
+ break;
+ case L2TP_ENCAPTYPE_IP:
+ break;
+ }
+
+ tunnel->sock = NULL;
+ sock_put(sk);
+ }
+
+ kfree_rcu(tunnel, rcu);
}
static void l2tp_session_free(struct l2tp_session *session)
{
trace_free_session(session);
if (session->tunnel)
- l2tp_tunnel_dec_refcount(session->tunnel);
- kfree(session);
+ l2tp_tunnel_put(session->tunnel);
+ kfree_rcu(session, rcu);
}
-struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk)
+struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk)
{
- struct l2tp_tunnel *tunnel = sk->sk_user_data;
+ const struct net *net = sock_net(sk);
+ unsigned long tunnel_id, tmp;
+ struct l2tp_tunnel *tunnel;
+ struct l2tp_net *pn;
- if (tunnel)
- if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
- return NULL;
+ rcu_read_lock_bh();
+ pn = l2tp_pernet(net);
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel &&
+ tunnel->sock == sk &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
+ rcu_read_unlock_bh();
+ return tunnel;
+ }
+ }
+ rcu_read_unlock_bh();
- return tunnel;
+ return NULL;
}
EXPORT_SYMBOL_GPL(l2tp_sk_to_tunnel);
-void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
-{
- refcount_inc(&tunnel->ref_count);
-}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_inc_refcount);
-
-void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
+void l2tp_tunnel_put(struct l2tp_tunnel *tunnel)
{
if (refcount_dec_and_test(&tunnel->ref_count))
l2tp_tunnel_free(tunnel);
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_dec_refcount);
+EXPORT_SYMBOL_GPL(l2tp_tunnel_put);
-void l2tp_session_inc_refcount(struct l2tp_session *session)
-{
- refcount_inc(&session->ref_count);
-}
-EXPORT_SYMBOL_GPL(l2tp_session_inc_refcount);
-
-void l2tp_session_dec_refcount(struct l2tp_session *session)
+void l2tp_session_put(struct l2tp_session *session)
{
if (refcount_dec_and_test(&session->ref_count))
l2tp_session_free(session);
}
-EXPORT_SYMBOL_GPL(l2tp_session_dec_refcount);
+EXPORT_SYMBOL_GPL(l2tp_session_put);
/* Lookup a tunnel. A new reference is held on the returned tunnel. */
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
@@ -211,26 +229,27 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
-struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
+struct l2tp_tunnel *l2tp_tunnel_get_next(const struct net *net, unsigned long *key)
{
struct l2tp_net *pn = l2tp_pernet(net);
- unsigned long tunnel_id, tmp;
- struct l2tp_tunnel *tunnel;
- int count = 0;
+ struct l2tp_tunnel *tunnel = NULL;
rcu_read_lock_bh();
- idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
- if (tunnel && ++count > nth &&
- refcount_inc_not_zero(&tunnel->ref_count)) {
+again:
+ tunnel = idr_get_next_ul(&pn->l2tp_tunnel_idr, key);
+ if (tunnel) {
+ if (refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
}
+ (*key)++;
+ goto again;
}
rcu_read_unlock_bh();
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth);
+EXPORT_SYMBOL_GPL(l2tp_tunnel_get_next);
struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id)
{
@@ -254,7 +273,15 @@ struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk,
hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session,
hlist, key) {
- if (session->tunnel->sock == sk &&
+ /* session->tunnel may be NULL if another thread is in
+ * l2tp_session_register and has added an item to
+ * l2tp_v3_session_htable but hasn't yet added the
+ * session to its tunnel's session_list.
+ */
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ if (session->session_id == session_id &&
+ tunnel && tunnel->sock == sk &&
refcount_inc_not_zero(&session->ref_count)) {
rcu_read_unlock_bh();
return session;
@@ -295,24 +322,109 @@ struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, in
}
EXPORT_SYMBOL_GPL(l2tp_session_get);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
+static struct l2tp_session *l2tp_v2_session_get_next(const struct net *net,
+ u16 tid,
+ unsigned long *key)
{
- struct l2tp_session *session;
- int count = 0;
+ struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_session *session = NULL;
+
+ /* Start searching within the range of the tid */
+ if (*key == 0)
+ *key = l2tp_v2_session_key(tid, 0);
rcu_read_lock_bh();
- list_for_each_entry_rcu(session, &tunnel->session_list, list) {
- if (++count > nth) {
- l2tp_session_inc_refcount(session);
+again:
+ session = idr_get_next_ul(&pn->l2tp_v2_session_idr, key);
+ if (session) {
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ /* ignore sessions with id 0 as they are internal for pppol2tp */
+ if (session->session_id == 0) {
+ (*key)++;
+ goto again;
+ }
+
+ if (tunnel->tunnel_id == tid &&
+ refcount_inc_not_zero(&session->ref_count)) {
rcu_read_unlock_bh();
return session;
}
+
+ (*key)++;
+ if (tunnel->tunnel_id == tid)
+ goto again;
}
rcu_read_unlock_bh();
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
+
+static struct l2tp_session *l2tp_v3_session_get_next(const struct net *net,
+ u32 tid, struct sock *sk,
+ unsigned long *key)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_session *session = NULL;
+
+ rcu_read_lock_bh();
+again:
+ session = idr_get_next_ul(&pn->l2tp_v3_session_idr, key);
+ if (session && !hash_hashed(&session->hlist)) {
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ if (tunnel && tunnel->tunnel_id == tid &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
+
+ (*key)++;
+ goto again;
+ }
+
+ /* If we get here and session is non-NULL, the IDR entry may be one
+ * where the session_id collides with one in another tunnel. Check
+ * session_htable for a match. There can only be one session of a given
+ * ID per tunnel so we can return as soon as a match is found.
+ */
+ if (session && hash_hashed(&session->hlist)) {
+ unsigned long hkey = l2tp_v3_session_hashkey(sk, session->session_id);
+ u32 sid = session->session_id;
+
+ hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session,
+ hlist, hkey) {
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ if (session->session_id == sid &&
+ tunnel && tunnel->tunnel_id == tid &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
+ }
+
+ /* If no match found, the colliding session ID isn't in our
+ * tunnel so try the next session ID.
+ */
+ (*key)++;
+ goto again;
+ }
+
+ rcu_read_unlock_bh();
+
+ return NULL;
+}
+
+struct l2tp_session *l2tp_session_get_next(const struct net *net, struct sock *sk, int pver,
+ u32 tunnel_id, unsigned long *key)
+{
+ if (pver == L2TP_HDR_VER_2)
+ return l2tp_v2_session_get_next(net, tunnel_id, key);
+ else
+ return l2tp_v3_session_get_next(net, tunnel_id, sk, key);
+}
+EXPORT_SYMBOL_GPL(l2tp_session_get_next);
/* Lookup a session by interface name.
* This is very inefficient but is only used by management interfaces.
@@ -330,7 +442,7 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
if (tunnel) {
list_for_each_entry_rcu(session, &tunnel->session_list, list) {
if (!strcmp(session->ifname, ifname)) {
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
rcu_read_unlock_bh();
return session;
@@ -347,7 +459,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
static void l2tp_session_coll_list_add(struct l2tp_session_coll_list *clist,
struct l2tp_session *session)
{
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
WARN_ON_ONCE(session->coll_list);
session->coll_list = clist;
spin_lock(&clist->lock);
@@ -387,12 +499,12 @@ static int l2tp_session_collision_add(struct l2tp_net *pn,
/* If existing session isn't already in the session hlist, add it. */
if (!hash_hashed(&session2->hlist))
- hash_add(pn->l2tp_v3_session_htable, &session2->hlist,
- session2->hlist_key);
+ hash_add_rcu(pn->l2tp_v3_session_htable, &session2->hlist,
+ session2->hlist_key);
/* Add new session to the hlist and collision list */
- hash_add(pn->l2tp_v3_session_htable, &session1->hlist,
- session1->hlist_key);
+ hash_add_rcu(pn->l2tp_v3_session_htable, &session1->hlist,
+ session1->hlist_key);
refcount_inc(&clist->ref_count);
l2tp_session_coll_list_add(clist, session1);
@@ -408,7 +520,7 @@ static void l2tp_session_collision_del(struct l2tp_net *pn,
lockdep_assert_held(&pn->l2tp_session_idr_lock);
- hash_del(&session->hlist);
+ hash_del_rcu(&session->hlist);
if (clist) {
/* Remove session from its collision list. If there
@@ -433,7 +545,7 @@ static void l2tp_session_collision_del(struct l2tp_net *pn,
spin_unlock(&clist->lock);
if (refcount_dec_and_test(&clist->ref_count))
kfree(clist);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
}
}
@@ -442,6 +554,7 @@ int l2tp_session_register(struct l2tp_session *session,
{
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
struct l2tp_session *other_session = NULL;
+ void *old = NULL;
u32 session_key;
int err;
@@ -481,16 +594,23 @@ int l2tp_session_register(struct l2tp_session *session,
goto out;
}
- l2tp_tunnel_inc_refcount(tunnel);
- list_add(&session->list, &tunnel->session_list);
+ refcount_inc(&tunnel->ref_count);
+ WRITE_ONCE(session->tunnel, tunnel);
+ list_add_rcu(&session->list, &tunnel->session_list);
+ /* this makes session available to lockless getters */
if (tunnel->version == L2TP_HDR_VER_3) {
if (!other_session)
- idr_replace(&pn->l2tp_v3_session_idr, session, session_key);
+ old = idr_replace(&pn->l2tp_v3_session_idr, session, session_key);
} else {
- idr_replace(&pn->l2tp_v2_session_idr, session, session_key);
+ old = idr_replace(&pn->l2tp_v2_session_idr, session, session_key);
}
+ /* old should be NULL, unless something removed or modified
+ * the IDR entry after our idr_alloc_32 above (which shouldn't
+ * happen).
+ */
+ WARN_ON_ONCE(old);
out:
spin_unlock_bh(&pn->l2tp_session_idr_lock);
spin_unlock_bh(&tunnel->list_lock);
@@ -797,7 +917,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
if (!session->lns_mode && !session->send_seq) {
trace_session_seqnum_lns_enable(session);
session->send_seq = 1;
- l2tp_session_set_header_len(session, tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version,
+ tunnel->encap);
}
} else {
/* No sequence numbers.
@@ -818,7 +939,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
if (!session->lns_mode && session->send_seq) {
trace_session_seqnum_lns_disable(session);
session->send_seq = 0;
- l2tp_session_set_header_len(session, tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version,
+ tunnel->encap);
} else if (session->send_seq) {
pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
session->name);
@@ -955,7 +1077,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!session || !session->recv_skb) {
if (session)
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
/* Not found? Pass to userspace to deal with */
goto pass;
@@ -969,12 +1091,12 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (version == L2TP_HDR_VER_3 &&
l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
goto invalid;
}
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
@@ -1218,44 +1340,6 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
* Tinnel and session create/destroy.
*****************************************************************************/
-/* Tunnel socket destruct hook.
- * The tunnel context is deleted only when all session sockets have been
- * closed.
- */
-static void l2tp_tunnel_destruct(struct sock *sk)
-{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
-
- if (!tunnel)
- goto end;
-
- /* Disable udp encapsulation */
- switch (tunnel->encap) {
- case L2TP_ENCAPTYPE_UDP:
- /* No longer an encapsulation socket. See net/ipv4/udp.c */
- WRITE_ONCE(udp_sk(sk)->encap_type, 0);
- udp_sk(sk)->encap_rcv = NULL;
- udp_sk(sk)->encap_destroy = NULL;
- break;
- case L2TP_ENCAPTYPE_IP:
- break;
- }
-
- /* Remove hooks into tunnel socket */
- write_lock_bh(&sk->sk_callback_lock);
- sk->sk_destruct = tunnel->old_sk_destruct;
- sk->sk_user_data = NULL;
- write_unlock_bh(&sk->sk_callback_lock);
-
- /* Call the original destructor */
- if (sk->sk_destruct)
- (*sk->sk_destruct)(sk);
-
- kfree_rcu(tunnel, rcu);
-end:
- return;
-}
-
/* Remove an l2tp session from l2tp_core's lists. */
static void l2tp_session_unhash(struct l2tp_session *session)
{
@@ -1288,8 +1372,6 @@ static void l2tp_session_unhash(struct l2tp_session *session)
spin_unlock_bh(&pn->l2tp_session_idr_lock);
spin_unlock_bh(&tunnel->list_lock);
-
- synchronize_rcu();
}
}
@@ -1301,28 +1383,21 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
spin_lock_bh(&tunnel->list_lock);
tunnel->acpt_newsess = false;
- for (;;) {
- session = list_first_entry_or_null(&tunnel->session_list,
- struct l2tp_session, list);
- if (!session)
- break;
- l2tp_session_inc_refcount(session);
- list_del_init(&session->list);
- spin_unlock_bh(&tunnel->list_lock);
+ list_for_each_entry(session, &tunnel->session_list, list)
l2tp_session_delete(session);
- spin_lock_bh(&tunnel->list_lock);
- l2tp_session_dec_refcount(session);
- }
spin_unlock_bh(&tunnel->list_lock);
}
/* Tunnel socket destroy hook for UDP encapsulation */
static void l2tp_udp_encap_destroy(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel;
- if (tunnel)
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_put(tunnel);
+ }
}
static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel)
@@ -1356,10 +1431,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
l2tp_tunnel_remove(tunnel->l2tp_net, tunnel);
/* drop initial ref */
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
/* drop workqueue ref */
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
}
/* Create a socket for the tunnel, if one isn't set up by
@@ -1505,7 +1580,6 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
tunnel->tunnel_id = tunnel_id;
tunnel->peer_tunnel_id = peer_tunnel_id;
- tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
spin_lock_init(&tunnel->list_lock);
tunnel->acpt_newsess = true;
@@ -1531,6 +1605,8 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
enum l2tp_encap_type encap)
{
+ struct l2tp_tunnel *tunnel;
+
if (!net_eq(sock_net(sk), net))
return -EINVAL;
@@ -1544,8 +1620,14 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
(encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP))
return -EPROTONOSUPPORT;
- if (sk->sk_user_data)
+ if (encap == L2TP_ENCAPTYPE_UDP && sk->sk_user_data)
+ return -EBUSY;
+
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
+ l2tp_tunnel_put(tunnel);
return -EBUSY;
+ }
return 0;
}
@@ -1584,12 +1666,10 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
ret = l2tp_validate_socket(sk, net, tunnel->encap);
if (ret < 0)
goto err_inval_sock;
- rcu_assign_sk_user_data(sk, tunnel);
write_unlock_bh(&sk->sk_callback_lock);
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
- .sk_user_data = tunnel,
.encap_type = UDP_ENCAP_L2TPINUDP,
.encap_rcv = l2tp_udp_encap_recv,
.encap_err_rcv = l2tp_udp_encap_err_recv,
@@ -1599,8 +1679,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
setup_udp_tunnel_sock(net, sock, &udp_cfg);
}
- tunnel->old_sk_destruct = sk->sk_destruct;
- sk->sk_destruct = &l2tp_tunnel_destruct;
sk->sk_allocation = GFP_ATOMIC;
release_sock(sk);
@@ -1639,7 +1717,7 @@ void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
{
if (!test_and_set_bit(0, &tunnel->dead)) {
trace_delete_tunnel(tunnel);
- l2tp_tunnel_inc_refcount(tunnel);
+ refcount_inc(&tunnel->ref_count);
queue_work(l2tp_wq, &tunnel->del_work);
}
}
@@ -1647,23 +1725,37 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
void l2tp_session_delete(struct l2tp_session *session)
{
- if (test_and_set_bit(0, &session->dead))
- return;
+ if (!test_and_set_bit(0, &session->dead)) {
+ trace_delete_session(session);
+ refcount_inc(&session->ref_count);
+ queue_work(l2tp_wq, &session->del_work);
+ }
+}
+EXPORT_SYMBOL_GPL(l2tp_session_delete);
+
+/* Workqueue session deletion function */
+static void l2tp_session_del_work(struct work_struct *work)
+{
+ struct l2tp_session *session = container_of(work, struct l2tp_session,
+ del_work);
- trace_delete_session(session);
l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
if (session->session_close)
(*session->session_close)(session);
- l2tp_session_dec_refcount(session);
+ /* drop initial ref */
+ l2tp_session_put(session);
+
+ /* drop workqueue ref */
+ l2tp_session_put(session);
}
-EXPORT_SYMBOL_GPL(l2tp_session_delete);
/* We come here whenever a session's send_seq, cookie_len or
* l2specific_type parameters are set.
*/
-void l2tp_session_set_header_len(struct l2tp_session *session, int version)
+void l2tp_session_set_header_len(struct l2tp_session *session, int version,
+ enum l2tp_encap_type encap)
{
if (version == L2TP_HDR_VER_2) {
session->hdr_len = 6;
@@ -1672,7 +1764,7 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
} else {
session->hdr_len = 4 + session->cookie_len;
session->hdr_len += l2tp_get_l2specific_len(session);
- if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
+ if (encap == L2TP_ENCAPTYPE_UDP)
session->hdr_len += 4;
}
}
@@ -1686,7 +1778,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL);
if (session) {
session->magic = L2TP_SESSION_MAGIC;
- session->tunnel = tunnel;
session->session_id = session_id;
session->peer_session_id = peer_session_id;
@@ -1710,6 +1801,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
INIT_HLIST_NODE(&session->hlist);
INIT_LIST_HEAD(&session->clist);
INIT_LIST_HEAD(&session->list);
+ INIT_WORK(&session->del_work, l2tp_session_del_work);
if (cfg) {
session->pwtype = cfg->pw_type;
@@ -1724,7 +1816,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
}
- l2tp_session_set_header_len(session, tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version, tunnel->encap);
refcount_set(&session->ref_count, 1);
@@ -1753,7 +1845,7 @@ static __net_init int l2tp_init_net(struct net *net)
return 0;
}
-static __net_exit void l2tp_exit_net(struct net *net)
+static __net_exit void l2tp_pre_exit_net(struct net *net)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
@@ -1766,18 +1858,34 @@ static __net_exit void l2tp_exit_net(struct net *net)
}
rcu_read_unlock_bh();
- if (l2tp_wq)
- flush_workqueue(l2tp_wq);
- rcu_barrier();
+ if (l2tp_wq) {
+ /* Run all TUNNEL_DELETE work items just queued. */
+ __flush_workqueue(l2tp_wq);
+
+ /* Each TUNNEL_DELETE work item will queue a SESSION_DELETE
+ * work item for each session in the tunnel. Flush the
+ * workqueue again to process these.
+ */
+ __flush_workqueue(l2tp_wq);
+ }
+}
+
+static __net_exit void l2tp_exit_net(struct net *net)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+ WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_v2_session_idr));
idr_destroy(&pn->l2tp_v2_session_idr);
+ WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_v3_session_idr));
idr_destroy(&pn->l2tp_v3_session_idr);
+ WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_tunnel_idr));
idr_destroy(&pn->l2tp_tunnel_idr);
}
static struct pernet_operations l2tp_net_ops = {
.init = l2tp_init_net,
.exit = l2tp_exit_net,
+ .pre_exit = l2tp_pre_exit_net,
.id = &l2tp_net_id,
.size = sizeof(struct l2tp_net),
};
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 8ac81bc1bc6f..ffd8ced3a51f 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -16,7 +16,6 @@
#endif
/* Random numbers used for internal consistency checks of tunnel and session structures */
-#define L2TP_TUNNEL_MAGIC 0x42114DDA
#define L2TP_SESSION_MAGIC 0x0C04EB7D
struct sk_buff;
@@ -67,6 +66,7 @@ struct l2tp_session_coll_list {
struct l2tp_session {
int magic; /* should be L2TP_SESSION_MAGIC */
long dead;
+ struct rcu_head rcu;
struct l2tp_tunnel *tunnel; /* back pointer to tunnel context */
u32 session_id;
@@ -103,6 +103,7 @@ struct l2tp_session {
int reorder_skip; /* set if skip to next nr */
enum l2tp_pwtype pwtype;
struct l2tp_stats stats;
+ struct work_struct del_work;
/* Session receive handler for data packets.
* Each pseudowire implementation should implement this callback in order to
@@ -155,8 +156,6 @@ struct l2tp_tunnel_cfg {
*/
#define L2TP_TUNNEL_NAME_MAX 20
struct l2tp_tunnel {
- int magic; /* Should be L2TP_TUNNEL_MAGIC */
-
unsigned long dead;
struct rcu_head rcu;
@@ -176,7 +175,6 @@ struct l2tp_tunnel {
struct net *l2tp_net; /* the net we belong to */
refcount_t ref_count;
- void (*old_sk_destruct)(struct sock *sk);
struct sock *sock; /* parent socket */
int fd; /* parent fd, if tunnel socket was created
* by userspace
@@ -211,23 +209,22 @@ static inline void *l2tp_session_priv(struct l2tp_session *session)
}
/* Tunnel and session refcounts */
-void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel);
-void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel);
-void l2tp_session_inc_refcount(struct l2tp_session *session);
-void l2tp_session_dec_refcount(struct l2tp_session *session);
+void l2tp_tunnel_put(struct l2tp_tunnel *tunnel);
+void l2tp_session_put(struct l2tp_session *session);
/* Tunnel and session lookup.
* These functions take a reference on the instances they return, so
* the caller must ensure that the reference is dropped appropriately.
*/
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
-struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth);
+struct l2tp_tunnel *l2tp_tunnel_get_next(const struct net *net, unsigned long *key);
struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id);
struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id);
struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver,
u32 tunnel_id, u32 session_id);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
+struct l2tp_session *l2tp_session_get_next(const struct net *net, struct sock *sk, int pver,
+ u32 tunnel_id, unsigned long *key);
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
const char *ifname);
@@ -260,7 +257,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
/* Transmit path helpers for sending packets over the tunnel socket. */
-void l2tp_session_set_header_len(struct l2tp_session *session, int version);
+void l2tp_session_set_header_len(struct l2tp_session *session, int version,
+ enum l2tp_encap_type encap);
int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb);
/* Pseudowire management.
@@ -273,10 +271,7 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
/* IOCTL helper for IP encap modules. */
int l2tp_ioctl(struct sock *sk, int cmd, int *karg);
-/* Extract the tunnel structure from a socket's sk_user_data pointer,
- * validating the tunnel magic feather.
- */
-struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk);
+struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk);
static inline int l2tp_get_l2specific_len(struct l2tp_session *session)
{
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 8755ae521154..2d0c8275a3a8 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -34,8 +34,8 @@ static struct dentry *rootdir;
struct l2tp_dfs_seq_data {
struct net *net;
netns_tracker ns_tracker;
- int tunnel_idx; /* current tunnel */
- int session_idx; /* index of session within current tunnel */
+ unsigned long tkey; /* lookup key of current tunnel */
+ unsigned long skey; /* lookup key of current session */
struct l2tp_tunnel *tunnel;
struct l2tp_session *session; /* NULL means get next tunnel */
};
@@ -44,23 +44,25 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->tunnel)
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
- pd->tunnel = l2tp_tunnel_get_nth(pd->net, pd->tunnel_idx);
- pd->tunnel_idx++;
+ pd->tunnel = l2tp_tunnel_get_next(pd->net, &pd->tkey);
+ pd->tkey++;
}
static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->session)
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
- pd->session_idx++;
+ pd->session = l2tp_session_get_next(pd->net, pd->tunnel->sock,
+ pd->tunnel->version,
+ pd->tunnel->tunnel_id, &pd->skey);
+ pd->skey++;
if (!pd->session) {
- pd->session_idx = 0;
+ pd->skey = 0;
l2tp_dfs_next_tunnel(pd);
}
}
@@ -109,11 +111,11 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
* or l2tp_dfs_next_tunnel().
*/
if (pd->session) {
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
pd->session = NULL;
}
if (pd->tunnel) {
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
pd->tunnel = NULL;
}
}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8ba00ad433c2..d692b902e120 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -72,31 +72,19 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev
unsigned int len = skb->len;
int ret = l2tp_xmit_skb(session, skb);
- if (likely(ret == NET_XMIT_SUCCESS)) {
- DEV_STATS_ADD(dev, tx_bytes, len);
- DEV_STATS_INC(dev, tx_packets);
- } else {
+ if (likely(ret == NET_XMIT_SUCCESS))
+ dev_sw_netstats_tx_add(dev, 1, len);
+ else
DEV_STATS_INC(dev, tx_dropped);
- }
- return NETDEV_TX_OK;
-}
-static void l2tp_eth_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *stats)
-{
- stats->tx_bytes = DEV_STATS_READ(dev, tx_bytes);
- stats->tx_packets = DEV_STATS_READ(dev, tx_packets);
- stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
- stats->rx_bytes = DEV_STATS_READ(dev, rx_bytes);
- stats->rx_packets = DEV_STATS_READ(dev, rx_packets);
- stats->rx_errors = DEV_STATS_READ(dev, rx_errors);
+ return NETDEV_TX_OK;
}
static const struct net_device_ops l2tp_eth_netdev_ops = {
.ndo_init = l2tp_eth_dev_init,
.ndo_uninit = l2tp_eth_dev_uninit,
.ndo_start_xmit = l2tp_eth_dev_xmit,
- .ndo_get_stats64 = l2tp_eth_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = eth_mac_addr,
};
@@ -109,9 +97,10 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &l2tpeth_type);
ether_setup(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->netdev_ops = &l2tp_eth_netdev_ops;
dev->needs_free_netdev = true;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
}
static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
@@ -138,12 +127,11 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
if (!dev)
goto error_rcu;
- if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
- DEV_STATS_INC(dev, rx_packets);
- DEV_STATS_ADD(dev, rx_bytes, data_len);
- } else {
+ if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS)
+ dev_sw_netstats_rx_add(dev, data_len);
+ else
DEV_STATS_INC(dev, rx_errors);
- }
+
rcu_read_unlock();
return;
@@ -283,7 +271,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
spriv = l2tp_session_priv(session);
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
rtnl_lock();
@@ -301,7 +289,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
if (rc < 0) {
rtnl_unlock();
l2tp_session_delete(session);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
free_netdev(dev);
return rc;
@@ -312,17 +300,17 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
rtnl_unlock();
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
__module_get(THIS_MODULE);
return 0;
err_sess_dev:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
free_netdev(dev);
err_sess:
- kfree(session);
+ l2tp_session_put(session);
err:
return rc;
}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index e48aa177d74c..4bc24fddfd52 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -22,9 +22,19 @@
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include "l2tp_core.h"
+/* per-net private data for this module */
+static unsigned int l2tp_ip_net_id;
+struct l2tp_ip_net {
+ rwlock_t l2tp_ip_lock;
+ struct hlist_head l2tp_ip_table;
+ struct hlist_head l2tp_ip_bind_table;
+};
+
struct l2tp_ip_sock {
/* inet_sock has to be the first member of l2tp_ip_sock */
struct inet_sock inet;
@@ -33,21 +43,23 @@ struct l2tp_ip_sock {
u32 peer_conn_id;
};
-static DEFINE_RWLOCK(l2tp_ip_lock);
-static struct hlist_head l2tp_ip_table;
-static struct hlist_head l2tp_ip_bind_table;
-
-static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
+static struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
{
return (struct l2tp_ip_sock *)sk;
}
+static struct l2tp_ip_net *l2tp_ip_pernet(const struct net *net)
+{
+ return net_generic(net, l2tp_ip_net_id);
+}
+
static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
__be32 raddr, int dif, u32 tunnel_id)
{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(net);
struct sock *sk;
- sk_for_each_bound(sk, &l2tp_ip_bind_table) {
+ sk_for_each_bound(sk, &pn->l2tp_ip_bind_table) {
const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
int bound_dev_if;
@@ -113,6 +125,7 @@ found:
static int l2tp_ip_recv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct l2tp_ip_net *pn;
struct sock *sk;
u32 session_id;
u32 tunnel_id;
@@ -121,6 +134,8 @@ static int l2tp_ip_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
struct iphdr *iph;
+ pn = l2tp_ip_pernet(net);
+
if (!pskb_may_pull(skb, 4))
goto discard;
@@ -152,7 +167,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
goto discard_sess;
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
@@ -167,15 +182,15 @@ pass_up:
tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = (struct iphdr *)skb_network_header(skb);
- read_lock_bh(&l2tp_ip_lock);
+ read_lock_bh(&pn->l2tp_ip_lock);
sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
tunnel_id);
if (!sk) {
- read_unlock_bh(&l2tp_ip_lock);
+ read_unlock_bh(&pn->l2tp_ip_lock);
goto discard;
}
sock_hold(sk);
- read_unlock_bh(&l2tp_ip_lock);
+ read_unlock_bh(&pn->l2tp_ip_lock);
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -185,7 +200,7 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
goto discard;
discard_put:
@@ -198,21 +213,25 @@ discard:
static int l2tp_ip_hash(struct sock *sk)
{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
+
if (sk_unhashed(sk)) {
- write_lock_bh(&l2tp_ip_lock);
- sk_add_node(sk, &l2tp_ip_table);
- write_unlock_bh(&l2tp_ip_lock);
+ write_lock_bh(&pn->l2tp_ip_lock);
+ sk_add_node(sk, &pn->l2tp_ip_table);
+ write_unlock_bh(&pn->l2tp_ip_lock);
}
return 0;
}
static void l2tp_ip_unhash(struct sock *sk)
{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
+
if (sk_unhashed(sk))
return;
- write_lock_bh(&l2tp_ip_lock);
+ write_lock_bh(&pn->l2tp_ip_lock);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
}
static int l2tp_ip_open(struct sock *sk)
@@ -226,23 +245,26 @@ static int l2tp_ip_open(struct sock *sk)
static void l2tp_ip_close(struct sock *sk, long timeout)
{
- write_lock_bh(&l2tp_ip_lock);
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
+
+ write_lock_bh(&pn->l2tp_ip_lock);
hlist_del_init(&sk->sk_bind_node);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
sk_common_release(sk);
}
static void l2tp_ip_destroy_sock(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
- struct sk_buff *skb;
+ struct l2tp_tunnel *tunnel;
- while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
- kfree_skb(skb);
+ __skb_queue_purge(&sk->sk_write_queue);
- if (tunnel)
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_put(tunnel);
+ }
}
static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -250,6 +272,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *)uaddr;
struct net *net = sock_net(sk);
+ struct l2tp_ip_net *pn;
int ret;
int chk_addr_ret;
@@ -280,10 +303,11 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->inet_saddr = 0; /* Use device */
- write_lock_bh(&l2tp_ip_lock);
+ pn = l2tp_ip_pernet(net);
+ write_lock_bh(&pn->l2tp_ip_lock);
if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0,
sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
ret = -EADDRINUSE;
goto out;
}
@@ -291,9 +315,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
- sk_add_bind_node(sk, &l2tp_ip_bind_table);
+ sk_add_bind_node(sk, &pn->l2tp_ip_bind_table);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
ret = 0;
sock_reset_flag(sk, SOCK_ZAPPED);
@@ -307,6 +331,7 @@ out:
static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
int rc;
if (addr_len < sizeof(*lsa))
@@ -329,10 +354,10 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
- write_lock_bh(&l2tp_ip_lock);
+ write_lock_bh(&pn->l2tp_ip_lock);
hlist_del_init(&sk->sk_bind_node);
- sk_add_bind_node(sk, &l2tp_ip_bind_table);
- write_unlock_bh(&l2tp_ip_lock);
+ sk_add_bind_node(sk, &pn->l2tp_ip_bind_table);
+ write_unlock_bh(&pn->l2tp_ip_lock);
out_sk:
release_sock(sk);
@@ -637,25 +662,58 @@ static struct net_protocol l2tp_ip_protocol __read_mostly = {
.handler = l2tp_ip_recv,
};
+static __net_init int l2tp_ip_init_net(struct net *net)
+{
+ struct l2tp_ip_net *pn = net_generic(net, l2tp_ip_net_id);
+
+ rwlock_init(&pn->l2tp_ip_lock);
+ INIT_HLIST_HEAD(&pn->l2tp_ip_table);
+ INIT_HLIST_HEAD(&pn->l2tp_ip_bind_table);
+ return 0;
+}
+
+static __net_exit void l2tp_ip_exit_net(struct net *net)
+{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(net);
+
+ write_lock_bh(&pn->l2tp_ip_lock);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip_table) != 0);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip_bind_table) != 0);
+ write_unlock_bh(&pn->l2tp_ip_lock);
+}
+
+static struct pernet_operations l2tp_ip_net_ops = {
+ .init = l2tp_ip_init_net,
+ .exit = l2tp_ip_exit_net,
+ .id = &l2tp_ip_net_id,
+ .size = sizeof(struct l2tp_ip_net),
+};
+
static int __init l2tp_ip_init(void)
{
int err;
pr_info("L2TP IP encapsulation support (L2TPv3)\n");
+ err = register_pernet_device(&l2tp_ip_net_ops);
+ if (err)
+ goto out;
+
err = proto_register(&l2tp_ip_prot, 1);
if (err != 0)
- goto out;
+ goto out1;
err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
if (err)
- goto out1;
+ goto out2;
inet_register_protosw(&l2tp_ip_protosw);
return 0;
-out1:
+out2:
proto_unregister(&l2tp_ip_prot);
+out1:
+ unregister_pernet_device(&l2tp_ip_net_ops);
out:
return err;
}
@@ -665,6 +723,7 @@ static void __exit l2tp_ip_exit(void)
inet_unregister_protosw(&l2tp_ip_protosw);
inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
proto_unregister(&l2tp_ip_prot);
+ unregister_pernet_device(&l2tp_ip_net_ops);
}
module_init(l2tp_ip_init);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index d217ff1f229e..f4c1da070826 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -22,6 +22,8 @@
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
@@ -29,6 +31,14 @@
#include "l2tp_core.h"
+/* per-net private data for this module */
+static unsigned int l2tp_ip6_net_id;
+struct l2tp_ip6_net {
+ rwlock_t l2tp_ip6_lock;
+ struct hlist_head l2tp_ip6_table;
+ struct hlist_head l2tp_ip6_bind_table;
+};
+
struct l2tp_ip6_sock {
/* inet_sock has to be the first member of l2tp_ip6_sock */
struct inet_sock inet;
@@ -39,23 +49,25 @@ struct l2tp_ip6_sock {
struct ipv6_pinfo inet6;
};
-static DEFINE_RWLOCK(l2tp_ip6_lock);
-static struct hlist_head l2tp_ip6_table;
-static struct hlist_head l2tp_ip6_bind_table;
-
-static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk)
+static struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk)
{
return (struct l2tp_ip6_sock *)sk;
}
+static struct l2tp_ip6_net *l2tp_ip6_pernet(const struct net *net)
+{
+ return net_generic(net, l2tp_ip6_net_id);
+}
+
static struct sock *__l2tp_ip6_bind_lookup(const struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *raddr,
int dif, u32 tunnel_id)
{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(net);
struct sock *sk;
- sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
+ sk_for_each_bound(sk, &pn->l2tp_ip6_bind_table) {
const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
const struct in6_addr *sk_raddr = &sk->sk_v6_daddr;
const struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
@@ -123,6 +135,7 @@ found:
static int l2tp_ip6_recv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct l2tp_ip6_net *pn;
struct sock *sk;
u32 session_id;
u32 tunnel_id;
@@ -131,6 +144,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
struct ipv6hdr *iph;
+ pn = l2tp_ip6_pernet(net);
+
if (!pskb_may_pull(skb, 4))
goto discard;
@@ -162,7 +177,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
goto discard_sess;
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
@@ -177,15 +192,15 @@ pass_up:
tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = ipv6_hdr(skb);
- read_lock_bh(&l2tp_ip6_lock);
+ read_lock_bh(&pn->l2tp_ip6_lock);
sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
inet6_iif(skb), tunnel_id);
if (!sk) {
- read_unlock_bh(&l2tp_ip6_lock);
+ read_unlock_bh(&pn->l2tp_ip6_lock);
goto discard;
}
sock_hold(sk);
- read_unlock_bh(&l2tp_ip6_lock);
+ read_unlock_bh(&pn->l2tp_ip6_lock);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -195,7 +210,7 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
goto discard;
discard_put:
@@ -208,21 +223,25 @@ discard:
static int l2tp_ip6_hash(struct sock *sk)
{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(sock_net(sk));
+
if (sk_unhashed(sk)) {
- write_lock_bh(&l2tp_ip6_lock);
- sk_add_node(sk, &l2tp_ip6_table);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_lock_bh(&pn->l2tp_ip6_lock);
+ sk_add_node(sk, &pn->l2tp_ip6_table);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
}
return 0;
}
static void l2tp_ip6_unhash(struct sock *sk)
{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(sock_net(sk));
+
if (sk_unhashed(sk))
return;
- write_lock_bh(&l2tp_ip6_lock);
+ write_lock_bh(&pn->l2tp_ip6_lock);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
}
static int l2tp_ip6_open(struct sock *sk)
@@ -236,24 +255,29 @@ static int l2tp_ip6_open(struct sock *sk)
static void l2tp_ip6_close(struct sock *sk, long timeout)
{
- write_lock_bh(&l2tp_ip6_lock);
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(sock_net(sk));
+
+ write_lock_bh(&pn->l2tp_ip6_lock);
hlist_del_init(&sk->sk_bind_node);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
sk_common_release(sk);
}
static void l2tp_ip6_destroy_sock(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel;
lock_sock(sk);
ip6_flush_pending_frames(sk);
release_sock(sk);
- if (tunnel)
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_put(tunnel);
+ }
}
static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -262,11 +286,14 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *)uaddr;
struct net *net = sock_net(sk);
+ struct l2tp_ip6_net *pn;
__be32 v4addr = 0;
int bound_dev_if;
int addr_type;
int err;
+ pn = l2tp_ip6_pernet(net);
+
if (addr->l2tp_family != AF_INET6)
return -EINVAL;
if (addr_len < sizeof(*addr))
@@ -324,10 +351,10 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
rcu_read_unlock();
- write_lock_bh(&l2tp_ip6_lock);
+ write_lock_bh(&pn->l2tp_ip6_lock);
if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if,
addr->l2tp_conn_id)) {
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
err = -EADDRINUSE;
goto out_unlock;
}
@@ -340,9 +367,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id;
- sk_add_bind_node(sk, &l2tp_ip6_bind_table);
+ sk_add_bind_node(sk, &pn->l2tp_ip6_bind_table);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
sock_reset_flag(sk, SOCK_ZAPPED);
release_sock(sk);
@@ -364,6 +391,7 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
struct in6_addr *daddr;
int addr_type;
int rc;
+ struct l2tp_ip6_net *pn;
if (addr_len < sizeof(*lsa))
return -EINVAL;
@@ -395,10 +423,11 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
l2tp_ip6_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
- write_lock_bh(&l2tp_ip6_lock);
+ pn = l2tp_ip6_pernet(sock_net(sk));
+ write_lock_bh(&pn->l2tp_ip6_lock);
hlist_del_init(&sk->sk_bind_node);
- sk_add_bind_node(sk, &l2tp_ip6_bind_table);
- write_unlock_bh(&l2tp_ip6_lock);
+ sk_add_bind_node(sk, &pn->l2tp_ip6_bind_table);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
out_sk:
release_sock(sk);
@@ -765,25 +794,58 @@ static struct inet6_protocol l2tp_ip6_protocol __read_mostly = {
.handler = l2tp_ip6_recv,
};
+static __net_init int l2tp_ip6_init_net(struct net *net)
+{
+ struct l2tp_ip6_net *pn = net_generic(net, l2tp_ip6_net_id);
+
+ rwlock_init(&pn->l2tp_ip6_lock);
+ INIT_HLIST_HEAD(&pn->l2tp_ip6_table);
+ INIT_HLIST_HEAD(&pn->l2tp_ip6_bind_table);
+ return 0;
+}
+
+static __net_exit void l2tp_ip6_exit_net(struct net *net)
+{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(net);
+
+ write_lock_bh(&pn->l2tp_ip6_lock);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip6_table) != 0);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip6_bind_table) != 0);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
+}
+
+static struct pernet_operations l2tp_ip6_net_ops = {
+ .init = l2tp_ip6_init_net,
+ .exit = l2tp_ip6_exit_net,
+ .id = &l2tp_ip6_net_id,
+ .size = sizeof(struct l2tp_ip6_net),
+};
+
static int __init l2tp_ip6_init(void)
{
int err;
pr_info("L2TP IP encapsulation support for IPv6 (L2TPv3)\n");
+ err = register_pernet_device(&l2tp_ip6_net_ops);
+ if (err)
+ goto out;
+
err = proto_register(&l2tp_ip6_prot, 1);
if (err != 0)
- goto out;
+ goto out1;
err = inet6_add_protocol(&l2tp_ip6_protocol, IPPROTO_L2TP);
if (err)
- goto out1;
+ goto out2;
inet6_register_protosw(&l2tp_ip6_protosw);
return 0;
-out1:
+out2:
proto_unregister(&l2tp_ip6_prot);
+out1:
+ unregister_pernet_device(&l2tp_ip6_net_ops);
out:
return err;
}
@@ -793,6 +855,7 @@ static void __exit l2tp_ip6_exit(void)
inet6_unregister_protosw(&l2tp_ip6_protosw);
inet6_del_protocol(&l2tp_ip6_protocol, IPPROTO_L2TP);
proto_unregister(&l2tp_ip6_prot);
+ unregister_pernet_device(&l2tp_ip6_net_ops);
}
module_init(l2tp_ip6_init);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index d105030520f9..284f1dec1b56 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -63,7 +63,7 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info)
if (tunnel) {
session = l2tp_session_get(net, tunnel->sock, tunnel->version,
tunnel_id, session_id);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
}
}
@@ -242,7 +242,7 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
if (ret < 0)
goto out;
- l2tp_tunnel_inc_refcount(tunnel);
+ refcount_inc(&tunnel->ref_count);
ret = l2tp_tunnel_register(tunnel, net, &cfg);
if (ret < 0) {
kfree(tunnel);
@@ -250,7 +250,7 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
}
ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel,
L2TP_CMD_TUNNEL_CREATE);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
@@ -280,7 +280,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
l2tp_tunnel_delete(tunnel);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
@@ -308,7 +308,7 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_MODIFY);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
@@ -479,42 +479,48 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto err_nlmsg_tunnel;
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
return genlmsg_unicast(net, msg, info->snd_portid);
err_nlmsg_tunnel:
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
err_nlmsg:
nlmsg_free(msg);
err:
return ret;
}
+struct l2tp_nl_cb_data {
+ unsigned long tkey;
+ unsigned long skey;
+};
+
static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- int ti = cb->args[0];
+ struct l2tp_nl_cb_data *cbd = (void *)&cb->ctx[0];
+ unsigned long key = cbd->tkey;
struct l2tp_tunnel *tunnel;
struct net *net = sock_net(skb->sk);
for (;;) {
- tunnel = l2tp_tunnel_get_nth(net, ti);
+ tunnel = l2tp_tunnel_get_next(net, &key);
if (!tunnel)
goto out;
if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
tunnel, L2TP_CMD_TUNNEL_GET) < 0) {
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
goto out;
}
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
- ti++;
+ key++;
}
out:
- cb->args[0] = ti;
+ cbd->tkey = key;
return skb->len;
}
@@ -641,12 +647,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if (session) {
ret = l2tp_session_notify(&l2tp_nl_family, info, session,
L2TP_CMD_SESSION_CREATE);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
}
}
out_tunnel:
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
}
@@ -671,7 +677,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
l2tp_nl_cmd_ops[pw_type]->session_delete(session);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
out:
return ret;
@@ -692,8 +698,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
if (info->attrs[L2TP_ATTR_SEND_SEQ]) {
+ struct l2tp_tunnel *tunnel = session->tunnel;
+
session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
- l2tp_session_set_header_len(session, session->tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version, tunnel->encap);
}
if (info->attrs[L2TP_ATTR_LNS_MODE])
@@ -705,7 +713,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
ret = l2tp_session_notify(&l2tp_nl_family, info,
session, L2TP_CMD_SESSION_MODIFY);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
out:
return ret;
@@ -816,57 +824,59 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return ret;
err_ref_msg:
nlmsg_free(msg);
err_ref:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
err:
return ret;
}
static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct l2tp_nl_cb_data *cbd = (void *)&cb->ctx[0];
struct net *net = sock_net(skb->sk);
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
- int ti = cb->args[0];
- int si = cb->args[1];
+ unsigned long tkey = cbd->tkey;
+ unsigned long skey = cbd->skey;
for (;;) {
if (!tunnel) {
- tunnel = l2tp_tunnel_get_nth(net, ti);
+ tunnel = l2tp_tunnel_get_next(net, &tkey);
if (!tunnel)
goto out;
}
- session = l2tp_session_get_nth(tunnel, si);
+ session = l2tp_session_get_next(net, tunnel->sock, tunnel->version,
+ tunnel->tunnel_id, &skey);
if (!session) {
- ti++;
- l2tp_tunnel_dec_refcount(tunnel);
+ tkey++;
+ l2tp_tunnel_put(tunnel);
tunnel = NULL;
- si = 0;
+ skey = 0;
continue;
}
if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
session, L2TP_CMD_SESSION_GET) < 0) {
- l2tp_session_dec_refcount(session);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_session_put(session);
+ l2tp_tunnel_put(tunnel);
break;
}
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
- si++;
+ skey++;
}
out:
- cb->args[0] = ti;
- cb->args[1] = si;
+ cbd->tkey = tkey;
+ cbd->skey = skey;
return skb->len;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 3596290047b2..53baf2dd5d5d 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -119,7 +119,6 @@ struct pppol2tp_session {
struct mutex sk_lock; /* Protects .sk */
struct sock __rcu *sk; /* Pointer to the session PPPoX socket */
struct sock *__sk; /* Copy of .sk, for cleanup */
- struct rcu_head rcu; /* For asynchronous release */
};
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
@@ -150,27 +149,23 @@ static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session)
/* Helpers to obtain tunnel/session contexts from sockets.
*/
-static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
+static struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
{
struct l2tp_session *session;
if (!sk)
return NULL;
- sock_hold(sk);
- session = (struct l2tp_session *)(sk->sk_user_data);
- if (!session) {
- sock_put(sk);
- goto out;
- }
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) {
- session = NULL;
- sock_put(sk);
- goto out;
+ rcu_read_lock();
+ session = rcu_dereference_sk_user_data(sk);
+ if (session && refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock();
+ WARN_ON_ONCE(session->magic != L2TP_SESSION_MAGIC);
+ return session;
}
+ rcu_read_unlock();
-out:
- return session;
+ return NULL;
}
/*****************************************************************************
@@ -318,12 +313,12 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
l2tp_xmit_skb(session, skb);
local_bh_enable();
- sock_put(sk);
+ l2tp_session_put(session);
return total_len;
error_put_sess:
- sock_put(sk);
+ l2tp_session_put(session);
error:
return error;
}
@@ -377,12 +372,12 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
l2tp_xmit_skb(session, skb);
local_bh_enable();
- sock_put(sk);
+ l2tp_session_put(session);
return 1;
abort_put_sess:
- sock_put(sk);
+ l2tp_session_put(session);
abort:
/* Free the original skb */
kfree_skb(skb);
@@ -393,29 +388,32 @@ abort:
* Session (and tunnel control) socket create/destroy.
*****************************************************************************/
-static void pppol2tp_put_sk(struct rcu_head *head)
-{
- struct pppol2tp_session *ps;
-
- ps = container_of(head, typeof(*ps), rcu);
- sock_put(ps->__sk);
-}
-
/* Really kill the session socket. (Called from sock_put() if
* refcnt == 0.)
*/
static void pppol2tp_session_destruct(struct sock *sk)
{
- struct l2tp_session *session = sk->sk_user_data;
-
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_write_queue);
+}
- if (session) {
- sk->sk_user_data = NULL;
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
- return;
- l2tp_session_dec_refcount(session);
+static void pppol2tp_session_close(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps;
+
+ ps = l2tp_session_priv(session);
+ mutex_lock(&ps->sk_lock);
+ ps->__sk = rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock));
+ RCU_INIT_POINTER(ps->sk, NULL);
+ mutex_unlock(&ps->sk_lock);
+ if (ps->__sk) {
+ /* detach socket */
+ rcu_assign_sk_user_data(ps->__sk, NULL);
+ sock_put(ps->__sk);
+
+ /* drop ref taken when we referenced socket via sk_user_data */
+ l2tp_session_put(session);
}
}
@@ -444,30 +442,13 @@ static int pppol2tp_release(struct socket *sock)
session = pppol2tp_sock_to_session(sk);
if (session) {
- struct pppol2tp_session *ps;
-
l2tp_session_delete(session);
-
- ps = l2tp_session_priv(session);
- mutex_lock(&ps->sk_lock);
- ps->__sk = rcu_dereference_protected(ps->sk,
- lockdep_is_held(&ps->sk_lock));
- RCU_INIT_POINTER(ps->sk, NULL);
- mutex_unlock(&ps->sk_lock);
- call_rcu(&ps->rcu, pppol2tp_put_sk);
-
- /* Rely on the sock_put() call at the end of the function for
- * dropping the reference held by pppol2tp_sock_to_session().
- * The last reference will be dropped by pppol2tp_put_sk().
- */
+ /* drop ref taken by pppol2tp_sock_to_session */
+ l2tp_session_put(session);
}
release_sock(sk);
- /* This will delete the session context via
- * pppol2tp_session_destruct() if the socket's refcnt drops to
- * zero.
- */
sock_put(sk);
return 0;
@@ -506,6 +487,7 @@ static int pppol2tp_create(struct net *net, struct socket *sock, int kern)
goto out;
sock_init_data(sock, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
sock->state = SS_UNCONNECTED;
sock->ops = &pppol2tp_ops;
@@ -542,6 +524,7 @@ static void pppol2tp_session_init(struct l2tp_session *session)
struct pppol2tp_session *ps;
session->recv_skb = pppol2tp_recv;
+ session->session_close = pppol2tp_session_close;
if (IS_ENABLED(CONFIG_L2TP_DEBUGFS))
session->show = pppol2tp_show;
@@ -685,7 +668,7 @@ static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net,
if (error < 0)
return ERR_PTR(error);
- l2tp_tunnel_inc_refcount(tunnel);
+ refcount_inc(&tunnel->ref_count);
error = l2tp_tunnel_register(tunnel, net, &tcfg);
if (error < 0) {
kfree(tunnel);
@@ -701,7 +684,7 @@ static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net,
/* Error if socket is not prepped */
if (!tunnel->sock) {
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
return ERR_PTR(-ENOENT);
}
}
@@ -787,18 +770,20 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
goto end;
}
+ drop_refcnt = true;
+
pppol2tp_session_init(session);
ps = l2tp_session_priv(session);
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
mutex_lock(&ps->sk_lock);
error = l2tp_session_register(session, tunnel);
if (error < 0) {
mutex_unlock(&ps->sk_lock);
- kfree(session);
+ l2tp_session_put(session);
goto end;
}
- drop_refcnt = true;
+
new_session = true;
}
@@ -830,12 +815,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
out_no_ppp:
/* This is how we get the session context from the socket. */
- sk->sk_user_data = session;
+ sock_hold(sk);
+ rcu_assign_sk_user_data(sk, session);
rcu_assign_pointer(ps->sk, sk);
mutex_unlock(&ps->sk_lock);
/* Keep the reference we've grabbed on the session: sk doesn't expect
- * the session to disappear. pppol2tp_session_destruct() is responsible
+ * the session to disappear. pppol2tp_session_close() is responsible
* for dropping it.
*/
drop_refcnt = false;
@@ -850,8 +836,8 @@ end:
l2tp_tunnel_delete(tunnel);
}
if (drop_refcnt)
- l2tp_session_dec_refcount(session);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_session_put(session);
+ l2tp_tunnel_put(tunnel);
release_sock(sk);
return error;
@@ -891,7 +877,7 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
return 0;
err_sess:
- kfree(session);
+ l2tp_session_put(session);
err:
return error;
}
@@ -1002,7 +988,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
error = len;
- sock_put(sk);
+ l2tp_session_put(session);
end:
return error;
}
@@ -1052,12 +1038,12 @@ static int pppol2tp_tunnel_copy_stats(struct pppol2tp_ioc_stats *stats,
return -EBADR;
if (session->pwtype != L2TP_PWTYPE_PPP) {
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return -EBADR;
}
pppol2tp_copy_stats(stats, &session->stats);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
}
@@ -1205,7 +1191,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
- l2tp_session_set_header_len(session, session->tunnel->version);
+ l2tp_session_set_header_len(session, session->tunnel->version,
+ session->tunnel->encap);
break;
case PPPOL2TP_SO_LNSMODE:
@@ -1274,7 +1261,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
err = pppol2tp_session_setsockopt(sk, session, optname, val);
}
- sock_put(sk);
+ l2tp_session_put(session);
end:
return err;
}
@@ -1395,7 +1382,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
err = 0;
end_put_sess:
- sock_put(sk);
+ l2tp_session_put(session);
end:
return err;
}
@@ -1406,14 +1393,12 @@ end:
* L2TPv2, we dump only L2TPv2 tunnels and sessions here.
*****************************************************************************/
-static unsigned int pppol2tp_net_id;
-
#ifdef CONFIG_PROC_FS
struct pppol2tp_seq_data {
struct seq_net_private p;
- int tunnel_idx; /* current tunnel */
- int session_idx; /* index of session within current tunnel */
+ unsigned long tkey; /* lookup key of current tunnel */
+ unsigned long skey; /* lookup key of current session */
struct l2tp_tunnel *tunnel;
struct l2tp_session *session; /* NULL means get next tunnel */
};
@@ -1422,17 +1407,17 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->tunnel)
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
for (;;) {
- pd->tunnel = l2tp_tunnel_get_nth(net, pd->tunnel_idx);
- pd->tunnel_idx++;
+ pd->tunnel = l2tp_tunnel_get_next(net, &pd->tkey);
+ pd->tkey++;
/* Only accept L2TPv2 tunnels */
if (!pd->tunnel || pd->tunnel->version == 2)
return;
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
}
}
@@ -1440,13 +1425,15 @@ static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->session)
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
- pd->session_idx++;
+ pd->session = l2tp_session_get_next(net, pd->tunnel->sock,
+ pd->tunnel->version,
+ pd->tunnel->tunnel_id, &pd->skey);
+ pd->skey++;
if (!pd->session) {
- pd->session_idx = 0;
+ pd->skey = 0;
pppol2tp_next_tunnel(net, pd);
}
}
@@ -1498,11 +1485,11 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v)
* or pppol2tp_next_tunnel().
*/
if (pd->session) {
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
pd->session = NULL;
}
if (pd->tunnel) {
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
pd->tunnel = NULL;
}
}
@@ -1513,7 +1500,7 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
seq_printf(m, "\nTUNNEL '%s', %c %d\n",
tunnel->name,
- (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
+ tunnel->sock ? 'Y' : 'N',
refcount_read(&tunnel->ref_count) - 1);
seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n",
0,
@@ -1641,7 +1628,6 @@ static __net_exit void pppol2tp_exit_net(struct net *net)
static struct pernet_operations pppol2tp_net_ops = {
.init = pppol2tp_init_net,
.exit = pppol2tp_exit_net,
- .id = &pppol2tp_net_id,
};
/*****************************************************************************
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 9bffac7a4974..fe7eab4b681b 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -207,20 +207,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, da, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
+ mgmt = ieee80211_mgmt_ba(skb, da, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
mgmt->u.action.category = WLAN_CATEGORY_BACK;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 677bbbac9f16..1c18b862ef8c 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -74,20 +74,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, da, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
+ mgmt = ieee80211_mgmt_ba(skb, da, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index fdf8b658fede..c61df637232a 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -55,10 +55,21 @@
#define HE_DURATION_S(shift, streams, gi, bps) \
(HE_DURATION(streams, gi, bps) >> shift)
+/* gi in HE/EHT is identical. It matches enum nl80211_eht_gi as well */
+#define EHT_GI_08 HE_GI_08
+#define EHT_GI_16 HE_GI_16
+#define EHT_GI_32 HE_GI_32
+
+#define EHT_DURATION(streams, gi, bps) \
+ HE_DURATION(streams, gi, bps)
+#define EHT_DURATION_S(shift, streams, gi, bps) \
+ HE_DURATION_S(shift, streams, gi, bps)
+
#define BW_20 0
#define BW_40 1
#define BW_80 2
#define BW_160 3
+#define BW_320 4
/*
* Define group sort order: HT40 -> SGI -> #streams
@@ -68,17 +79,26 @@
#define IEEE80211_VHT_STREAM_GROUPS 8 /* BW(=4) * SGI(=2) */
#define IEEE80211_HE_MAX_STREAMS 8
+#define IEEE80211_HE_STREAM_GROUPS 12 /* BW(=4) * GI(=3) */
+
+#define IEEE80211_EHT_MAX_STREAMS 8
+#define IEEE80211_EHT_STREAM_GROUPS 15 /* BW(=5) * GI(=3) */
#define IEEE80211_HT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
IEEE80211_HT_STREAM_GROUPS)
#define IEEE80211_VHT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
IEEE80211_VHT_STREAM_GROUPS)
+#define IEEE80211_HE_GROUPS_NB (IEEE80211_HE_MAX_STREAMS * \
+ IEEE80211_HE_STREAM_GROUPS)
+#define IEEE80211_EHT_GROUPS_NB (IEEE80211_EHT_MAX_STREAMS * \
+ IEEE80211_EHT_STREAM_GROUPS)
#define IEEE80211_HT_GROUP_0 0
#define IEEE80211_VHT_GROUP_0 (IEEE80211_HT_GROUP_0 + IEEE80211_HT_GROUPS_NB)
#define IEEE80211_HE_GROUP_0 (IEEE80211_VHT_GROUP_0 + IEEE80211_VHT_GROUPS_NB)
+#define IEEE80211_EHT_GROUP_0 (IEEE80211_HE_GROUP_0 + IEEE80211_HE_GROUPS_NB)
-#define MCS_GROUP_RATES 12
+#define MCS_GROUP_RATES 14
#define HT_GROUP_IDX(_streams, _sgi, _ht40) \
IEEE80211_HT_GROUP_0 + \
@@ -203,6 +223,69 @@
#define HE_GROUP(_streams, _gi, _bw) \
__HE_GROUP(_streams, _gi, _bw, \
HE_GROUP_SHIFT(_streams, _gi, _bw))
+
+#define EHT_BW2VBPS(_bw, r5, r4, r3, r2, r1) \
+ ((_bw) == BW_320 ? r5 : BW2VBPS(_bw, r4, r3, r2, r1))
+
+#define EHT_GROUP_IDX(_streams, _gi, _bw) \
+ (IEEE80211_EHT_GROUP_0 + \
+ IEEE80211_EHT_MAX_STREAMS * 3 * (_bw) + \
+ IEEE80211_EHT_MAX_STREAMS * (_gi) + \
+ (_streams) - 1)
+
+#define __EHT_GROUP(_streams, _gi, _bw, _s) \
+ [EHT_GROUP_IDX(_streams, _gi, _bw)] = { \
+ .shift = _s, \
+ .duration = { \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 1960, 980, 490, 234, 117)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 3920, 1960, 980, 468, 234)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 5880, 2937, 1470, 702, 351)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 7840, 3920, 1960, 936, 468)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 11760, 5880, 2940, 1404, 702)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 15680, 7840, 3920, 1872, 936)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 17640, 8820, 4410, 2106, 1053)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 19600, 9800, 4900, 2340, 1170)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 23520, 11760, 5880, 2808, 1404)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 26133, 13066, 6533, 3120, 1560)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 29400, 14700, 7350, 3510, 1755)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 32666, 16333, 8166, 3900, 1950)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 35280, 17640, 8820, 4212, 2106)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 39200, 19600, 9800, 4680, 2340)) \
+ } \
+}
+
+#define EHT_GROUP_SHIFT(_streams, _gi, _bw) \
+ GROUP_SHIFT(EHT_DURATION(_streams, _gi, \
+ EHT_BW2VBPS(_bw, 1960, 980, 490, 234, 117)))
+
+#define EHT_GROUP(_streams, _gi, _bw) \
+ __EHT_GROUP(_streams, _gi, _bw, \
+ EHT_GROUP_SHIFT(_streams, _gi, _bw))
+
+#define EHT_GROUP_RANGE(_gi, _bw) \
+ EHT_GROUP(1, _gi, _bw), \
+ EHT_GROUP(2, _gi, _bw), \
+ EHT_GROUP(3, _gi, _bw), \
+ EHT_GROUP(4, _gi, _bw), \
+ EHT_GROUP(5, _gi, _bw), \
+ EHT_GROUP(6, _gi, _bw), \
+ EHT_GROUP(7, _gi, _bw), \
+ EHT_GROUP(8, _gi, _bw)
+
struct mcs_group {
u8 shift;
u16 duration[MCS_GROUP_RATES];
@@ -376,6 +459,26 @@ static const struct mcs_group airtime_mcs_groups[] = {
HE_GROUP(6, HE_GI_32, BW_160),
HE_GROUP(7, HE_GI_32, BW_160),
HE_GROUP(8, HE_GI_32, BW_160),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_20),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_20),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_20),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_40),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_40),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_40),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_80),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_80),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_80),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_160),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_160),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_160),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_320),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_320),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_320),
};
static u32
@@ -422,6 +525,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw,
case RATE_INFO_BW_160:
bw = BW_160;
break;
+ case RATE_INFO_BW_320:
+ bw = BW_320;
+ break;
default:
WARN_ON_ONCE(1);
return 0;
@@ -443,14 +549,27 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw,
idx = status->rate_idx;
group = HE_GROUP_IDX(streams, status->he_gi, bw);
break;
+ case RX_ENC_EHT:
+ streams = status->nss;
+ idx = status->rate_idx;
+ group = EHT_GROUP_IDX(streams, status->eht.gi, bw);
+ break;
default:
WARN_ON_ONCE(1);
return 0;
}
- if (WARN_ON_ONCE((status->encoding != RX_ENC_HE && streams > 4) ||
- (status->encoding == RX_ENC_HE && streams > 8)))
- return 0;
+ switch (status->encoding) {
+ case RX_ENC_EHT:
+ case RX_ENC_HE:
+ if (WARN_ON_ONCE(streams > 8))
+ return 0;
+ break;
+ default:
+ if (WARN_ON_ONCE(streams > 4))
+ return 0;
+ break;
+ }
if (idx >= MCS_GROUP_RATES)
return 0;
@@ -517,7 +636,9 @@ static bool ieee80211_fill_rate_info(struct ieee80211_hw *hw,
stat->nss = ri->nss;
stat->rate_idx = ri->mcs;
- if (ri->flags & RATE_INFO_FLAGS_HE_MCS)
+ if (ri->flags & RATE_INFO_FLAGS_EHT_MCS)
+ stat->encoding = RX_ENC_EHT;
+ else if (ri->flags & RATE_INFO_FLAGS_HE_MCS)
stat->encoding = RX_ENC_HE;
else if (ri->flags & RATE_INFO_FLAGS_VHT_MCS)
stat->encoding = RX_ENC_VHT;
@@ -529,7 +650,14 @@ static bool ieee80211_fill_rate_info(struct ieee80211_hw *hw,
if (ri->flags & RATE_INFO_FLAGS_SHORT_GI)
stat->enc_flags |= RX_ENC_FLAG_SHORT_GI;
- stat->he_gi = ri->he_gi;
+ switch (stat->encoding) {
+ case RX_ENC_EHT:
+ stat->eht.gi = ri->eht_gi;
+ break;
+ default:
+ stat->he_gi = ri->he_gi;
+ break;
+ }
if (stat->encoding != RX_ENC_LEGACY)
return true;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b02b84ce2130..847304a3a29a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1662,12 +1662,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
ieee80211_link_info_change_notify(sdata, link,
BSS_CHANGED_BEACON_ENABLED);
- if (sdata->wdev.cac_started) {
+ if (sdata->wdev.links[link_id].cac_started) {
chandef = link_conf->chanreq.oper;
- wiphy_delayed_work_cancel(wiphy, &sdata->dfs_cac_timer_work);
+ wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
- GFP_KERNEL);
+ GFP_KERNEL, link_id);
}
drv_stop_ap(sdata->local, sdata, link_conf);
@@ -3462,51 +3462,58 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
static int ieee80211_start_radar_detection(struct wiphy *wiphy,
struct net_device *dev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms)
+ u32 cac_time_ms, int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_chan_req chanreq = { .oper = *chandef };
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_link_data *link_data;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
- if (!list_empty(&local->roc_list) || local->scanning) {
- err = -EBUSY;
- goto out_unlock;
- }
+ if (!list_empty(&local->roc_list) || local->scanning)
+ return -EBUSY;
+
+ link_data = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link_data)
+ return -ENOLINK;
/* whatever, but channel contexts should not complain about that one */
- sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
- sdata->deflink.needed_rx_chains = local->rx_chains;
+ link_data->smps_mode = IEEE80211_SMPS_OFF;
+ link_data->needed_rx_chains = local->rx_chains;
- err = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
+ err = ieee80211_link_use_channel(link_data, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (err)
- goto out_unlock;
+ return err;
- wiphy_delayed_work_queue(wiphy, &sdata->dfs_cac_timer_work,
+ wiphy_delayed_work_queue(wiphy, &link_data->dfs_cac_timer_work,
msecs_to_jiffies(cac_time_ms));
- out_unlock:
- return err;
+ return 0;
}
static void ieee80211_end_cac(struct wiphy *wiphy,
- struct net_device *dev)
+ struct net_device *dev, unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_link_data *link_data;
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
+ link_data = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link_data)
+ continue;
+
wiphy_delayed_work_cancel(wiphy,
- &sdata->dfs_cac_timer_work);
+ &link_data->dfs_cac_timer_work);
- if (sdata->wdev.cac_started) {
- ieee80211_link_release_channel(&sdata->deflink);
- sdata->wdev.cac_started = false;
+ if (sdata->wdev.links[link_id].cac_started) {
+ ieee80211_link_release_channel(link_data);
+ sdata->wdev.links[link_id].cac_started = false;
}
}
}
@@ -3961,7 +3968,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (!list_empty(&local->roc_list) || local->scanning)
return -EBUSY;
- if (sdata->wdev.cac_started)
+ if (sdata->wdev.links[link_id].cac_started)
return -EBUSY;
if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e8567723e94d..cca6d14084d2 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -286,7 +286,9 @@ ieee80211_get_max_required_bw(struct ieee80211_link_data *link)
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
struct sta_info *sta;
- list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
+ list_for_each_entry(sta, &sdata->local->sta_list, list) {
if (sdata != sta->sdata &&
!(sta->sdata->bss && sta->sdata->bss == sdata->bss))
continue;
@@ -681,6 +683,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
ctx->mode = mode;
ctx->conf.radar_enabled = false;
ctx->conf.radio_idx = radio_idx;
+ ctx->radar_detected = false;
_ieee80211_recalc_chanctx_min_def(local, ctx, NULL, false);
return ctx;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 79caeb485fd5..1c2b7dd8976a 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -467,20 +467,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, da, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
+ mgmt = ieee80211_mgmt_ba(skb, da, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a3485e4c6132..4f0390918b60 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -893,6 +893,8 @@ struct ieee80211_chanctx {
struct ieee80211_chan_req req;
struct ieee80211_chanctx_conf conf;
+
+ bool radar_detected;
};
struct mac80211_qos_map {
@@ -1067,6 +1069,7 @@ struct ieee80211_link_data {
int ap_power_level; /* in dBm */
bool radar_required;
+ struct wiphy_delayed_work dfs_cac_timer_work;
union {
struct ieee80211_link_data_managed mgd;
@@ -1165,8 +1168,6 @@ struct ieee80211_sub_if_data {
struct ieee80211_link_data deflink;
struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
- struct wiphy_delayed_work dfs_cac_timer_work;
-
/* for ieee80211_set_active_links_async() */
struct wiphy_work activate_links_work;
u16 desired_active_links;
@@ -2072,8 +2073,6 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
u32 info_flags,
u32 ctrl_flags,
u64 *cookie);
-void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
- struct sk_buff_head *skbs);
struct sk_buff *
ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags);
@@ -2136,6 +2135,29 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
struct ieee80211_mgmt *mgmt,
size_t len);
+static inline struct ieee80211_mgmt *
+ieee80211_mgmt_ba(struct sk_buff *skb, const u8 *da,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_mgmt *mgmt = skb_put_zero(skb, 24);
+
+ ether_addr_copy(mgmt->da, da);
+ ether_addr_copy(mgmt->sa, sdata->vif.addr);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP ||
+ sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
+ ether_addr_copy(mgmt->bssid, sdata->vif.addr);
+ else if (sdata->vif.type == NL80211_IFTYPE_STATION)
+ ether_addr_copy(mgmt->bssid, sdata->vif.cfg.ap_addr);
+ else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+ ether_addr_copy(mgmt->bssid, sdata->u.ibss.bssid);
+
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ return mgmt;
+}
+
int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
enum ieee80211_agg_stop_reason reason);
void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
@@ -2629,7 +2651,8 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
bool ieee80211_is_radar_required(struct ieee80211_local *local);
void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work);
-void ieee80211_dfs_cac_cancel(struct ieee80211_local *local);
+void ieee80211_dfs_cac_cancel(struct ieee80211_local *local,
+ struct ieee80211_chanctx *chanctx);
void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
struct wiphy_work *work);
int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b4ad66af3af3..6ef0990d3d29 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -462,6 +462,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
{
struct ieee80211_local *local = sdata->local;
unsigned long flags;
+ struct sk_buff_head freeq;
struct sk_buff *skb, *tmp;
u32 hw_reconf_flags = 0;
int i, flushed;
@@ -550,15 +551,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
wiphy_work_cancel(local->hw.wiphy,
&sdata->deflink.color_change_finalize_work);
wiphy_delayed_work_cancel(local->hw.wiphy,
- &sdata->dfs_cac_timer_work);
+ &sdata->deflink.dfs_cac_timer_work);
- if (sdata->wdev.cac_started) {
+ if (sdata->wdev.links[0].cac_started) {
chandef = sdata->vif.bss_conf.chanreq.oper;
WARN_ON(local->suspended);
ieee80211_link_release_channel(&sdata->deflink);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
- GFP_KERNEL);
+ GFP_KERNEL, 0);
}
if (sdata->vif.type == NL80211_IFTYPE_AP) {
@@ -637,18 +638,32 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
skb_queue_purge(&sdata->status_queue);
}
+ /*
+ * Since ieee80211_free_txskb() may issue __dev_queue_xmit()
+ * which should be called with interrupts enabled, reclamation
+ * is done in two phases:
+ */
+ __skb_queue_head_init(&freeq);
+
+ /* unlink from local queues... */
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
skb_queue_walk_safe(&local->pending[i], skb, tmp) {
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
if (info->control.vif == &sdata->vif) {
__skb_unlink(skb, &local->pending[i]);
- ieee80211_free_txskb(&local->hw, skb);
+ __skb_queue_tail(&freeq, skb);
}
}
}
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+ /* ... and perform actual reclamation with interrupts enabled. */
+ skb_queue_walk_safe(&freeq, skb, tmp) {
+ __skb_unlink(skb, &freeq);
+ ieee80211_free_txskb(&local->hw, skb);
+ }
+
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
ieee80211_txq_remove_vlan(local, sdata);
@@ -1729,8 +1744,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
wiphy_work_init(&sdata->work, ieee80211_iface_work);
wiphy_work_init(&sdata->activate_links_work,
ieee80211_activate_links_work);
- wiphy_delayed_work_init(&sdata->dfs_cac_timer_work,
- ieee80211_dfs_cac_timer_work);
switch (type) {
case NL80211_IFTYPE_P2P_GO:
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index 1a211b8d4057..0bbac64d5fa0 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -45,6 +45,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
ieee80211_color_collision_detection_work);
INIT_LIST_HEAD(&link->assigned_chanctx_list);
INIT_LIST_HEAD(&link->reserved_chanctx_list);
+ wiphy_delayed_work_init(&link->dfs_cac_timer_work,
+ ieee80211_dfs_cac_timer_work);
if (!deflink) {
switch (sdata->vif.type) {
@@ -75,6 +77,16 @@ void ieee80211_link_stop(struct ieee80211_link_data *link)
&link->color_change_finalize_work);
wiphy_work_cancel(link->sdata->local->hw.wiphy,
&link->csa.finalize_work);
+
+ if (link->sdata->wdev.links[link->link_id].cac_started) {
+ wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy,
+ &link->dfs_cac_timer_work);
+ cfg80211_cac_event(link->sdata->dev,
+ &link->conf->chanreq.oper,
+ NL80211_RADAR_CAC_ABORTED,
+ GFP_KERNEL, link->link_id);
+ }
+
ieee80211_link_release_channel(link);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a3104b6ea6f0..89084690350f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1051,9 +1051,9 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
return 0;
/* Driver provides cipher suites, but we need to exclude WEP */
- suites = kmemdup(local->hw.wiphy->cipher_suites,
- sizeof(u32) * local->hw.wiphy->n_cipher_suites,
- GFP_KERNEL);
+ suites = kmemdup_array(local->hw.wiphy->cipher_suites,
+ local->hw.wiphy->n_cipher_suites,
+ sizeof(u32), GFP_KERNEL);
if (!suites)
return -ENOMEM;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index c0a5c75cddcb..30c0d89203af 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -580,7 +580,7 @@ void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata,
prev = rhashtable_lookup_get_insert_fast(&cache->rht,
&entry->rhash,
fast_tx_rht_params);
- if (unlikely(IS_ERR(prev))) {
+ if (IS_ERR(prev)) {
kfree(entry);
goto unlock_cache;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4779a18ab75d..735e78adb0db 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1231,7 +1231,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
bool disable_mu_mimo = false;
struct ieee80211_sub_if_data *other;
- list_for_each_entry_rcu(other, &local->interfaces, list) {
+ list_for_each_entry(other, &local->interfaces, list) {
if (other->vif.bss_conf.mu_mimo_owner) {
disable_mu_mimo = true;
break;
@@ -3031,18 +3031,19 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t)
void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work)
{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data,
+ struct ieee80211_link_data *link =
+ container_of(work, struct ieee80211_link_data,
dfs_cac_timer_work.work);
- struct cfg80211_chan_def chandef = sdata->vif.bss_conf.chanreq.oper;
+ struct cfg80211_chan_def chandef = link->conf->chanreq.oper;
+ struct ieee80211_sub_if_data *sdata = link->sdata;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- if (sdata->wdev.cac_started) {
- ieee80211_link_release_channel(&sdata->deflink);
+ if (sdata->wdev.links[link->link_id].cac_started) {
+ ieee80211_link_release_channel(link);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_FINISHED,
- GFP_KERNEL);
+ GFP_KERNEL, link->link_id);
}
}
@@ -4715,7 +4716,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
((assoc_data->wmm && !elems->wmm_param) ||
(link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT &&
(!elems->ht_cap_elem || !elems->ht_operation)) ||
- (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
+ (is_5ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
(!elems->vht_cap_elem || !elems->vht_operation)))) {
const struct cfg80211_bss_ies *ies;
struct ieee802_11_elems *bss_elems;
@@ -4763,19 +4764,22 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
sdata_info(sdata,
"AP bug: HT operation missing from AssocResp\n");
}
- if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
- link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
- elems->vht_cap_elem = bss_elems->vht_cap_elem;
- sdata_info(sdata,
- "AP bug: VHT capa missing from AssocResp\n");
- }
- if (!elems->vht_operation && bss_elems->vht_operation &&
- link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
- elems->vht_operation = bss_elems->vht_operation;
- sdata_info(sdata,
- "AP bug: VHT operation missing from AssocResp\n");
- }
+ if (is_5ghz) {
+ if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
+ elems->vht_cap_elem = bss_elems->vht_cap_elem;
+ sdata_info(sdata,
+ "AP bug: VHT capa missing from AssocResp\n");
+ }
+
+ if (!elems->vht_operation && bss_elems->vht_operation &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
+ elems->vht_operation = bss_elems->vht_operation;
+ sdata_info(sdata,
+ "AP bug: VHT operation missing from AssocResp\n");
+ }
+ }
kfree(bss_elems);
}
@@ -6664,7 +6668,7 @@ static bool ieee80211_mgd_ssid_mismatch(struct ieee80211_sub_if_data *sdata,
return true;
/* hidden SSID: zeroed out */
- if (memcmp(elems->ssid, zero_ssid, elems->ssid_len))
+ if (!memcmp(elems->ssid, zero_ssid, elems->ssid_len))
return false;
return memcmp(elems->ssid, cfg->ssid, cfg->ssid_len);
@@ -7660,6 +7664,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
assoc_data->tries++;
+ assoc_data->comeback = false;
if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) {
sdata_info(sdata, "association with %pM timed out\n",
assoc_data->ap_addr);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 28d03196ef75..29fab7ae47b4 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -997,6 +997,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
}
IEEE80211_SKB_CB(skb)->flags = flags;
+ IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK;
skb->dev = sdata->dev;
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d823d58303e8..7be52345f218 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -32,7 +32,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
ieee80211_scan_cancel(local);
- ieee80211_dfs_cac_cancel(local);
+ ieee80211_dfs_cac_cancel(local, NULL);
ieee80211_roc_purge(local, NULL);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 4dc1def69548..3dc9752188d5 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -890,7 +890,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
if (ieee80211_is_tx_data(skb))
rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
- if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX))
+ if (!(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK))
mask = sdata->rc_rateidx_mask[info->band];
if (dest[0].idx < 0)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index b5f2df61c7f6..adb88c06b598 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -504,7 +504,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
* the scan was in progress; if there was none this will
* just be a no-op for the particular interface.
*/
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ list_for_each_entry(sdata, &local->interfaces, list) {
if (ieee80211_sdata_running(sdata))
wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
@@ -575,6 +575,7 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_sub_if_data *sdata_iter;
+ unsigned int link_id;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -585,8 +586,9 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
return false;
list_for_each_entry(sdata_iter, &local->interfaces, list) {
- if (sdata_iter->wdev.cac_started)
- return false;
+ for_each_valid_link(&sdata_iter->wdev, link_id)
+ if (sdata_iter->wdev.links[link_id].cac_started)
+ return false;
}
return true;
@@ -649,7 +651,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
cpu_to_le16(IEEE80211_SN_TO_SEQ(sn));
}
IEEE80211_SKB_CB(skb)->flags |= tx_flags;
- IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX;
+ IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK;
ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band);
}
}
@@ -1013,10 +1015,8 @@ set_channel:
*/
if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) ||
!scan_req->n_ssids) {
- *next_delay = msecs_to_jiffies(scan_req->duration) >
- IEEE80211_PASSIVE_CHANNEL_TIME ?
- msecs_to_jiffies(scan_req->duration) :
- IEEE80211_PASSIVE_CHANNEL_TIME;
+ *next_delay = max(msecs_to_jiffies(scan_req->duration),
+ IEEE80211_PASSIVE_CHANNEL_TIME);
local->next_scan_state = SCAN_DECISION;
if (scan_req->n_ssids)
set_bit(SCAN_BEACON_WAIT, &local->scanning);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index dd8f857a1fbc..d1cf987de13b 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -1301,3 +1301,4 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
while ((skb = __skb_dequeue(skbs)))
ieee80211_free_txskb(hw, skb);
}
+EXPORT_SYMBOL(ieee80211_purge_tx_queue);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index edba4a31844f..a9ee86982259 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -699,7 +699,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.skb = tx->skb;
txrc.reported_rate.idx = -1;
- if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) {
+ if (unlikely(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) {
txrc.rate_idx_mask = ~0;
} else {
txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
@@ -5348,8 +5348,10 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
if (beacon->tail)
skb_put_data(skb, beacon->tail, beacon->tail_len);
- if (ieee80211_beacon_protect(skb, local, sdata, link) < 0)
+ if (ieee80211_beacon_protect(skb, local, sdata, link) < 0) {
+ dev_kfree_skb(skb);
return NULL;
+ }
ieee80211_beacon_get_finish(hw, vif, link, offs, beacon, skb,
chanctx_conf, csa_off_base);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c7ad9bc5973a..f94faa86ba8a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -751,7 +751,9 @@ static void __iterate_interfaces(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata;
bool active_only = iter_flags & IEEE80211_IFACE_ITER_ACTIVE;
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ list_for_each_entry_rcu(sdata, &local->interfaces, list,
+ lockdep_is_held(&local->iflist_mtx) ||
+ lockdep_is_held(&local->hw.wiphy->mtx)) {
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
@@ -833,7 +835,8 @@ static void __iterate_stations(struct ieee80211_local *local,
{
struct sta_info *sta;
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry_rcu(sta, &local->sta_list, list,
+ lockdep_is_held(&local->hw.wiphy->mtx)) {
if (!sta->uploaded)
continue;
@@ -854,6 +857,19 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL_GPL(ieee80211_iterate_stations_atomic);
+void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw,
+ void (*iterator)(void *data,
+ struct ieee80211_sta *sta),
+ void *data)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ __iterate_stations(local, iterator, data);
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_stations_mtx);
+
struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
@@ -3451,24 +3467,44 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
return ts;
}
-void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
+/* Cancel CAC for the interfaces under the specified @local. If @ctx is
+ * also provided, only the interfaces using that ctx will be canceled.
+ */
+void ieee80211_dfs_cac_cancel(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
struct ieee80211_sub_if_data *sdata;
struct cfg80211_chan_def chandef;
+ struct ieee80211_link_data *link;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ unsigned int link_id;
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
- wiphy_delayed_work_cancel(local->hw.wiphy,
- &sdata->dfs_cac_timer_work);
-
- if (sdata->wdev.cac_started) {
- chandef = sdata->vif.bss_conf.chanreq.oper;
- ieee80211_link_release_channel(&sdata->deflink);
- cfg80211_cac_event(sdata->dev,
- &chandef,
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS;
+ link_id++) {
+ link = sdata_dereference(sdata->link[link_id],
+ sdata);
+ if (!link)
+ continue;
+
+ chanctx_conf = sdata_dereference(link->conf->chanctx_conf,
+ sdata);
+ if (ctx && &ctx->conf != chanctx_conf)
+ continue;
+
+ wiphy_delayed_work_cancel(local->hw.wiphy,
+ &link->dfs_cac_timer_work);
+
+ if (!sdata->wdev.links[link_id].cac_started)
+ continue;
+
+ chandef = link->conf->chanreq.oper;
+ ieee80211_link_release_channel(link);
+ cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
- GFP_KERNEL);
+ GFP_KERNEL, link_id);
}
}
}
@@ -3478,9 +3514,8 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, radar_detected_work);
- struct cfg80211_chan_def chandef = local->hw.conf.chandef;
+ struct cfg80211_chan_def chandef;
struct ieee80211_chanctx *ctx;
- int num_chanctx = 0;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -3488,25 +3523,46 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
- num_chanctx++;
+ if (!ctx->radar_detected)
+ continue;
+
+ ctx->radar_detected = false;
+
chandef = ctx->conf.def;
+
+ ieee80211_dfs_cac_cancel(local, ctx);
+ cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL);
}
+}
- ieee80211_dfs_cac_cancel(local);
+static void
+ieee80211_radar_mark_chan_ctx_iterator(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *chanctx_conf,
+ void *data)
+{
+ struct ieee80211_chanctx *ctx =
+ container_of(chanctx_conf, struct ieee80211_chanctx,
+ conf);
- if (num_chanctx > 1)
- /* XXX: multi-channel is not supported yet */
- WARN_ON(1);
- else
- cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL);
+ if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER)
+ return;
+
+ if (data && data != chanctx_conf)
+ return;
+
+ ctx->radar_detected = true;
}
-void ieee80211_radar_detected(struct ieee80211_hw *hw)
+void ieee80211_radar_detected(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *chanctx_conf)
{
struct ieee80211_local *local = hw_to_local(hw);
trace_api_radar_detected(local);
+ ieee80211_iter_chan_contexts_atomic(hw, ieee80211_radar_mark_chan_ctx_iterator,
+ chanctx_conf);
+
wiphy_work_queue(hw->wiphy, &local->radar_detected_work);
}
EXPORT_SYMBOL(ieee80211_radar_detected);
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index de52a9191da0..43288b408fde 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -486,6 +486,9 @@ static int mctp_ioctl_droptag(struct mctp_sock *msk, bool tagv2,
tag = ctl.tag & MCTP_TAG_MASK;
rc = -EINVAL;
+ if (ctl.peer_addr == MCTP_ADDR_NULL)
+ ctl.peer_addr = MCTP_ADDR_ANY;
+
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
/* we do an irqsave here, even though we know the irq state,
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 77e5dd422258..8551dab1d1e6 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -366,7 +366,7 @@ static void mctp_test_route_input_sk(struct kunit *test)
skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
- KUNIT_EXPECT_EQ(test, skb->len, 1);
+ KUNIT_EXPECT_EQ(test, skb2->len, 1);
skb_free_datagram(sock->sk, skb2);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 0e6c94a8c2bc..aba983531ed3 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1201,8 +1201,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int event,
rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
+ rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
}
static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
@@ -2278,8 +2277,7 @@ static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
}
static int mpls_valid_getroute_req(struct sk_buff *skb,
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 4385fd3b13be..6e73da94af7f 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -106,7 +106,7 @@ static int mpls_xmit(struct sk_buff *skb)
hh_len = 0;
/* Ensure there is enough space for the headers in the skb */
- if (skb_cow(skb, hh_len + new_header_size))
+ if (skb_cow_head(skb, hh_len + new_header_size))
goto drop;
skb_set_inner_protocol(skb, skb->protocol);
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 99382c317ebb..38d8121331d4 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -12,6 +12,7 @@
#include <net/netns/generic.h>
#include "protocol.h"
+#include "mib.h"
#define MPTCP_SYSCTL_PATH "net/mptcp"
@@ -27,8 +28,11 @@ struct mptcp_pernet {
#endif
unsigned int add_addr_timeout;
+ unsigned int blackhole_timeout;
unsigned int close_timeout;
unsigned int stale_loss_cnt;
+ atomic_t active_disable_times;
+ unsigned long active_disable_stamp;
u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
@@ -87,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->blackhole_timeout = 3600;
+ atomic_set(&pernet->active_disable_times, 0);
pernet->close_timeout = TCP_TIMEWAIT_LEN;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
@@ -151,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl,
return ret;
}
+static int proc_blackhole_detect_timeout(const struct ctl_table *table,
+ int write, void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ atomic_set(&pernet->active_disable_times, 0);
+
+ return ret;
+}
+
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
@@ -217,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "blackhole_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_blackhole_detect_timeout,
+ .extra1 = SYSCTL_ZERO,
+ },
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -240,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[6].data = &pernet->scheduler;
/* table[7] is for available_schedulers which is read-only info */
table[8].data = &pernet->close_timeout;
+ table[9].data = &pernet->blackhole_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
@@ -277,6 +305,111 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
#endif /* CONFIG_SYSCTL */
+/* The following code block is to deal with middle box issues with MPTCP,
+ * similar to what is done with TFO.
+ * The proposed solution is to disable active MPTCP globally when SYN+MPC are
+ * dropped, while SYN without MPC aren't. In this case, active side MPTCP is
+ * disabled globally for 1hr at first. Then if it happens again, it is disabled
+ * for 2h, then 4h, 8h, ...
+ * The timeout is reset back to 1hr when a successful active MPTCP connection is
+ * fully established.
+ */
+
+/* Disable active MPTCP and record current jiffies and active_disable_times */
+void mptcp_active_disable(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ struct mptcp_pernet *pernet;
+
+ pernet = mptcp_get_pernet(net);
+
+ if (!READ_ONCE(pernet->blackhole_timeout))
+ return;
+
+ /* Paired with READ_ONCE() in mptcp_active_should_disable() */
+ WRITE_ONCE(pernet->active_disable_stamp, jiffies);
+
+ /* Paired with smp_rmb() in mptcp_active_should_disable().
+ * We want pernet->active_disable_stamp to be updated first.
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&pernet->active_disable_times);
+
+ MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
+}
+
+/* Calculate timeout for MPTCP active disable
+ * Return true if we are still in the active MPTCP disable period
+ * Return false if timeout already expired and we should use active MPTCP
+ */
+bool mptcp_active_should_disable(struct sock *ssk)
+{
+ struct net *net = sock_net(ssk);
+ unsigned int blackhole_timeout;
+ struct mptcp_pernet *pernet;
+ unsigned long timeout;
+ int disable_times;
+ int multiplier;
+
+ pernet = mptcp_get_pernet(net);
+ blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
+
+ if (!blackhole_timeout)
+ return false;
+
+ disable_times = atomic_read(&pernet->active_disable_times);
+ if (!disable_times)
+ return false;
+
+ /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
+ smp_rmb();
+
+ /* Limit timeout to max: 2^6 * initial timeout */
+ multiplier = 1 << min(disable_times - 1, 6);
+
+ /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
+ timeout = READ_ONCE(pernet->active_disable_stamp) +
+ multiplier * blackhole_timeout * HZ;
+
+ return time_before(jiffies, timeout);
+}
+
+/* Enable active MPTCP and reset active_disable_times if needed */
+void mptcp_active_enable(struct sock *sk)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
+
+ if (atomic_read(&pernet->active_disable_times)) {
+ struct dst_entry *dst = sk_dst_get(sk);
+
+ if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
+ atomic_set(&pernet->active_disable_times, 0);
+ }
+}
+
+/* Check the number of retransmissions, and fallback to TCP if needed */
+void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
+{
+ struct mptcp_subflow_context *subflow;
+ u32 timeouts;
+
+ if (!sk_is_mptcp(ssk))
+ return;
+
+ timeouts = inet_csk(ssk)->icsk_retransmits;
+ subflow = mptcp_subflow_ctx(ssk);
+
+ if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
+ if (timeouts == 2 || (timeouts < 2 && expired)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
+ subflow->mpc_drop = 1;
+ mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
+ } else {
+ subflow->mpc_drop = 0;
+ }
+ }
+}
+
static int __net_init mptcp_net_init(struct net *net)
{
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index ad28da655f8b..a29ff901df75 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -68,12 +68,12 @@ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflo
skb = skb_peek_tail(&sk->sk_receive_queue);
if (skb) {
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
- pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx", sk,
+ pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk,
MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
}
- pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
+ pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq);
}
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 7884217f33eb..38c2efc82b94 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -15,6 +15,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
+ SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP),
+ SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED),
SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
@@ -25,6 +27,10 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
+ SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX),
+ SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR),
+ SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR),
+ SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
@@ -69,6 +75,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
+ SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 66aa67f49d03..c8ffe18a8722 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -10,6 +10,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
+ MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */
+ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */
MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
@@ -20,6 +22,10 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
+ MPTCP_MIB_JOINSYNTX, /* Sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXCREATSKERR, /* Not able to create a socket when sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXCONNECTERR, /* Not able to connect() when sending a SYN + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
@@ -70,6 +76,7 @@ enum linux_mptcp_mib_field {
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
+ MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index ac2f1a54cc43..370c3836b771 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -117,7 +117,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
ptr += 2;
}
- pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
+ pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u\n",
version, flags, opsize, mp_opt->sndr_key,
mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum);
break;
@@ -131,7 +131,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 4;
mp_opt->nonce = get_unaligned_be32(ptr);
ptr += 4;
- pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
+ pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u\n",
mp_opt->backup, mp_opt->join_id,
mp_opt->token, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
@@ -142,19 +142,19 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 8;
mp_opt->nonce = get_unaligned_be32(ptr);
ptr += 4;
- pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
+ pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u\n",
mp_opt->backup, mp_opt->join_id,
mp_opt->thmac, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK;
ptr += 2;
memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
- pr_debug("MP_JOIN hmac");
+ pr_debug("MP_JOIN hmac\n");
}
break;
case MPTCPOPT_DSS:
- pr_debug("DSS");
+ pr_debug("DSS\n");
ptr++;
/* we must clear 'mpc_map' be able to detect MP_CAPABLE
@@ -169,7 +169,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0;
mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK);
- pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d",
+ pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d\n",
mp_opt->data_fin, mp_opt->dsn64,
mp_opt->use_map, mp_opt->ack64,
mp_opt->use_ack);
@@ -207,7 +207,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 4;
}
- pr_debug("data_ack=%llu", mp_opt->data_ack);
+ pr_debug("data_ack=%llu\n", mp_opt->data_ack);
}
if (mp_opt->use_map) {
@@ -231,7 +231,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
}
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
+ pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n",
mp_opt->data_seq, mp_opt->subflow_seq,
mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD),
mp_opt->csum);
@@ -293,7 +293,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->ahmac = get_unaligned_be64(ptr);
ptr += 8;
}
- pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
+ pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d\n",
(mp_opt->addr.family == AF_INET6) ? "6" : "",
mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port));
break;
@@ -309,7 +309,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
for (i = 0; i < mp_opt->rm_list.nr; i++)
mp_opt->rm_list.ids[i] = *ptr++;
- pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr);
+ pr_debug("RM_ADDR: rm_list_nr=%d\n", mp_opt->rm_list.nr);
break;
case MPTCPOPT_MP_PRIO:
@@ -318,7 +318,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->suboptions |= OPTION_MPTCP_PRIO;
mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
- pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
+ pr_debug("MP_PRIO: prio=%d\n", mp_opt->backup);
break;
case MPTCPOPT_MP_FASTCLOSE:
@@ -329,7 +329,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE;
- pr_debug("MP_FASTCLOSE: recv_key=%llu", mp_opt->rcvr_key);
+ pr_debug("MP_FASTCLOSE: recv_key=%llu\n", mp_opt->rcvr_key);
break;
case MPTCPOPT_RST:
@@ -343,7 +343,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
flags = *ptr++;
mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
mp_opt->reset_reason = *ptr;
- pr_debug("MP_RST: transient=%u reason=%u",
+ pr_debug("MP_RST: transient=%u reason=%u\n",
mp_opt->reset_transient, mp_opt->reset_reason);
break;
@@ -354,7 +354,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
mp_opt->suboptions |= OPTION_MPTCP_FAIL;
mp_opt->fail_seq = get_unaligned_be64(ptr);
- pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
+ pr_debug("MP_FAIL: data_seq=%llu\n", mp_opt->fail_seq);
break;
default:
@@ -417,7 +417,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
- pr_debug("remote_token=%u, nonce=%u", subflow->remote_token,
+ pr_debug("remote_token=%u, nonce=%u\n", subflow->remote_token,
subflow->local_nonce);
opts->suboptions = OPTION_MPTCP_MPJ_SYN;
opts->join_id = subflow->local_id;
@@ -500,7 +500,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
*size = TCPOLEN_MPTCP_MPC_ACK;
}
- pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
+ pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d\n",
subflow, subflow->local_key, subflow->remote_key,
data_len);
@@ -509,7 +509,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->suboptions = OPTION_MPTCP_MPJ_ACK;
memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN);
*size = TCPOLEN_MPTCP_MPJ_ACK;
- pr_debug("subflow=%p", subflow);
+ pr_debug("subflow=%p\n", subflow);
/* we can use the full delegate action helper only from BH context
* If we are in process context - sk is flushing the backlog at
@@ -675,7 +675,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
*size = len;
if (drop_other_suboptions) {
- pr_debug("drop other suboptions");
+ pr_debug("drop other suboptions\n");
opts->suboptions = 0;
/* note that e.g. DSS could have written into the memory
@@ -695,7 +695,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
} else {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
}
- pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
+ pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n",
opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
return true;
@@ -726,7 +726,7 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
opts->rm_list = rm_list;
for (i = 0; i < opts->rm_list.nr; i++)
- pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
+ pr_debug("rm_list_ids[%d]=%d\n", i, opts->rm_list.ids[i]);
MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr);
return true;
}
@@ -752,7 +752,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
opts->suboptions |= OPTION_MPTCP_PRIO;
opts->backup = subflow->request_bkup;
- pr_debug("prio=%d", opts->backup);
+ pr_debug("prio=%d\n", opts->backup);
return true;
}
@@ -794,7 +794,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk,
opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
opts->rcvr_key = READ_ONCE(msk->remote_key);
- pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
+ pr_debug("FASTCLOSE key=%llu\n", opts->rcvr_key);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
return true;
}
@@ -816,7 +816,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk,
opts->suboptions |= OPTION_MPTCP_FAIL;
opts->fail_seq = subflow->map_seq;
- pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+ pr_debug("MP_FAIL fail_seq=%llu\n", opts->fail_seq);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
return true;
@@ -904,7 +904,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
opts->csum_reqd = subflow_req->csum_reqd;
opts->allow_join_id0 = subflow_req->allow_join_id0;
*size = TCPOLEN_MPTCP_MPC_SYNACK;
- pr_debug("subflow_req=%p, local_key=%llu",
+ pr_debug("subflow_req=%p, local_key=%llu\n",
subflow_req, subflow_req->local_key);
return true;
} else if (subflow_req->mp_join) {
@@ -913,7 +913,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
opts->join_id = subflow_req->local_id;
opts->thmac = subflow_req->thmac;
opts->nonce = subflow_req->local_nonce;
- pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
+ pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u\n",
subflow_req, opts->backup, opts->join_id,
opts->thmac, opts->nonce);
*size = TCPOLEN_MPTCP_MPJ_SYNACK;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 23bb89c94e90..620264c75dc2 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -19,7 +19,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
{
u8 add_addr = READ_ONCE(msk->pm.addr_signal);
- pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo);
+ pr_debug("msk=%p, local_id=%d, echo=%d\n", msk, addr->id, echo);
lockdep_assert_held(&msk->pm.lock);
@@ -45,7 +45,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
{
u8 rm_addr = READ_ONCE(msk->pm.addr_signal);
- pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
+ pr_debug("msk=%p, rm_list_nr=%d\n", msk, rm_list->nr);
if (rm_addr) {
MPTCP_ADD_STATS(sock_net((struct sock *)msk),
@@ -60,23 +60,13 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
return 0;
}
-int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
-{
- pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
-
- spin_lock_bh(&msk->pm.lock);
- mptcp_pm_nl_rm_subflow_received(msk, rm_list);
- spin_unlock_bh(&msk->pm.lock);
- return 0;
-}
-
/* path manager event handlers */
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
struct mptcp_pm_data *pm = &msk->pm;
- pr_debug("msk=%p, token=%u side=%d", msk, READ_ONCE(msk->token), server_side);
+ pr_debug("msk=%p, token=%u side=%d\n", msk, READ_ONCE(msk->token), server_side);
WRITE_ONCE(pm->server_side, server_side);
mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
@@ -100,7 +90,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
subflows_max = mptcp_pm_get_subflows_max(msk);
- pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
+ pr_debug("msk=%p subflows=%d max=%d allow=%d\n", msk, pm->subflows,
subflows_max, READ_ONCE(pm->accept_subflow));
/* try to avoid acquiring the lock below */
@@ -124,7 +114,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
enum mptcp_pm_status new_status)
{
- pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status,
+ pr_debug("msk=%p status=%x new=%lx\n", msk, msk->pm.status,
BIT(new_status));
if (msk->pm.status & BIT(new_status))
return false;
@@ -139,7 +129,7 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
struct mptcp_pm_data *pm = &msk->pm;
bool announce = false;
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
spin_lock_bh(&pm->lock);
@@ -163,14 +153,14 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
}
void mptcp_pm_subflow_established(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
if (!READ_ONCE(pm->work_pending))
return;
@@ -222,7 +212,7 @@ void mptcp_pm_add_addr_received(const struct sock *ssk,
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_pm_data *pm = &msk->pm;
- pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
+ pr_debug("msk=%p remote_id=%d accept=%d\n", msk, addr->id,
READ_ONCE(pm->accept_addr));
mptcp_event_addr_announced(ssk, addr);
@@ -236,7 +226,9 @@ void mptcp_pm_add_addr_received(const struct sock *ssk,
} else {
__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
}
- } else if (!READ_ONCE(pm->accept_addr)) {
+ /* id0 should not have a different address */
+ } else if ((addr->id == 0 && !mptcp_pm_nl_is_init_remote_addr(msk, addr)) ||
+ (addr->id > 0 && !READ_ONCE(pm->accept_addr))) {
mptcp_pm_announce_addr(msk, addr, true);
mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
@@ -253,7 +245,7 @@ void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
{
struct mptcp_pm_data *pm = &msk->pm;
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
spin_lock_bh(&pm->lock);
@@ -277,7 +269,7 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
struct mptcp_pm_data *pm = &msk->pm;
u8 i;
- pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr);
+ pr_debug("msk=%p remote_ids_nr=%d\n", msk, rm_list->nr);
for (i = 0; i < rm_list->nr; i++)
mptcp_event_addr_removed(msk, rm_list->ids[i]);
@@ -309,19 +301,19 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- pr_debug("fail_seq=%llu", fail_seq);
+ pr_debug("fail_seq=%llu\n", fail_seq);
if (!READ_ONCE(msk->allow_infinite_fallback))
return;
if (!subflow->fail_tout) {
- pr_debug("send MP_FAIL response and infinite map");
+ pr_debug("send MP_FAIL response and infinite map\n");
subflow->send_mp_fail = 1;
subflow->send_infinite_map = 1;
tcp_send_ack(sk);
} else {
- pr_debug("MP_FAIL response received");
+ pr_debug("MP_FAIL response received\n");
WRITE_ONCE(subflow->fail_tout, 0);
}
}
@@ -438,20 +430,6 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_is_backup(msk, &skc_local);
}
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
-{
- *flags = 0;
- *ifindex = 0;
-
- if (!id)
- return 0;
-
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
- return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
-}
-
int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info)
{
if (info->attrs[MPTCP_PM_ATTR_TOKEN])
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 4cae2aa7be5c..64fe0e7d87d7 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -130,12 +130,15 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
{
struct mptcp_subflow_context *subflow;
struct mptcp_addr_info cur;
- struct sock_common *skc;
list_for_each_entry(subflow, list, node) {
- skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- remote_address(skc, &cur);
+ if (!((1 << inet_sk_state_load(ssk)) &
+ (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV)))
+ continue;
+
+ remote_address((struct sock_common *)ssk, &cur);
if (mptcp_addresses_equal(&cur, daddr, daddr->port))
return true;
}
@@ -143,11 +146,13 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
return false;
}
-static struct mptcp_pm_addr_entry *
+static bool
select_local_address(const struct pm_nl_pernet *pernet,
- const struct mptcp_sock *msk)
+ const struct mptcp_sock *msk,
+ struct mptcp_pm_local *new_local)
{
- struct mptcp_pm_addr_entry *entry, *ret = NULL;
+ struct mptcp_pm_addr_entry *entry;
+ bool found = false;
msk_owned_by_me(msk);
@@ -159,17 +164,23 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
- ret = entry;
+ new_local->addr = entry->addr;
+ new_local->flags = entry->flags;
+ new_local->ifindex = entry->ifindex;
+ found = true;
break;
}
rcu_read_unlock();
- return ret;
+
+ return found;
}
-static struct mptcp_pm_addr_entry *
-select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
+static bool
+select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
+ struct mptcp_pm_local *new_local)
{
- struct mptcp_pm_addr_entry *entry, *ret = NULL;
+ struct mptcp_pm_addr_entry *entry;
+ bool found = false;
rcu_read_lock();
/* do not keep any additional per socket state, just signal
@@ -184,11 +195,15 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
- ret = entry;
+ new_local->addr = entry->addr;
+ new_local->flags = entry->flags;
+ new_local->ifindex = entry->ifindex;
+ found = true;
break;
}
rcu_read_unlock();
- return ret;
+
+ return found;
}
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
@@ -279,7 +294,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
struct mptcp_sock *msk = entry->sock;
struct sock *sk = (struct sock *)msk;
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
if (!msk)
return;
@@ -298,7 +313,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
spin_lock_bh(&msk->pm.lock);
if (!mptcp_pm_should_add_signal_addr(msk)) {
- pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
+ pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id);
mptcp_pm_announce_addr(msk, &entry->addr, false);
mptcp_pm_add_addr_send_ack(msk);
entry->retrans_times++;
@@ -323,15 +338,21 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
{
struct mptcp_pm_add_entry *entry;
struct sock *sk = (struct sock *)msk;
+ struct timer_list *add_timer = NULL;
spin_lock_bh(&msk->pm.lock);
entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
- if (entry && (!check_id || entry->addr.id == addr->id))
+ if (entry && (!check_id || entry->addr.id == addr->id)) {
entry->retrans_times = ADD_ADDR_RETRANS_MAX;
+ add_timer = &entry->add_timer;
+ }
+ if (!check_id && entry)
+ list_del(&entry->list);
spin_unlock_bh(&msk->pm.lock);
- if (entry && (!check_id || entry->addr.id == addr->id))
- sk_stop_timer_sync(sk, &entry->add_timer);
+ /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */
+ if (add_timer)
+ sk_stop_timer_sync(sk, add_timer);
return entry;
}
@@ -379,7 +400,7 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
struct sock *sk = (struct sock *)msk;
LIST_HEAD(free_list);
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
spin_lock_bh(&msk->pm.lock);
list_splice_init(&msk->pm.anno_list, &free_list);
@@ -465,7 +486,7 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow;
- pr_debug("send ack for %s",
+ pr_debug("send ack for %s\n",
prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"));
slow = lock_sock_fast(ssk);
@@ -512,11 +533,12 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
- struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL;
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_signal_max;
+ bool signal_and_subflow = false;
unsigned int local_addr_max;
struct pm_nl_pernet *pernet;
+ struct mptcp_pm_local local;
unsigned int subflows_max;
pernet = pm_nl_get_pernet(sock_net(sk));
@@ -565,23 +587,27 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL))
return;
- local = select_signal_address(pernet, msk);
- if (!local)
+ if (!select_signal_address(pernet, msk, &local))
goto subflow;
/* If the alloc fails, we are on memory pressure, not worth
* continuing, and trying to create subflows.
*/
- if (!mptcp_pm_alloc_anno_list(msk, &local->addr))
+ if (!mptcp_pm_alloc_anno_list(msk, &local.addr))
return;
- __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ __clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
- mptcp_pm_announce_addr(msk, &local->addr, false);
+
+ /* Special case for ID0: set the correct ID */
+ if (local.addr.id == msk->mpc_endpoint_id)
+ local.addr.id = 0;
+
+ mptcp_pm_announce_addr(msk, &local.addr, false);
mptcp_pm_nl_addr_send_ack(msk);
- if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
- signal_and_subflow = local;
+ if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
+ signal_and_subflow = true;
}
subflow:
@@ -592,26 +618,28 @@ subflow:
bool fullmesh;
int i, nr;
- if (signal_and_subflow) {
- local = signal_and_subflow;
- signal_and_subflow = NULL;
- } else {
- local = select_local_address(pernet, msk);
- if (!local)
- break;
- }
+ if (signal_and_subflow)
+ signal_and_subflow = false;
+ else if (!select_local_address(pernet, msk, &local))
+ break;
- fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
+ fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
- msk->pm.local_addr_used++;
- __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
- nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
+ __clear_bit(local.addr.id, msk->pm.id_avail_bitmap);
+
+ /* Special case for ID0: set the correct ID */
+ if (local.addr.id == msk->mpc_endpoint_id)
+ local.addr.id = 0;
+ else /* local_addr_used is not decr for ID 0 */
+ msk->pm.local_addr_used++;
+
+ nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs);
if (nr == 0)
continue;
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
+ __mptcp_subflow_connect(sk, &local, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
}
mptcp_pm_nl_check_work_pending(msk);
@@ -632,10 +660,11 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
*/
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
struct mptcp_addr_info *remote,
- struct mptcp_addr_info *addrs)
+ struct mptcp_pm_local *locals)
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
+ struct mptcp_addr_info mpc_addr;
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
int i = 0;
@@ -643,6 +672,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
pernet = pm_nl_get_pernet_from_msk(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
+ mptcp_local_address((struct sock_common *)msk, &mpc_addr);
+
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
@@ -652,8 +683,16 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
continue;
if (msk->pm.subflows < subflows_max) {
+ locals[i].addr = entry->addr;
+ locals[i].flags = entry->flags;
+ locals[i].ifindex = entry->ifindex;
+
+ /* Special case for ID0: set the correct ID */
+ if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port))
+ locals[i].addr.id = 0;
+
msk->pm.subflows++;
- addrs[i++] = entry->addr;
+ i++;
}
}
rcu_read_unlock();
@@ -662,21 +701,19 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
* 'IPADDRANY' local address
*/
if (!i) {
- struct mptcp_addr_info local;
-
- memset(&local, 0, sizeof(local));
- local.family =
+ memset(&locals[i], 0, sizeof(locals[i]));
+ locals[i].addr.family =
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
remote->family == AF_INET6 &&
ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
#endif
remote->family;
- if (!mptcp_pm_addr_families_match(sk, &local, remote))
+ if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote))
return 0;
msk->pm.subflows++;
- addrs[i++] = local;
+ i++;
}
return i;
@@ -684,7 +721,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
- struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX];
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
@@ -695,7 +732,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
- pr_debug("accepted %d:%d remote family %d",
+ pr_debug("accepted %d:%d remote family %d\n",
msk->pm.add_addr_accepted, add_addr_accept_max,
msk->pm.remote.family);
@@ -713,24 +750,35 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- nr = fill_local_addresses_vec(msk, &remote, addrs);
+ nr = fill_local_addresses_vec(msk, &remote, locals);
if (nr == 0)
return;
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0)
+ if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0)
sf_created = true;
spin_lock_bh(&msk->pm.lock);
if (sf_created) {
- msk->pm.add_addr_accepted++;
+ /* add_addr_accepted is not decr for ID 0 */
+ if (remote.id)
+ msk->pm.add_addr_accepted++;
if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
msk->pm.subflows >= subflows_max)
WRITE_ONCE(msk->pm.accept_addr, false);
}
}
+bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *remote)
+{
+ struct mptcp_addr_info mpc_remote;
+
+ remote_address((struct sock_common *)msk, &mpc_remote);
+ return mptcp_addresses_equal(&mpc_remote, remote, remote->port);
+}
+
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
@@ -742,9 +790,12 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
!mptcp_pm_should_rm_signal(msk))
return;
- subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
- if (subflow)
- mptcp_pm_send_ack(msk, subflow, false, false);
+ mptcp_for_each_subflow(msk, subflow) {
+ if (__mptcp_subflow_active(subflow)) {
+ mptcp_pm_send_ack(msk, subflow, false, false);
+ break;
+ }
+ }
}
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -754,7 +805,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
{
struct mptcp_subflow_context *subflow;
- pr_debug("bkup=%d", bkup);
+ pr_debug("bkup=%d\n", bkup);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -777,11 +828,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
return -EINVAL;
}
-static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id)
-{
- return local_id == id || (!local_id && msk->mpc_endpoint_id == id);
-}
-
static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list,
enum linux_mptcp_mib_field rm_type)
@@ -790,7 +836,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
struct sock *sk = (struct sock *)msk;
u8 i;
- pr_debug("%s rm_list_nr %d",
+ pr_debug("%s rm_list_nr %d\n",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr);
msk_owned_by_me(msk);
@@ -814,12 +860,14 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
u8 id = subflow_get_local_id(subflow);
+ if (inet_sk_state_load(ssk) == TCP_CLOSE)
+ continue;
if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
continue;
- if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
+ if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id)
continue;
- pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
+ pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
i, rm_id, id, remote_id, msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
@@ -829,25 +877,27 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
- removed = true;
+ removed |= subflow->request_join;
if (rm_type == MPTCP_MIB_RMSUBFLOW)
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
- if (rm_type == MPTCP_MIB_RMSUBFLOW)
- __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap);
- else if (rm_type == MPTCP_MIB_RMADDR)
+
+ if (rm_type == MPTCP_MIB_RMADDR)
__MPTCP_INC_STATS(sock_net(sk), rm_type);
+
if (!removed)
continue;
if (!mptcp_pm_is_kernel(msk))
continue;
- if (rm_type == MPTCP_MIB_RMADDR) {
- msk->pm.add_addr_accepted--;
- WRITE_ONCE(msk->pm.accept_addr, true);
- } else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
- msk->pm.local_addr_used--;
+ if (rm_type == MPTCP_MIB_RMADDR && rm_id &&
+ !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
+ /* Note: if the subflow has been closed before, this
+ * add_addr_accepted counter will not be decremented.
+ */
+ if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk))
+ WRITE_ONCE(msk->pm.accept_addr, true);
}
}
}
@@ -857,8 +907,8 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR);
}
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
- const struct mptcp_rm_list *rm_list)
+static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+ const struct mptcp_rm_list *rm_list)
{
mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW);
}
@@ -874,7 +924,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
spin_lock_bh(&msk->pm.lock);
- pr_debug("msk=%p status=%x", msk, pm->status);
+ pr_debug("msk=%p status=%x\n", msk, pm->status);
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
mptcp_pm_nl_add_addr_received(msk);
@@ -1292,20 +1342,27 @@ static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
return pm_nl_get_pernet(genl_info_net(info));
}
-static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
+static int mptcp_nl_add_subflow_or_signal_addr(struct net *net,
+ struct mptcp_addr_info *addr)
{
struct mptcp_sock *msk;
long s_slot = 0, s_num = 0;
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
+ struct mptcp_addr_info mpc_addr;
if (!READ_ONCE(msk->fully_established) ||
mptcp_pm_is_userspace(msk))
goto next;
+ /* if the endp linked to the init sf is re-added with a != ID */
+ mptcp_local_address((struct sock_common *)msk, &mpc_addr);
+
lock_sock(sk);
spin_lock_bh(&msk->pm.lock);
+ if (mptcp_addresses_equal(addr, &mpc_addr, addr->port))
+ msk->mpc_endpoint_id = addr->id;
mptcp_pm_create_subflow_or_signal_addr(msk);
spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
@@ -1378,7 +1435,7 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
goto out_free;
}
- mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk));
+ mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr);
return 0;
out_free:
@@ -1386,24 +1443,6 @@ out_free:
return ret;
}
-int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
-{
- struct mptcp_pm_addr_entry *entry;
- struct sock *sk = (struct sock *)msk;
- struct net *net = sock_net(sk);
-
- rcu_read_lock();
- entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
- if (entry) {
- *flags = entry->flags;
- *ifindex = entry->ifindex;
- }
- rcu_read_unlock();
-
- return 0;
-}
-
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr)
{
@@ -1411,7 +1450,6 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
entry = mptcp_pm_del_add_timer(msk, addr, false);
if (entry) {
- list_del(&entry->list);
kfree(entry);
return true;
}
@@ -1419,6 +1457,12 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
return false;
}
+static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *addr)
+{
+ return msk->mpc_endpoint_id == addr->id ? 0 : addr->id;
+}
+
static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool force)
@@ -1426,29 +1470,38 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
struct mptcp_rm_list list = { .nr = 0 };
bool ret;
- list.ids[list.nr++] = addr->id;
+ list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr);
ret = remove_anno_list_by_saddr(msk, addr);
if (ret || force) {
spin_lock_bh(&msk->pm.lock);
- msk->pm.add_addr_signaled -= ret;
+ if (ret) {
+ __set_bit(addr->id, msk->pm.id_avail_bitmap);
+ msk->pm.add_addr_signaled--;
+ }
mptcp_pm_remove_addr(msk, &list);
spin_unlock_bh(&msk->pm.lock);
}
return ret;
}
+static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id)
+{
+ /* If it was marked as used, and not ID 0, decrement local_addr_used */
+ if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) &&
+ id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0))
+ msk->pm.local_addr_used--;
+}
+
static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
const struct mptcp_pm_addr_entry *entry)
{
const struct mptcp_addr_info *addr = &entry->addr;
- struct mptcp_rm_list list = { .nr = 0 };
+ struct mptcp_rm_list list = { .nr = 1 };
long s_slot = 0, s_num = 0;
struct mptcp_sock *msk;
- pr_debug("remove_id=%d", addr->id);
-
- list.ids[list.nr++] = addr->id;
+ pr_debug("remove_id=%d\n", addr->id);
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
@@ -1466,8 +1519,22 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
mptcp_pm_remove_anno_addr(msk, addr, remove_subflow &&
!(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT));
- if (remove_subflow)
- mptcp_pm_remove_subflow(msk, &list);
+
+ list.ids[0] = mptcp_endp_get_local_id(msk, addr);
+ if (remove_subflow) {
+ spin_lock_bh(&msk->pm.lock);
+ mptcp_pm_nl_rm_subflow_received(msk, &list);
+ spin_unlock_bh(&msk->pm.lock);
+ }
+
+ if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+ spin_lock_bh(&msk->pm.lock);
+ __mark_subflow_endp_available(msk, list.ids[0]);
+ spin_unlock_bh(&msk->pm.lock);
+ }
+
+ if (msk->mpc_endpoint_id == entry->addr.id)
+ msk->mpc_endpoint_id = 0;
release_sock(sk);
next:
@@ -1502,6 +1569,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
spin_lock_bh(&msk->pm.lock);
mptcp_pm_remove_addr(msk, &list);
mptcp_pm_nl_rm_subflow_received(msk, &list);
+ __mark_subflow_endp_available(msk, 0);
spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
@@ -1561,6 +1629,7 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
return ret;
}
+/* Called from the userspace PM only */
void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 };
@@ -1589,8 +1658,9 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
}
}
-static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list)
+/* Called from the in-kernel PM only */
+static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
@@ -1598,25 +1668,28 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
list_for_each_entry(entry, rm_list, list) {
if (slist.nr < MPTCP_RM_IDS_MAX &&
lookup_subflow_by_saddr(&msk->conn_list, &entry->addr))
- slist.ids[slist.nr++] = entry->addr.id;
+ slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr);
if (alist.nr < MPTCP_RM_IDS_MAX &&
remove_anno_list_by_saddr(msk, &entry->addr))
- alist.ids[alist.nr++] = entry->addr.id;
+ alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr);
}
+ spin_lock_bh(&msk->pm.lock);
if (alist.nr) {
- spin_lock_bh(&msk->pm.lock);
msk->pm.add_addr_signaled -= alist.nr;
mptcp_pm_remove_addr(msk, &alist);
- spin_unlock_bh(&msk->pm.lock);
}
if (slist.nr)
- mptcp_pm_remove_subflow(msk, &slist);
+ mptcp_pm_nl_rm_subflow_received(msk, &slist);
+ /* Reset counters: maybe some subflows have been removed before */
+ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ msk->pm.local_addr_used = 0;
+ spin_unlock_bh(&msk->pm.lock);
}
-static void mptcp_nl_remove_addrs_list(struct net *net,
- struct list_head *rm_list)
+static void mptcp_nl_flush_addrs_list(struct net *net,
+ struct list_head *rm_list)
{
long s_slot = 0, s_num = 0;
struct mptcp_sock *msk;
@@ -1629,7 +1702,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
if (!mptcp_pm_is_userspace(msk)) {
lock_sock(sk);
- mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+ mptcp_pm_flush_addrs_and_subflows(msk, rm_list);
release_sock(sk);
}
@@ -1670,7 +1743,7 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
pernet->next_id = 1;
bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
- mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list);
synchronize_rcu();
__flush_addrs(&free_list);
return 0;
@@ -1896,10 +1969,11 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
{
struct mptcp_rm_list list = { .nr = 0 };
- list.ids[list.nr++] = addr->id;
+ list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr);
spin_lock_bh(&msk->pm.lock);
mptcp_pm_nl_rm_subflow_received(msk, &list);
+ __mark_subflow_endp_available(msk, list.ids[0]);
mptcp_pm_create_subflow_or_signal_addr(msk);
spin_unlock_bh(&msk->pm.lock);
}
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 8eaa9fbe3e34..2cceded3a83a 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -119,23 +119,6 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id)
return NULL;
}
-int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex)
-{
- struct mptcp_pm_addr_entry *match;
-
- spin_lock_bh(&msk->pm.lock);
- match = mptcp_userspace_pm_lookup_addr_by_id(msk, id);
- spin_unlock_bh(&msk->pm.lock);
- if (match) {
- *flags = match->flags;
- *ifindex = match->ifindex;
- }
-
- return 0;
-}
-
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
@@ -352,8 +335,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct mptcp_pm_addr_entry local = { 0 };
+ struct mptcp_pm_addr_entry entry = { 0 };
struct mptcp_addr_info addr_r;
+ struct mptcp_pm_local local;
struct mptcp_sock *msk;
int err = -EINVAL;
struct sock *sk;
@@ -379,18 +363,18 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- err = mptcp_pm_parse_entry(laddr, info, true, &local);
+ err = mptcp_pm_parse_entry(laddr, info, true, &entry);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
goto create_err;
}
- if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
GENL_SET_ERR_MSG(info, "invalid addr flags");
err = -EINVAL;
goto create_err;
}
- local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
if (err < 0) {
@@ -398,27 +382,29 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) {
+ if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) {
GENL_SET_ERR_MSG(info, "families mismatch");
err = -EINVAL;
goto create_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
}
- lock_sock(sk);
-
- err = __mptcp_subflow_connect(sk, &local.addr, &addr_r);
+ local.addr = entry.addr;
+ local.flags = entry.flags;
+ local.ifindex = entry.ifindex;
+ lock_sock(sk);
+ err = __mptcp_subflow_connect(sk, &local, &addr_r);
release_sock(sk);
spin_lock_bh(&msk->pm.lock);
if (err)
- mptcp_userspace_pm_delete_local_addr(msk, &local);
+ mptcp_userspace_pm_delete_local_addr(msk, &entry);
else
msk->pm.subflows++;
spin_unlock_bh(&msk->pm.lock);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0d536b183a6c..c2317919fc14 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -139,7 +139,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
!skb_try_coalesce(to, from, &fragstolen, &delta))
return false;
- pr_debug("colesced seq %llx into %llx new len %d new end seq %llx",
+ pr_debug("colesced seq %llx into %llx new len %d new end seq %llx\n",
MPTCP_SKB_CB(from)->map_seq, MPTCP_SKB_CB(to)->map_seq,
to->len, MPTCP_SKB_CB(from)->end_seq);
MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq;
@@ -217,7 +217,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
end_seq = MPTCP_SKB_CB(skb)->end_seq;
max_seq = atomic64_read(&msk->rcv_wnd_sent);
- pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq,
+ pr_debug("msk=%p seq=%llx limit=%llx empty=%d\n", msk, seq, max_seq,
RB_EMPTY_ROOT(&msk->out_of_order_queue));
if (after64(end_seq, max_seq)) {
/* out of window */
@@ -643,7 +643,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
}
}
- pr_debug("msk=%p ssk=%p", msk, ssk);
+ pr_debug("msk=%p ssk=%p\n", msk, ssk);
tp = tcp_sk(ssk);
do {
u32 map_remaining, offset;
@@ -724,7 +724,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
u64 end_seq;
p = rb_first(&msk->out_of_order_queue);
- pr_debug("msk=%p empty=%d", msk, RB_EMPTY_ROOT(&msk->out_of_order_queue));
+ pr_debug("msk=%p empty=%d\n", msk, RB_EMPTY_ROOT(&msk->out_of_order_queue));
while (p) {
skb = rb_to_skb(p);
if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq))
@@ -746,7 +746,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
int delta = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
/* skip overlapping data, if any */
- pr_debug("uncoalesced seq=%llx ack seq=%llx delta=%d",
+ pr_debug("uncoalesced seq=%llx ack seq=%llx delta=%d\n",
MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq,
delta);
MPTCP_SKB_CB(skb)->offset += delta;
@@ -1240,7 +1240,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
size_t copy;
int i;
- pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u",
+ pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u\n",
msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent);
if (WARN_ON_ONCE(info->sent > info->limit ||
@@ -1341,7 +1341,7 @@ alloc_skb:
mpext->use_map = 1;
mpext->dsn64 = 1;
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d",
+ pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d\n",
mpext->data_seq, mpext->subflow_seq, mpext->data_len,
mpext->dsn64);
@@ -1892,7 +1892,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!msk->first_pending)
WRITE_ONCE(msk->first_pending, dfrag);
}
- pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d", msk,
+ pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk,
dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
!dfrag_collapsed);
@@ -2248,7 +2248,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
}
- pr_debug("block timeout %ld", timeo);
+ pr_debug("block timeout %ld\n", timeo);
sk_wait_data(sk, &timeo, NULL);
}
@@ -2264,7 +2264,7 @@ out_err:
}
}
- pr_debug("msk=%p rx queue empty=%d:%d copied=%d",
+ pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n",
msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
skb_queue_empty(&msk->receive_queue), copied);
if (!(flags & MSG_PEEK))
@@ -2326,7 +2326,7 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
continue;
}
- if (subflow->backup) {
+ if (subflow->backup || subflow->request_bkup) {
if (!backup)
backup = ssk;
continue;
@@ -2508,6 +2508,12 @@ out:
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
+ /* The first subflow can already be closed and still in the list */
+ if (subflow->close_event_done)
+ return;
+
+ subflow->close_event_done = true;
+
if (sk->sk_state == TCP_ESTABLISHED)
mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);
@@ -2533,8 +2539,11 @@ static void __mptcp_close_subflow(struct sock *sk)
mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int ssk_state = inet_sk_state_load(ssk);
- if (inet_sk_state_load(ssk) != TCP_CLOSE)
+ if (ssk_state != TCP_CLOSE &&
+ (ssk_state != TCP_CLOSE_WAIT ||
+ inet_sk_state_load(sk) != TCP_ESTABLISHED))
continue;
/* 'subflow_data_ready' will re-sched once rx queue is empty */
@@ -2714,7 +2723,7 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
if (!ssk)
return;
- pr_debug("MP_FAIL doesn't respond, reset the subflow");
+ pr_debug("MP_FAIL doesn't respond, reset the subflow\n");
slow = lock_sock_fast(ssk);
mptcp_subflow_reset(ssk);
@@ -2888,7 +2897,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
break;
default:
if (__mptcp_check_fallback(mptcp_sk(sk))) {
- pr_debug("Fallback");
+ pr_debug("Fallback\n");
ssk->sk_shutdown |= how;
tcp_shutdown(ssk, how);
@@ -2898,7 +2907,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt);
mptcp_schedule_work(sk);
} else {
- pr_debug("Sending DATA_FIN on subflow %p", ssk);
+ pr_debug("Sending DATA_FIN on subflow %p\n", ssk);
tcp_send_ack(ssk);
if (!mptcp_rtx_timer_pending(sk))
mptcp_reset_rtx_timer(sk);
@@ -2964,7 +2973,7 @@ static void mptcp_check_send_data_fin(struct sock *sk)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- pr_debug("msk=%p snd_data_fin_enable=%d pending=%d snd_nxt=%llu write_seq=%llu",
+ pr_debug("msk=%p snd_data_fin_enable=%d pending=%d snd_nxt=%llu write_seq=%llu\n",
msk, msk->snd_data_fin_enable, !!mptcp_send_head(sk),
msk->snd_nxt, msk->write_seq);
@@ -2988,7 +2997,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- pr_debug("msk=%p snd_data_fin_enable=%d shutdown=%x state=%d pending=%d",
+ pr_debug("msk=%p snd_data_fin_enable=%d shutdown=%x state=%d pending=%d\n",
msk, msk->snd_data_fin_enable, sk->sk_shutdown, sk->sk_state,
!!mptcp_send_head(sk));
@@ -3003,7 +3012,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
might_sleep();
@@ -3111,7 +3120,7 @@ cleanup:
mptcp_set_state(sk, TCP_CLOSE);
sock_hold(sk);
- pr_debug("msk=%p state=%d", sk, sk->sk_state);
+ pr_debug("msk=%p state=%d\n", sk, sk->sk_state);
if (msk->token)
mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
@@ -3543,7 +3552,7 @@ static int mptcp_get_port(struct sock *sk, unsigned short snum)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- pr_debug("msk=%p, ssk=%p", msk, msk->first);
+ pr_debug("msk=%p, ssk=%p\n", msk, msk->first);
if (WARN_ON_ONCE(!msk->first))
return -EINVAL;
@@ -3560,7 +3569,7 @@ void mptcp_finish_connect(struct sock *ssk)
sk = subflow->conn;
msk = mptcp_sk(sk);
- pr_debug("msk=%p, token=%u", sk, subflow->token);
+ pr_debug("msk=%p, token=%u\n", sk, subflow->token);
subflow->map_seq = subflow->iasn;
subflow->map_subflow_seq = 1;
@@ -3589,7 +3598,7 @@ bool mptcp_finish_join(struct sock *ssk)
struct sock *parent = (void *)msk;
bool ret = true;
- pr_debug("msk=%p, subflow=%p", msk, subflow);
+ pr_debug("msk=%p, subflow=%p\n", msk, subflow);
/* mptcp socket already closing? */
if (!mptcp_is_fully_established(parent)) {
@@ -3635,7 +3644,7 @@ err_prohibited:
static void mptcp_shutdown(struct sock *sk, int how)
{
- pr_debug("sk=%p, how=%d", sk, how);
+ pr_debug("sk=%p, how=%d\n", sk, how);
if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk))
__mptcp_wr_shutdown(sk);
@@ -3708,13 +3717,6 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
return 0;
}
-static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
-}
-
static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_subflow_context *subflow;
@@ -3735,9 +3737,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
- mptcp_subflow_early_fallback(msk, subflow);
+ if (subflow->request_mptcp) {
+ if (mptcp_active_should_disable(sk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
+ mptcp_subflow_early_fallback(msk, subflow);
+ } else if (mptcp_token_new_connect(ssk) < 0) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
+ mptcp_subflow_early_fallback(msk, subflow);
+ }
}
WRITE_ONCE(msk->write_seq, subflow->idsn);
@@ -3856,7 +3863,7 @@ static int mptcp_listen(struct socket *sock, int backlog)
struct sock *ssk;
int err;
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
lock_sock(sk);
@@ -3895,7 +3902,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct sock *ssk, *newsk;
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
/* Buggy applications can call accept on socket states other then LISTEN
* but no need to allocate the first subflow just to error out.
@@ -3904,12 +3911,12 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk)
return -EINVAL;
- pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ pr_debug("ssk=%p, listener=%p\n", ssk, mptcp_subflow_ctx(ssk));
newsk = inet_csk_accept(ssk, arg);
if (!newsk)
return arg->err;
- pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
+ pr_debug("newsk=%p, subflow is mptcp=%d\n", newsk, sk_is_mptcp(newsk));
if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
@@ -4002,7 +4009,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
sock_poll_wait(file, sock, wait);
state = inet_sk_state_load(sk);
- pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
+ pr_debug("msk=%p state=%d flags=%lx\n", msk, state, msk->flags);
if (state == TCP_LISTEN) {
struct sock *ssk = READ_ONCE(msk->first);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 60c6b073d65f..74417aae08d0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -236,6 +236,12 @@ struct mptcp_pm_data {
struct mptcp_rm_list rm_list_rx;
};
+struct mptcp_pm_local {
+ struct mptcp_addr_info addr;
+ u8 flags;
+ int ifindex;
+};
+
struct mptcp_pm_addr_entry {
struct list_head list;
struct mptcp_addr_info addr;
@@ -524,7 +530,9 @@ struct mptcp_subflow_context {
stale : 1, /* unable to snd/rcv data, do not use for xmit */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 10;
+ close_event_done : 1, /* has done the post-closed part */
+ mpc_drop : 1, /* the MPC option has been dropped in a rtx */
+ __unused : 8;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -690,6 +698,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
+
+void mptcp_active_disable(struct sock *sk);
+bool mptcp_active_should_disable(struct sock *ssk);
+void mptcp_active_enable(struct sock *sk);
+
void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
@@ -718,7 +731,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr);
/* called with sk socket lock held */
-int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
struct socket **new_sock);
@@ -992,6 +1005,8 @@ void mptcp_pm_add_addr_received(const struct sock *ssk,
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
+bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *remote);
void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
@@ -1011,14 +1026,6 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex);
-int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex);
-int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex);
int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
@@ -1026,7 +1033,6 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
-int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list);
void mptcp_free_local_addr_list(struct mptcp_sock *msk);
@@ -1133,8 +1139,6 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
- const struct mptcp_rm_list *rm_list);
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
@@ -1154,7 +1158,6 @@ static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
}
-void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
@@ -1180,7 +1183,7 @@ static inline bool mptcp_check_fallback(const struct sock *sk)
static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
{
if (__mptcp_check_fallback(msk)) {
- pr_debug("TCP fallback already done (msk=%p)", msk);
+ pr_debug("TCP fallback already done (msk=%p)\n", msk);
return;
}
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
@@ -1216,7 +1219,15 @@ static inline void mptcp_do_fallback(struct sock *ssk)
}
}
-#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
+#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
+
+static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ pr_fallback(msk);
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
{
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 4a7fd0508ad2..78ed508ebc1b 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -86,7 +86,7 @@ int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
list_add_tail_rcu(&sched->list, &mptcp_sched_list);
spin_unlock(&mptcp_sched_list_lock);
- pr_debug("%s registered", sched->name);
+ pr_debug("%s registered\n", sched->name);
return 0;
}
@@ -118,7 +118,7 @@ int mptcp_init_sched(struct mptcp_sock *msk,
if (msk->sched->init)
msk->sched->init(msk);
- pr_debug("sched=%s", msk->sched->name);
+ pr_debug("sched=%s\n", msk->sched->name);
return 0;
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 2026a9a36f80..505445a9598f 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -873,7 +873,7 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
struct mptcp_sock *msk = mptcp_sk(sk);
struct sock *ssk;
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
if (level == SOL_SOCKET)
return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
@@ -1453,7 +1453,7 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
struct mptcp_sock *msk = mptcp_sk(sk);
struct sock *ssk;
- pr_debug("msk=%p", msk);
+ pr_debug("msk=%p\n", msk);
/* @@ the meaning of setsockopt() when the socket is connected and
* there are multiple subflows is not yet defined. It is up to the
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a21c712350c3..1040b3b9696b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -39,7 +39,7 @@ static void subflow_req_destructor(struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
- pr_debug("subflow_req=%p", subflow_req);
+ pr_debug("subflow_req=%p\n", subflow_req);
if (subflow_req->msk)
sock_put((struct sock *)subflow_req->msk);
@@ -146,7 +146,7 @@ static int subflow_check_req(struct request_sock *req,
struct mptcp_options_received mp_opt;
bool opt_mp_capable, opt_mp_join;
- pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
+ pr_debug("subflow_req=%p, listener=%p\n", subflow_req, listener);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -221,7 +221,7 @@ again:
}
if (subflow_use_different_sport(subflow_req->msk, sk_listener)) {
- pr_debug("syn inet_sport=%d %d",
+ pr_debug("syn inet_sport=%d %d\n",
ntohs(inet_sk(sk_listener)->inet_sport),
ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
@@ -243,7 +243,7 @@ again:
subflow_init_req_cookie_join_save(subflow_req, skb);
}
- pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
+ pr_debug("token=%u, remote_nonce=%u msk=%p\n", subflow_req->token,
subflow_req->remote_nonce, subflow_req->msk);
}
@@ -527,7 +527,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
- pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
+ pr_debug("subflow=%p synack seq=%x\n", subflow, subflow->ssn_offset);
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
@@ -546,6 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_capable = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
+ mptcp_active_enable(parent);
mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -559,7 +560,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
- pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
+ pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d\n",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -585,12 +586,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX);
if (subflow_use_different_dport(msk, sk)) {
- pr_debug("synack inet_dport=%d %d",
+ pr_debug("synack inet_dport=%d %d\n",
ntohs(inet_sk(sk)->inet_dport),
ntohs(inet_sk(parent)->inet_dport));
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
}
} else if (mptcp_check_fallback(sk)) {
+ /* It looks like MPTCP is blocked, while TCP is not */
+ if (subflow->mpc_drop)
+ mptcp_active_disable(parent);
fallback:
mptcp_propagate_state(parent, sk, subflow, NULL);
}
@@ -655,7 +659,7 @@ static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- pr_debug("subflow=%p", subflow);
+ pr_debug("subflow=%p\n", subflow);
/* Never answer to SYNs sent to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -686,7 +690,7 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- pr_debug("subflow=%p", subflow);
+ pr_debug("subflow=%p\n", subflow);
if (skb->protocol == htons(ETH_P_IP))
return subflow_v4_conn_request(sk, skb);
@@ -807,7 +811,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_sock *owner;
struct sock *child;
- pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
+ pr_debug("listener=%p, req=%p, conn=%p\n", listener, req, listener->conn);
/* After child creation we must look for MPC even when options
* are not parsed
@@ -898,7 +902,7 @@ create_child:
ctx->conn = (struct sock *)owner;
if (subflow_use_different_sport(owner, sk)) {
- pr_debug("ack inet_sport=%d %d",
+ pr_debug("ack inet_sport=%d %d\n",
ntohs(inet_sk(sk)->inet_sport),
ntohs(inet_sk((struct sock *)owner)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
@@ -961,7 +965,7 @@ enum mapping_status {
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
- pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+ pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d\n",
ssn, subflow->map_subflow_seq, subflow->map_data_len);
}
@@ -1121,7 +1125,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len = mpext->data_len;
if (data_len == 0) {
- pr_debug("infinite mapping received");
+ pr_debug("infinite mapping received\n");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
subflow->map_data_len = 0;
return MAPPING_INVALID;
@@ -1133,7 +1137,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
if (data_len == 1) {
bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
mpext->dsn64);
- pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
+ pr_debug("DATA_FIN with no payload seq=%llu\n", mpext->data_seq);
if (subflow->map_valid) {
/* A DATA_FIN might arrive in a DSS
* option before the previous mapping
@@ -1159,7 +1163,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_fin_seq &= GENMASK_ULL(31, 0);
mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
- pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
+ pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d\n",
data_fin_seq, mpext->dsn64);
/* Adjust for DATA_FIN using 1 byte of sequence space */
@@ -1205,7 +1209,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
if (unlikely(subflow->map_csum_reqd != csum_reqd))
return MAPPING_INVALID;
- pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
+ pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n",
subflow->map_seq, subflow->map_subflow_seq,
subflow->map_data_len, subflow->map_csum_reqd,
subflow->map_data_csum);
@@ -1240,7 +1244,7 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
avail_len = skb->len - offset;
incr = limit >= avail_len ? avail_len + fin : limit;
- pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len,
+ pr_debug("discarding=%d len=%d offset=%d seq=%d\n", incr, skb->len,
offset, subflow->map_subflow_seq);
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
tcp_sk(ssk)->copied_seq += incr;
@@ -1255,12 +1259,16 @@ out:
/* sched mptcp worker to remove the subflow if no more data is pending */
static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
- if (likely(ssk->sk_state != TCP_CLOSE))
+ struct sock *sk = (struct sock *)msk;
+
+ if (likely(ssk->sk_state != TCP_CLOSE &&
+ (ssk->sk_state != TCP_CLOSE_WAIT ||
+ inet_sk_state_load(sk) != TCP_ESTABLISHED)))
return;
if (skb_queue_empty(&ssk->sk_receive_queue) &&
!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
- mptcp_schedule_work((struct sock *)msk);
+ mptcp_schedule_work(sk);
}
static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
@@ -1337,7 +1345,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
old_ack = READ_ONCE(msk->ack_seq);
ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
- pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
+ pr_debug("msk ack_seq=%llx subflow ack_seq=%llx\n", old_ack,
ack_seq);
if (unlikely(before64(ack_seq, old_ack))) {
mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
@@ -1409,7 +1417,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
subflow->map_valid = 0;
WRITE_ONCE(subflow->data_avail, false);
- pr_debug("Done with mapping: seq=%u data_len=%u",
+ pr_debug("Done with mapping: seq=%u data_len=%u\n",
subflow->map_subflow_seq,
subflow->map_data_len);
}
@@ -1519,7 +1527,7 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);
- pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
+ pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d\n",
subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);
if (likely(icsk->icsk_af_ops == target))
@@ -1561,28 +1569,31 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
#endif
}
-int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
const struct mptcp_addr_info *remote)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
+ int local_id = local->addr.id;
struct sockaddr_storage addr;
int remote_id = remote->id;
- int local_id = loc->id;
int err = -ENOTCONN;
struct socket *sf;
struct sock *ssk;
u32 remote_token;
int addrlen;
- int ifindex;
- u8 flags;
+ /* The userspace PM sent the request too early? */
if (!mptcp_is_fully_established(sk))
goto err_out;
- err = mptcp_subflow_create_socket(sk, loc->family, &sf);
- if (err)
+ err = mptcp_subflow_create_socket(sk, local->addr.family, &sf);
+ if (err) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCREATSKERR);
+ pr_debug("msk=%p local=%d remote=%d create sock error: %d\n",
+ msk, local_id, remote_id, err);
goto err_out;
+ }
ssk = sf->sk;
subflow = mptcp_subflow_ctx(ssk);
@@ -1590,42 +1601,61 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
get_random_bytes(&subflow->local_nonce, sizeof(u32));
} while (!subflow->local_nonce);
- if (local_id)
+ /* if 'IPADDRANY', the ID will be set later, after the routing */
+ if (local->addr.family == AF_INET) {
+ if (!local->addr.addr.s_addr)
+ local_id = -1;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ if (ipv6_addr_any(&local->addr.addr6))
+ local_id = -1;
+#endif
+ }
+
+ if (local_id >= 0)
subflow_set_local_id(subflow, local_id);
- mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
- &flags, &ifindex);
subflow->remote_key_valid = 1;
subflow->remote_key = READ_ONCE(msk->remote_key);
subflow->local_key = READ_ONCE(msk->local_key);
subflow->token = msk->token;
- mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
+ mptcp_info2sockaddr(&local->addr, &addr, ssk->sk_family);
addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- ssk->sk_bound_dev_if = ifindex;
+ ssk->sk_bound_dev_if = local->ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
- if (err)
+ if (err) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXBINDERR);
+ pr_debug("msk=%p local=%d remote=%d bind error: %d\n",
+ msk, local_id, remote_id, err);
goto failed;
+ }
mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
- pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
+ pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d\n", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
- subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ subflow->request_bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
sock_hold(ssk);
list_add_tail(&subflow->node, &msk->conn_list);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
- if (err && err != -EINPROGRESS)
+ if (err && err != -EINPROGRESS) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCONNECTERR);
+ pr_debug("msk=%p local=%d remote=%d connect error: %d\n",
+ msk, local_id, remote_id, err);
goto failed_unlink;
+ }
+
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTX);
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
@@ -1747,7 +1777,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;
subflow = mptcp_subflow_ctx(sf->sk);
- pr_debug("subflow=%p", subflow);
+ pr_debug("subflow=%p\n", subflow);
*new_sock = sf;
sock_hold(sk);
@@ -1776,7 +1806,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
INIT_LIST_HEAD(&ctx->node);
INIT_LIST_HEAD(&ctx->delegated_node);
- pr_debug("subflow=%p", ctx);
+ pr_debug("subflow=%p\n", ctx);
ctx->tcp_sock = sk;
WRITE_ONCE(ctx->local_id, -1);
@@ -1927,7 +1957,7 @@ static int subflow_ulp_init(struct sock *sk)
goto out;
}
- pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);
+ pr_debug("subflow=%p, family=%d\n", ctx, sk->sk_family);
tp->is_mptcp = 1;
ctx->icsk_af_ops = icsk->icsk_af_ops;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b00fc285b334..b9f551f02c81 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -655,11 +655,9 @@ void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
const struct nf_hook_entries *e)
{
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
int ret;
- INIT_LIST_HEAD(&sublist);
-
list_for_each_entry_safe(skb, next, head, list) {
skb_list_del_init(skb);
ret = nf_hook_slow(skb, state, e, 0);
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 34ba14e59e95..4890af4dc263 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -522,11 +522,10 @@ unsigned int nf_conncount_count(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_conncount_count);
-struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
- unsigned int keylen)
+struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen)
{
struct nf_conncount_data *data;
- int ret, i;
+ int i;
if (keylen % sizeof(u32) ||
keylen / sizeof(u32) > MAX_KEYLEN ||
@@ -539,12 +538,6 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
if (!data)
return ERR_PTR(-ENOMEM);
- ret = nf_ct_netns_get(net, family);
- if (ret < 0) {
- kfree(data);
- return ERR_PTR(ret);
- }
-
for (i = 0; i < ARRAY_SIZE(data->root); ++i)
data->root[i] = RB_ROOT;
@@ -581,13 +574,11 @@ static void destroy_tree(struct rb_root *r)
}
}
-void nf_conncount_destroy(struct net *net, unsigned int family,
- struct nf_conncount_data *data)
+void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data)
{
unsigned int i;
cancel_work_sync(&data->gc_work);
- nf_ct_netns_put(net, family);
for (i = 0; i < ARRAY_SIZE(data->root); ++i)
destroy_tree(&data->root[i]);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9384426ddc06..d3cb53b008f5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1722,7 +1722,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
hash);
if (IS_ERR(ct))
- return (struct nf_conntrack_tuple_hash *)ct;
+ return ERR_CAST(ct);
if (!nf_ct_add_synproxy(ct, tmpl)) {
nf_conntrack_free(ct);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4cbf71d0786b..123e2e933e9b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1579,9 +1579,6 @@ static int ctnetlink_flush_conntrack(struct net *net,
};
if (ctnetlink_needs_filter(family, cda)) {
- if (cda[CTA_FILTER])
- return -EOPNOTSUPP;
-
filter = ctnetlink_alloc_filter(cda, family);
if (IS_ERR(filter))
return PTR_ERR(filter);
@@ -1610,14 +1607,14 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb,
if (err < 0)
return err;
- if (cda[CTA_TUPLE_ORIG])
+ if (cda[CTA_TUPLE_ORIG] && !cda[CTA_FILTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
family, &zone);
- else if (cda[CTA_TUPLE_REPLY])
+ else if (cda[CTA_TUPLE_REPLY] && !cda[CTA_FILTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
family, &zone);
else {
- u_int8_t u3 = info->nfmsg->version ? family : AF_UNSPEC;
+ u8 u3 = info->nfmsg->version || cda[CTA_FILTER] ? family : AF_UNSPEC;
return ctnetlink_flush_conntrack(info->net, cda,
NETLINK_CB(skb).portid,
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 5c1ff07eaee0..df72b0376970 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -670,8 +670,14 @@ static int __init nf_flow_table_module_init(void)
if (ret)
goto out_offload;
+ ret = nf_flow_register_bpf();
+ if (ret)
+ goto out_bpf;
+
return 0;
+out_bpf:
+ nf_flow_table_offload_exit();
out_offload:
unregister_pernet_subsys(&nf_flow_table_net_ops);
return ret;
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 88787b45e30d..b0f199171932 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -17,6 +17,9 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
switch (skb->protocol) {
case htons(ETH_P_8021Q):
+ if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth)))
+ return NF_ACCEPT;
+
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
proto = veth->h_vlan_encapsulated_proto;
break;
@@ -98,7 +101,7 @@ static int __init nf_flow_inet_module_init(void)
nft_register_flowtable_type(&flowtable_ipv6);
nft_register_flowtable_type(&flowtable_inet);
- return nf_flow_register_bpf();
+ return 0;
}
static void __exit nf_flow_inet_module_exit(void)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index c2c005234dcd..98edcaa37b38 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -281,6 +281,9 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
switch (skb->protocol) {
case htons(ETH_P_8021Q):
+ if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth)))
+ return false;
+
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
if (veth->h_vlan_encapsulated_proto == proto) {
*offset += VLAN_HLEN;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 016c816d91cb..6d8da6dddf99 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1104,7 +1104,7 @@ int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
if (!nat_proto_net->nat_hook_ops) {
WARN_ON(nat_proto_net->users != 0);
- nat_ops = kmemdup(orig_nat_ops, sizeof(*orig_nat_ops) * ops_count, GFP_KERNEL);
+ nat_ops = kmemdup_array(orig_nat_ops, ops_count, sizeof(*orig_nat_ops), GFP_KERNEL);
if (!nat_ops) {
mutex_unlock(&nf_nat_proto_mutex);
return -ENOMEM;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0a2f79346958..57259b5f3ef5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -146,6 +146,8 @@ static void nft_ctx_init(struct nft_ctx *ctx,
ctx->report = nlmsg_report(nlh);
ctx->flags = nlh->nlmsg_flags;
ctx->seq = nlh->nlmsg_seq;
+
+ bitmap_zero(ctx->reg_inited, NFT_REG32_NUM);
}
static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
@@ -393,6 +395,7 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans_binding *binding;
+ struct nft_trans_set *trans_set;
list_add_tail(&trans->list, &nft_net->commit_list);
@@ -402,9 +405,13 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
+ trans_set = nft_trans_container_set(trans);
+
if (!nft_trans_set_update(trans) &&
nft_set_is_anonymous(nft_trans_set(trans)))
list_add_tail(&binding->binding_list, &nft_net->binding_list);
+
+ list_add_tail(&trans_set->list_trans_newset, &nft_net->commit_set_list);
break;
case NFT_MSG_NEWCHAIN:
if (!nft_trans_chain_update(trans) &&
@@ -611,6 +618,7 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
trans_set = nft_trans_container_set(trans);
INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list);
+ INIT_LIST_HEAD(&trans_set->list_trans_newset);
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
@@ -3878,7 +3886,6 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
struct nft_expr *expr, *last;
- const struct nft_data *data;
struct nft_rule *rule;
int err;
@@ -3899,7 +3906,7 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
/* This may call nft_chain_validate() recursively,
* callers that do so must increment ctx->level.
*/
- err = expr->ops->validate(ctx, expr, &data);
+ err = expr->ops->validate(ctx, expr);
if (err < 0)
return err;
}
@@ -4485,17 +4492,16 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net,
{
struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
- struct nft_trans *trans;
+ struct nft_trans_set *trans;
- list_for_each_entry(trans, &nft_net->commit_list, list) {
- if (trans->msg_type == NFT_MSG_NEWSET) {
- struct nft_set *set = nft_trans_set(trans);
+ /* its likely the id we need is at the tail, not at start */
+ list_for_each_entry_reverse(trans, &nft_net->commit_set_list, list_trans_newset) {
+ struct nft_set *set = trans->set;
- if (id == nft_trans_set_id(trans) &&
- set->table == table &&
- nft_active_genmask(set, genmask))
- return set;
- }
+ if (id == trans->set_id &&
+ set->table == table &&
+ nft_active_genmask(set, genmask))
+ return set;
}
return ERR_PTR(-ENOENT);
}
@@ -4587,7 +4593,7 @@ int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
return -ERANGE;
ms *= NSEC_PER_MSEC;
- *result = nsecs_to_jiffies64(ms);
+ *result = nsecs_to_jiffies64(ms) ? : !!ms;
return 0;
}
@@ -5688,12 +5694,8 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
.align = __alignof__(u8),
},
[NFT_SET_EXT_TIMEOUT] = {
- .len = sizeof(u64),
- .align = __alignof__(u64),
- },
- [NFT_SET_EXT_EXPIRATION] = {
- .len = sizeof(u64),
- .align = __alignof__(u64),
+ .len = sizeof(struct nft_timeout),
+ .align = __alignof__(struct nft_timeout),
},
[NFT_SET_EXT_USERDATA] = {
.len = sizeof(struct nft_userdata),
@@ -5812,25 +5814,32 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
htonl(*nft_set_ext_flags(ext))))
goto nla_put_failure;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
- nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
- nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
- NFTA_SET_ELEM_PAD))
- goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) {
+ u64 timeout = READ_ONCE(nft_set_ext_timeout(ext)->timeout);
+ u64 set_timeout = READ_ONCE(set->timeout);
+ __be64 msecs = 0;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
- u64 expires, now = get_jiffies_64();
+ if (set_timeout != timeout) {
+ msecs = nf_jiffies64_to_msecs(timeout);
+ if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, msecs,
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+ }
- expires = *nft_set_ext_expiration(ext);
- if (time_before64(now, expires))
- expires -= now;
- else
- expires = 0;
+ if (timeout > 0) {
+ u64 expires, now = get_jiffies_64();
- if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
- nf_jiffies64_to_msecs(expires),
- NFTA_SET_ELEM_PAD))
- goto nla_put_failure;
+ expires = READ_ONCE(nft_set_ext_timeout(ext)->expiration);
+ if (time_before64(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+
+ if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+ nf_jiffies64_to_msecs(expires),
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+ }
}
if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
@@ -6493,13 +6502,14 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
nft_set_ext_data(ext), data, set->dlen) < 0)
goto err_ext_check;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
- *nft_set_ext_expiration(ext) = get_jiffies_64() + expiration;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) {
+ nft_set_ext_timeout(ext)->timeout = timeout;
+
if (expiration == 0)
- *nft_set_ext_expiration(ext) += timeout;
+ expiration = timeout;
+
+ nft_set_ext_timeout(ext)->expiration = get_jiffies_64() + expiration;
}
- if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
- *nft_set_ext_timeout(ext) = timeout;
return elem;
@@ -6842,6 +6852,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
+ u8 update_flags;
u64 expiration;
u64 timeout;
int err, i;
@@ -6910,17 +6921,23 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
} else if (set->flags & NFT_SET_TIMEOUT &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) {
- timeout = READ_ONCE(set->timeout);
+ timeout = set->timeout;
}
expiration = 0;
if (nla[NFTA_SET_ELEM_EXPIRATION] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
return -EINVAL;
+ if (timeout == 0)
+ return -EOPNOTSUPP;
+
err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION],
&expiration);
if (err)
return err;
+
+ if (expiration > timeout)
+ return -ERANGE;
}
if (nla[NFTA_SET_ELEM_EXPR]) {
@@ -7006,16 +7023,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_parse_key_end;
}
- if (timeout > 0) {
- err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (set->flags & NFT_SET_TIMEOUT) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
if (err < 0)
goto err_parse_key_end;
-
- if (timeout != READ_ONCE(set->timeout)) {
- err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
- if (err < 0)
- goto err_parse_key_end;
- }
}
if (num_exprs) {
@@ -7153,8 +7164,30 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) &&
*nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2)))
goto err_element_clash;
- else if (!(nlmsg_flags & NLM_F_EXCL))
+ else if (!(nlmsg_flags & NLM_F_EXCL)) {
err = 0;
+ if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) {
+ update_flags = 0;
+ if (timeout != nft_set_ext_timeout(ext2)->timeout) {
+ nft_trans_elem_timeout(trans) = timeout;
+ if (expiration == 0)
+ expiration = timeout;
+
+ update_flags |= NFT_TRANS_UPD_TIMEOUT;
+ }
+ if (expiration) {
+ nft_trans_elem_expiration(trans) = expiration;
+ update_flags |= NFT_TRANS_UPD_EXPIRATION;
+ }
+
+ if (update_flags) {
+ nft_trans_elem_priv(trans) = elem_priv;
+ nft_trans_elem_update_flags(trans) = update_flags;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+ goto err_elem_free;
+ }
+ }
+ }
} else if (err == -ENOTEMPTY) {
/* ENOTEMPTY reports overlapping between this element
* and an existing one.
@@ -10447,6 +10480,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
+ list_del(&nft_trans_container_set(trans)->list_trans_newset);
if (nft_trans_set_update(trans)) {
struct nft_set *set = nft_trans_set(trans);
@@ -10478,7 +10512,22 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = nft_trans_container_elem(trans);
- nft_setelem_activate(net, te->set, te->elem_priv);
+ if (te->update_flags) {
+ const struct nft_set_ext *ext =
+ nft_set_elem_ext(te->set, te->elem_priv);
+
+ if (te->update_flags & NFT_TRANS_UPD_TIMEOUT) {
+ WRITE_ONCE(nft_set_ext_timeout(ext)->timeout,
+ te->timeout);
+ }
+ if (te->update_flags & NFT_TRANS_UPD_EXPIRATION) {
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration,
+ get_jiffies_64() + te->expiration);
+ }
+ } else {
+ nft_setelem_activate(net, te->set, te->elem_priv);
+ }
+
nf_tables_setelem_notify(&ctx, te->set,
te->elem_priv,
NFT_MSG_NEWSETELEM);
@@ -10755,6 +10804,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
+ list_del(&nft_trans_container_set(trans)->list_trans_newset);
if (nft_trans_set_update(trans)) {
nft_trans_destroy(trans);
break;
@@ -10777,12 +10827,16 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- if (nft_trans_elem_set_bound(trans)) {
+ if (nft_trans_elem_update_flags(trans) ||
+ nft_trans_elem_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
te = nft_trans_container_elem(trans);
- nft_setelem_remove(net, te->set, te->elem_priv);
+ if (!te->set->ops->abort ||
+ nft_setelem_is_catchall(te->set, te->elem_priv))
+ nft_setelem_remove(net, te->set, te->elem_priv);
+
if (!nft_setelem_is_catchall(te->set, te->elem_priv))
atomic_dec(&te->set->nelems);
@@ -10850,6 +10904,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
}
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list));
+
nft_set_abort_update(&set_update_list);
synchronize_rcu();
@@ -11026,10 +11082,11 @@ static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
return 0;
}
-int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
+int nft_parse_register_load(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *sreg, u32 len)
{
- u32 reg;
- int err;
+ int err, invalid_reg;
+ u32 reg, next_register;
err = nft_parse_register(attr, &reg);
if (err < 0)
@@ -11039,11 +11096,36 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
if (err < 0)
return err;
+ next_register = DIV_ROUND_UP(len, NFT_REG32_SIZE) + reg;
+
+ /* Can't happen: nft_validate_register_load() should have failed */
+ if (WARN_ON_ONCE(next_register > NFT_REG32_NUM))
+ return -EINVAL;
+
+ /* find first register that did not see an earlier store. */
+ invalid_reg = find_next_zero_bit(ctx->reg_inited, NFT_REG32_NUM, reg);
+
+ /* invalid register within the range that we're loading from? */
+ if (invalid_reg < next_register)
+ return -ENODATA;
+
*sreg = reg;
return 0;
}
EXPORT_SYMBOL_GPL(nft_parse_register_load);
+static void nft_saw_register_store(const struct nft_ctx *__ctx,
+ int reg, unsigned int len)
+{
+ unsigned int registers = DIV_ROUND_UP(len, NFT_REG32_SIZE);
+ struct nft_ctx *ctx = (struct nft_ctx *)__ctx;
+
+ if (WARN_ON_ONCE(len == 0 || reg < 0))
+ return;
+
+ bitmap_set(ctx->reg_inited, reg, registers);
+}
+
static int nft_validate_register_store(const struct nft_ctx *ctx,
enum nft_registers reg,
const struct nft_data *data,
@@ -11065,7 +11147,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
return err;
}
- return 0;
+ break;
default:
if (type != NFT_DATA_VALUE)
return -EINVAL;
@@ -11078,8 +11160,11 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
sizeof_field(struct nft_regs, data))
return -ERANGE;
- return 0;
+ break;
}
+
+ nft_saw_register_store(ctx, reg, len);
+ return 0;
}
int nft_parse_register_store(const struct nft_ctx *ctx,
@@ -11519,6 +11604,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->commit_set_list);
INIT_LIST_HEAD(&nft_net->binding_list);
INIT_LIST_HEAD(&nft_net->module_list);
INIT_LIST_HEAD(&nft_net->notify_list);
@@ -11549,6 +11635,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
gc_seq = nft_gc_seq_begin(nft_net);
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list));
if (!list_empty(&nft_net->module_list))
nf_tables_module_autoload_cleanup(net);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index a48d5f0e2f3e..75598520b0fa 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -256,7 +256,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
const struct net *net = nft_net(pkt);
const struct nft_expr *expr, *last;
const struct nft_rule_dp *rule;
- struct nft_regs regs = {};
+ struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
bool genbit = READ_ONCE(net->nft.gencursor);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 932b3ddb34f1..7784ec094097 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -402,27 +402,27 @@ replay_abort:
{
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
}
if (!ss->valid_genid || !ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
if (!try_module_get(ss->owner)) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
if (!ss->valid_genid(net, genid)) {
module_put(ss->owner);
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -ERESTART, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
nfnl_unlock(subsys_id);
@@ -567,7 +567,7 @@ done:
if (status & NFNL_BATCH_REPLAY) {
ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD);
nfnl_err_reset(&err_list);
- kfree_skb(skb);
+ consume_skb(skb);
module_put(ss->owner);
goto replay;
} else if (status == NFNL_BATCH_DONE) {
@@ -593,7 +593,7 @@ done:
err = ss->abort(net, oskb, abort_action);
if (err == -EAGAIN) {
nfnl_err_reset(&err_list);
- kfree_skb(skb);
+ consume_skb(skb);
module_put(ss->owner);
status |= NFNL_BATCH_FAILURE;
goto replay_abort;
@@ -601,7 +601,7 @@ done:
}
nfnl_err_deliver(&err_list, oskb);
- kfree_skb(skb);
+ consume_skb(skb);
module_put(ss->owner);
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index e0716da256bf..d2773ce9b585 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -540,6 +540,14 @@ nla_put_failure:
return -1;
}
+static int nf_queue_checksum_help(struct sk_buff *entskb)
+{
+ if (skb_csum_is_sctp(entskb))
+ return skb_crc32c_csum_help(entskb);
+
+ return skb_checksum_help(entskb);
+}
+
static struct sk_buff *
nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
@@ -602,7 +610,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
case NFQNL_COPY_PACKET:
if (!(queue->flags & NFQA_CFG_F_GSO) &&
entskb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(entskb))
+ nf_queue_checksum_help(entskb))
return NULL;
data_len = READ_ONCE(queue->copy_range);
@@ -1014,7 +1022,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
break;
}
- if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
+ if (!skb_is_gso(skb) || ((queue->flags & NFQA_CFG_F_GSO) && !skb_is_gso_sctp(skb)))
return __nfqnl_enqueue_packet(net, queue, entry);
nf_bridge_adjust_skb_data(skb);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index ca857afbf061..7de95674fd8c 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -171,7 +171,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg,
priv->len);
if (err < 0)
return err;
@@ -365,7 +365,7 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
int err;
- err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg,
sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index f6e791a68101..2f82a444d21b 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -139,7 +139,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg,
priv->len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index cd4652259095..2605f43737bc 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -83,7 +83,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
- err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -222,7 +222,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -323,7 +323,7 @@ static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index d3d11dede545..52cdfee17f73 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -350,8 +350,7 @@ nla_put_failure:
}
static int nft_target_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
unsigned int hook_mask = 0;
@@ -611,8 +610,7 @@ static int nft_match_large_dump(struct sk_buff *skb,
}
static int nft_match_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct xt_match *match = expr->ops->data;
unsigned int hook_mask = 0;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 291ed2026367..cc7325329496 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -8,7 +8,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/seqlock.h>
+#include <linux/u64_stats_sync.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@@ -17,6 +17,11 @@
#include <net/netfilter/nf_tables_offload.h>
struct nft_counter {
+ u64_stats_t bytes;
+ u64_stats_t packets;
+};
+
+struct nft_counter_tot {
s64 bytes;
s64 packets;
};
@@ -25,25 +30,24 @@ struct nft_counter_percpu_priv {
struct nft_counter __percpu *counter;
};
-static DEFINE_PER_CPU(seqcount_t, nft_counter_seq);
+static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync);
static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
-
- write_seqcount_begin(myseq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
- this_cpu->bytes += pkt->skb->len;
- this_cpu->packets++;
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->bytes, pkt->skb->len);
+ u64_stats_inc(&this_cpu->packets);
+ u64_stats_update_end(nft_sync);
- write_seqcount_end(myseq);
local_bh_enable();
}
@@ -66,17 +70,16 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
if (cpu_stats == NULL)
return -ENOMEM;
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
+ this_cpu = raw_cpu_ptr(cpu_stats);
if (tb[NFTA_COUNTER_PACKETS]) {
- this_cpu->packets =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ u64_stats_set(&this_cpu->packets,
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])));
}
if (tb[NFTA_COUNTER_BYTES]) {
- this_cpu->bytes =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ u64_stats_set(&this_cpu->bytes,
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])));
}
- preempt_enable();
+
priv->counter = cpu_stats;
return 0;
}
@@ -104,35 +107,41 @@ static void nft_counter_obj_destroy(const struct nft_ctx *ctx,
}
static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
- struct nft_counter *total)
+ struct nft_counter_tot *total)
{
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- this_cpu->packets -= total->packets;
- this_cpu->bytes -= total->bytes;
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
+
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->packets, -total->packets);
+ u64_stats_add(&this_cpu->bytes, -total->bytes);
+ u64_stats_update_end(nft_sync);
+
local_bh_enable();
}
static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
- struct nft_counter *total)
+ struct nft_counter_tot *total)
{
struct nft_counter *this_cpu;
- const seqcount_t *myseq;
u64 bytes, packets;
unsigned int seq;
int cpu;
memset(total, 0, sizeof(*total));
for_each_possible_cpu(cpu) {
- myseq = per_cpu_ptr(&nft_counter_seq, cpu);
+ struct u64_stats_sync *nft_sync = per_cpu_ptr(&nft_counter_sync, cpu);
+
this_cpu = per_cpu_ptr(priv->counter, cpu);
do {
- seq = read_seqcount_begin(myseq);
- bytes = this_cpu->bytes;
- packets = this_cpu->packets;
- } while (read_seqcount_retry(myseq, seq));
+ seq = u64_stats_fetch_begin(nft_sync);
+ bytes = u64_stats_read(&this_cpu->bytes);
+ packets = u64_stats_read(&this_cpu->packets);
+ } while (u64_stats_fetch_retry(nft_sync, seq));
total->bytes += bytes;
total->packets += packets;
@@ -143,7 +152,7 @@ static int nft_counter_do_dump(struct sk_buff *skb,
struct nft_counter_percpu_priv *priv,
bool reset)
{
- struct nft_counter total;
+ struct nft_counter_tot total;
nft_counter_fetch(priv, &total);
@@ -232,7 +241,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, g
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
struct nft_counter __percpu *cpu_stats;
struct nft_counter *this_cpu;
- struct nft_counter total;
+ struct nft_counter_tot total;
nft_counter_fetch(priv, &total);
@@ -240,11 +249,9 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, g
if (cpu_stats == NULL)
return -ENOMEM;
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
- this_cpu->packets = total.packets;
- this_cpu->bytes = total.bytes;
- preempt_enable();
+ this_cpu = raw_cpu_ptr(cpu_stats);
+ u64_stats_set(&this_cpu->packets, total.packets);
+ u64_stats_set(&this_cpu->bytes, total.bytes);
priv_clone->counter = cpu_stats;
return 0;
@@ -262,18 +269,18 @@ static void nft_counter_offload_stats(struct nft_expr *expr,
const struct flow_stats *stats)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
- preempt_disable();
+ local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
- write_seqcount_begin(myseq);
- this_cpu->packets += stats->pkts;
- this_cpu->bytes += stats->bytes;
- write_seqcount_end(myseq);
- preempt_enable();
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->packets, stats->pkts);
+ u64_stats_add(&this_cpu->bytes, stats->bytes);
+ u64_stats_update_end(nft_sync);
+ local_bh_enable();
}
void nft_counter_init_seqcount(void)
@@ -281,7 +288,7 @@ void nft_counter_init_seqcount(void)
int cpu;
for_each_possible_cpu(cpu)
- seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
+ u64_stats_init(per_cpu_ptr(&nft_counter_sync, cpu));
}
struct nft_expr_type nft_counter_type;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 452ed94c3a4d..67a41cd2baaf 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -606,7 +606,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
}
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CT_SREG], &priv->sreg, len);
if (err < 0)
goto err1;
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index e5739a59ebf1..0573f96ce079 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -40,7 +40,7 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_DEV] == NULL)
return -EINVAL;
- return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
+ return nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
sizeof(int));
}
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index b4ada3ab2167..88922e0e8e83 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -56,7 +56,7 @@ static struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
if (!atomic_add_unless(&set->nelems, 1, set->size))
return NULL;
- timeout = priv->timeout ? : set->timeout;
+ timeout = priv->timeout ? : READ_ONCE(set->timeout);
elem_priv = nft_set_elem_init(set, &priv->tmpl,
&regs->data[priv->sreg_key], NULL,
&regs->data[priv->sreg_data],
@@ -94,9 +94,10 @@ void nft_dynset_eval(const struct nft_expr *expr,
if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
expr, regs, &ext)) {
if (priv->op == NFT_DYNSET_OP_UPDATE &&
- nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
- timeout = priv->timeout ? : set->timeout;
- *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout;
+ nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ READ_ONCE(nft_set_ext_timeout(ext)->timeout) != 0) {
+ timeout = priv->timeout ? : READ_ONCE(set->timeout);
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + timeout);
}
nft_set_elem_update_expr(ext, regs, pkt);
@@ -215,7 +216,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return err;
}
- err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
+ err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
set->klen);
if (err < 0)
return err;
@@ -226,7 +227,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (set->dtype == NFT_DATA_VERDICT)
return -EOPNOTSUPP;
- err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA],
+ err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_DATA],
&priv->sreg_data, set->dlen);
if (err < 0)
return err;
@@ -312,12 +313,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (priv->num_exprs)
nft_dynset_ext_add_expr(priv);
- if (set->flags & NFT_SET_TIMEOUT) {
- if (timeout || set->timeout) {
- nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
- nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
- }
- }
+ if (set->flags & NFT_SET_TIMEOUT &&
+ (timeout || READ_ONCE(set->timeout)))
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
priv->timeout = timeout;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 6eb571d0c3fd..6bfd33516241 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -588,7 +588,7 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
priv->flags = flags;
priv->op = op;
- return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
+ return nft_parse_register_load(ctx, tb[NFTA_EXTHDR_SREG], &priv->sreg,
priv->len);
}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index b58f62195ff3..96e02a83c045 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -26,8 +26,7 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
};
EXPORT_SYMBOL(nft_fib_policy);
-int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
const struct nft_fib *priv = nft_expr_priv(expr);
unsigned int hooks;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index ab9576098701..2f732fae5a83 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -9,6 +9,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h>
#include <net/ip.h> /* for ipv4 options. */
+#include <net/inet_dscp.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -235,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
- fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos);
+ fl.u.ip4.flowi4_tos = ip_hdr(pkt->skb)->tos & INET_DSCP_MASK;
fl.u.ip4.flowi4_mark = pkt->skb->mark;
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break;
@@ -380,8 +381,7 @@ out:
}
static int nft_flow_offload_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
unsigned int hook_mask = (1 << NF_INET_FORWARD);
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 358e742afad7..152a9fb4d23a 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -52,7 +52,7 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_FWD_SREG_DEV] == NULL)
return -EINVAL;
- return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
sizeof(int));
}
@@ -178,12 +178,12 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ err = nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
sizeof(int));
if (err < 0)
return err;
- return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
+ return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
addr_len);
}
@@ -204,8 +204,7 @@ nla_put_failure:
}
static int nft_fwd_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS) |
(1 << NF_NETDEV_EGRESS));
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 868d68302d22..5d034bbb6913 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -92,7 +92,7 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_HASH_SREG], &priv->sreg, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index ac2422c215e5..02ee5fb69871 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -244,8 +244,7 @@ nla_put_failure:
}
static int nft_immediate_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **d)
+ const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
struct nft_ctx *pctx = (struct nft_ctx *)ctx;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index f3080fa1b226..63ef832b8aa7 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -113,7 +113,7 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
- err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_LOOKUP_SREG], &priv->sreg,
set->klen);
if (err < 0)
return err;
@@ -206,8 +206,7 @@ nla_put_failure:
}
static int nft_lookup_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **d)
+ const struct nft_expr *expr)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
struct nft_set_iter iter;
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 8a14aaca93bb..868bd4d73555 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -27,8 +27,7 @@ static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
};
static int nft_masq_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
int err;
@@ -52,13 +51,13 @@ static int nft_masq_init(const struct nft_ctx *ctx,
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
- err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MIN],
&priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
- err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MAX],
&priv->sreg_proto_max,
plen);
if (err < 0)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 9139ce38ea7b..8c8eb14d647b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -581,8 +581,7 @@ static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx)
}
static int nft_meta_get_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -600,8 +599,7 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
}
int nft_meta_set_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
@@ -657,7 +655,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
}
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 808f5802c270..6e21f72c5b57 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -137,8 +137,7 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
};
static int nft_nat_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct nft_nat *priv = nft_expr_priv(expr);
int err;
@@ -214,13 +213,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MIN],
&priv->sreg_addr_min, alen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_ADDR_MAX]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MAX],
&priv->sreg_addr_max,
alen);
if (err < 0)
@@ -234,13 +233,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
plen = sizeof_field(struct nf_nat_range, min_proto.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MIN],
&priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_PROTO_MAX]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MAX],
&priv->sreg_proto_max,
plen);
if (err < 0)
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 509011b1ef59..09da7a3f9f96 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -143,7 +143,7 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
set->klen);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 7fec57ff736f..1c0b493ef0a9 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -108,8 +108,7 @@ nla_put_failure:
}
static int nft_osf_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
unsigned int hooks;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 50429cbd42da..330609a76fb2 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -981,7 +981,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
}
priv->csum_type = csum_type;
- return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
+ return nft_parse_register_load(ctx, tb[NFTA_PAYLOAD_SREG], &priv->sreg,
priv->len);
}
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index b2b8127c8d43..344fe311878f 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -69,8 +69,7 @@ static void nft_queue_sreg_eval(const struct nft_expr *expr,
}
static int nft_queue_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
@@ -136,7 +135,7 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx,
struct nft_queue *priv = nft_expr_priv(expr);
int err;
- err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM],
+ err = nft_parse_register_load(ctx, tb[NFTA_QUEUE_SREG_QNUM],
&priv->sreg_qnum, sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 51ae64cd268f..ea382f7bbd78 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -83,7 +83,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
goto err2;
}
- err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_RANGE_SREG], &priv->sreg,
desc_from.len);
if (err < 0)
goto err2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index a58bd8d291ff..95eedad85c83 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -27,8 +27,7 @@ static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
};
static int nft_redir_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
int err;
@@ -51,13 +50,13 @@ static int nft_redir_init(const struct nft_ctx *ctx,
plen = sizeof_field(struct nf_nat_range, min_proto.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
- err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MIN],
&priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
- err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MAX],
&priv->sreg_proto_max,
plen);
if (err < 0)
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index ed2e668474d6..196a92c7ea09 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -24,8 +24,7 @@ const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
EXPORT_SYMBOL_GPL(nft_reject_policy);
int nft_reject_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 973fa31a9dd6..49020e67304a 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -61,8 +61,7 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
}
static int nft_reject_inet_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c
index 7865cd8b11bb..2558ce1505d9 100644
--- a/net/netfilter/nft_reject_netdev.c
+++ b/net/netfilter/nft_reject_netdev.c
@@ -145,8 +145,7 @@ out:
}
static int nft_reject_netdev_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS));
}
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 14d88394bcb7..dc50b9a5bd68 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -160,8 +160,7 @@ nla_put_failure:
return -1;
}
-static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
const struct nft_rt *priv = nft_expr_priv(expr);
unsigned int hooks;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index f30163e2ca62..f5da0c1775f2 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -9,7 +9,8 @@
struct nft_socket {
enum nft_socket_keys key:8;
- u8 level;
+ u8 level; /* cgroupv2 level to extract */
+ u8 level_user; /* cgroupv2 level provided by userspace */
u8 len;
union {
u8 dreg;
@@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
memcpy(dest, &cgid, sizeof(u64));
return true;
}
+
+/* process context only, uses current->nsproxy. */
+static noinline int nft_socket_cgroup_subtree_level(void)
+{
+ struct cgroup *cgrp = cgroup_get_from_path("/");
+ int level;
+
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ level = cgrp->level;
+
+ cgroup_put(cgrp);
+
+ if (WARN_ON_ONCE(level > 255))
+ return -ERANGE;
+
+ if (WARN_ON_ONCE(level < 0))
+ return -EINVAL;
+
+ return level;
+}
#endif
static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
@@ -110,13 +133,13 @@ static void nft_socket_eval(const struct nft_expr *expr,
*dest = READ_ONCE(sk->sk_mark);
} else {
regs->verdict.code = NFT_BREAK;
- return;
+ goto out_put_sk;
}
break;
case NFT_SOCKET_WILDCARD:
if (!sk_fullsock(sk)) {
regs->verdict.code = NFT_BREAK;
- return;
+ goto out_put_sk;
}
nft_socket_wildcard(pkt, regs, sk, dest);
break;
@@ -124,7 +147,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
case NFT_SOCKET_CGROUPV2:
if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
regs->verdict.code = NFT_BREAK;
- return;
+ goto out_put_sk;
}
break;
#endif
@@ -133,6 +156,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
+out_put_sk:
if (sk != skb->sk)
sock_gen_put(sk);
}
@@ -173,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx,
case NFT_SOCKET_MARK:
len = sizeof(u32);
break;
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_SOCK_CGROUP_DATA
case NFT_SOCKET_CGROUPV2: {
unsigned int level;
+ int err;
if (!tb[NFTA_SOCKET_LEVEL])
return -EINVAL;
@@ -184,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx,
if (level > 255)
return -EOPNOTSUPP;
+ err = nft_socket_cgroup_subtree_level();
+ if (err < 0)
+ return err;
+
+ priv->level_user = level;
+
+ level += err;
+ /* Implies a giant cgroup tree */
+ if (WARN_ON_ONCE(level > 255))
+ return -EOPNOTSUPP;
+
priv->level = level;
len = sizeof(u64);
break;
@@ -208,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb,
if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
return -1;
if (priv->key == NFT_SOCKET_CGROUPV2 &&
- nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
+ nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user)))
return -1;
return 0;
}
@@ -239,8 +275,7 @@ static bool nft_socket_reduce(struct nft_regs_track *track,
}
static int nft_socket_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 1d737f89dfc1..5d3e51825985 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -248,8 +248,7 @@ static void nft_synproxy_eval(const struct nft_expr *expr,
}
static int nft_synproxy_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 71412adb73d4..50481280abd2 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -254,14 +254,14 @@ static int nft_tproxy_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_TPROXY_REG_ADDR]) {
- err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR],
+ err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_ADDR],
&priv->sreg_addr, alen);
if (err < 0)
return err;
}
if (tb[NFTA_TPROXY_REG_PORT]) {
- err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT],
+ err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_PORT],
&priv->sreg_port, sizeof(u16));
if (err < 0)
return err;
@@ -313,8 +313,7 @@ static int nft_tproxy_dump(struct sk_buff *skb,
}
static int nft_tproxy_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 1c866757db55..8a07b46cc8fb 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -229,8 +229,7 @@ static int nft_xfrm_get_dump(struct sk_buff *skb,
return 0;
}
-static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
+static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
const struct nft_xfrm *priv = nft_expr_priv(expr);
unsigned int hooks;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 5d04ef80a61d..0e762277bcf8 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -86,6 +86,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
{
struct xt_connlimit_info *info = par->matchinfo;
unsigned int keylen;
+ int ret;
keylen = sizeof(u32);
if (par->family == NFPROTO_IPV6)
@@ -93,8 +94,17 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
else
keylen += sizeof(struct in_addr);
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0) {
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+ }
+
/* init private data */
- info->data = nf_conncount_init(par->net, par->family, keylen);
+ info->data = nf_conncount_init(par->net, keylen);
+ if (IS_ERR(info->data))
+ nf_ct_netns_put(par->net, par->family);
return PTR_ERR_OR_ZERO(info->data);
}
@@ -103,7 +113,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_connlimit_info *info = par->matchinfo;
- nf_conncount_destroy(par->net, par->family, info->data);
+ nf_conncount_destroy(par->net, info->data);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_match connlimit_mt_reg __read_mostly = {
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 9751e29d4bbb..5b0e4e62ab8b 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -41,7 +41,6 @@ struct netlink_sock {
struct netlink_callback cb;
struct mutex nl_cb_mutex;
- struct mutex *dump_cb_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index bd2b17b219ae..2b5e246b8d9a 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -189,7 +189,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
}
nr_node->callsign = *nr;
- strcpy(nr_node->mnemonic, mnemonic);
+ strscpy(nr_node->mnemonic, mnemonic);
nr_node->which = 0;
nr_node->count = 1;
@@ -214,7 +214,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
nr_node_lock(nr_node);
if (quality != 0)
- strcpy(nr_node->mnemonic, mnemonic);
+ strscpy(nr_node->mnemonic, mnemonic);
for (found = 0, i = 0; i < nr_node->count; i++) {
if (nr_node->routes[i].neighbour == nr_neigh) {
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 101f9a23792c..16e260014684 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -237,14 +237,18 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_vlan *vlan)
{
+ int err;
+
if (skb_vlan_tag_present(skb)) {
invalidate_flow_key(key);
} else {
key->eth.vlan.tci = vlan->vlan_tci;
key->eth.vlan.tpid = vlan->vlan_tpid;
}
- return skb_vlan_push(skb, vlan->vlan_tpid,
- ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ err = skb_vlan_push(skb, vlan->vlan_tpid,
+ ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ skb_reset_mac_len(skb);
+ return err;
}
/* 'src' is already properly masked. */
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 8eb1d644b741..3bb4810234aa 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
attr == OVS_KEY_ATTR_CT_MARK)
return true;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- attr == OVS_KEY_ATTR_CT_LABELS) {
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- return ovs_net->xt_label;
- }
+ attr == OVS_KEY_ATTR_CT_LABELS)
+ return true;
return false;
}
@@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
{
+ unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
struct ovs_conntrack_info ct_info;
const char *helper = NULL;
u16 family;
@@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
return -ENOMEM;
}
+ if (nf_connlabels_get(net, n_bits - 1)) {
+ nf_ct_tmpl_free(ct_info.ct);
+ OVS_NLERR(log, "Failed to set connlabel length");
+ return -EOPNOTSUPP;
+ }
+
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
@@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
if (ct_info->ct) {
if (ct_info->timeout[0])
nf_ct_destroy_timeout(ct_info->ct);
+ nf_connlabels_put(nf_ct_net(ct_info->ct));
nf_ct_tmpl_free(ct_info->ct);
}
}
@@ -1603,8 +1608,7 @@ static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net)
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]);
- ovs_net->ct_limit_info->data =
- nf_conncount_init(net, NFPROTO_INET, sizeof(u32));
+ ovs_net->ct_limit_info->data = nf_conncount_init(net, sizeof(u32));
if (IS_ERR(ovs_net->ct_limit_info->data)) {
err = PTR_ERR(ovs_net->ct_limit_info->data);
@@ -1621,7 +1625,7 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info;
int i;
- nf_conncount_destroy(net, NFPROTO_INET, info->data);
+ nf_conncount_destroy(net, info->data);
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
@@ -2002,17 +2006,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
int ovs_ct_init(struct net *net)
{
- unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- if (nf_connlabels_get(net, n_bits - 1)) {
- ovs_net->xt_label = false;
- OVS_NLERR(true, "Failed to set connlabel length");
- } else {
- ovs_net->xt_label = true;
- }
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
return ovs_ct_limit_init(net, ovs_net);
#else
return 0;
@@ -2021,12 +2017,9 @@ int ovs_ct_init(struct net *net)
void ovs_ct_exit(struct net *net)
{
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
ovs_ct_limit_exit(net, ovs_net);
#endif
-
- if (ovs_net->xt_label)
- nf_connlabels_put(net);
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 99d72543abd3..78d9961fcd44 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2706,7 +2706,7 @@ static struct pernet_operations ovs_net_ops = {
};
static const char * const ovs_drop_reasons[] = {
-#define S(x) (#x),
+#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
OVS_DROP_REASONS(S)
#undef S
};
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 9ca6231ea647..365b9bb7f546 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -160,9 +160,6 @@ struct ovs_net {
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
-
- /* Module reference for configuring conntrack. */
- bool xt_label;
};
/**
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c92bdc4dfe19..729ef582a3a8 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2491,7 +2491,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
- return (void *)acts;
+ return ERR_CAST(acts);
memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
acts->actions_len = (*sfa)->actions_len;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 4b33133cbdff..5858d65ea1a9 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -102,19 +102,20 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
IFF_NO_QUEUE;
+ netdev->lltx = true;
netdev->needs_free_netdev = true;
netdev->priv_destructor = NULL;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
- netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
- NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
+ netdev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+ NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE |
+ NETIF_F_GSO_ENCAP_ALL;
netdev->vlan_features = netdev->features;
netdev->hw_enc_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
- netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+ netdev->hw_features = netdev->features;
eth_hw_addr_random(netdev);
}
@@ -148,7 +149,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
/* Restrict bridge port to current netns. */
if (vport->port_no == OVSP_LOCAL)
- vport->dev->features |= NETIF_F_NETNS_LOCAL;
+ vport->dev->netns_local = true;
rtnl_lock();
err = register_netdevice(vport->dev);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4a364cdd445e..a705ec214254 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2216,7 +2216,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
}
}
- snaplen = skb->len;
+ snaplen = skb_frags_readable(skb) ? skb->len : skb_headlen(skb);
res = run_filter(skb, sk, snaplen);
if (!res)
@@ -2336,7 +2336,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
}
- snaplen = skb->len;
+ snaplen = skb_frags_readable(skb) ? skb->len : skb_headlen(skb);
res = run_filter(skb, sk, snaplen);
if (!res)
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index 75cd696963b2..f007730aa2bb 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -26,3 +26,12 @@ config RDS_DEBUG
bool "RDS debugging messages"
depends on RDS
default n
+
+config GCOV_PROFILE_RDS
+ bool "Enable GCOV profiling on RDS"
+ depends on GCOV_KERNEL
+ help
+ Enable GCOV profiling on RDS for checking which functions/lines
+ are executed.
+
+ If unsure, say N.
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 8fdc118e2927..3af1ca1d965c 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -15,3 +15,8 @@ rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
tcp_send.o tcp_stats.o
ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG
+
+# for GCOV coverage profiling
+ifdef CONFIG_GCOV_PROFILE_RDS
+GCOV_PROFILE := y
+endif
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 2ba71102b1f1..8ef3178ed4d6 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -369,9 +369,6 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
void rds_ib_conn_free(void *arg);
int rds_ib_conn_path_connect(struct rds_conn_path *cp);
void rds_ib_conn_path_shutdown(struct rds_conn_path *cp);
-void rds_ib_state_change(struct sock *sk);
-int rds_ib_listen_init(void);
-void rds_ib_listen_stop(void);
__printf(2, 3)
void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -402,7 +399,6 @@ void rds_ib_inc_free(struct rds_incoming *inc);
int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc,
struct rds_ib_ack_state *state);
-void rds_ib_recv_tasklet_fn(unsigned long data);
void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 7a5367628c05..13a5126bc36e 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -539,18 +539,14 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type)
#endif
bool rfkill_set_hw_state_reason(struct rfkill *rfkill,
- bool blocked, unsigned long reason)
+ bool blocked,
+ enum rfkill_hard_block_reasons reason)
{
unsigned long flags;
bool ret, prev;
BUG_ON(!rfkill);
- if (WARN(reason & ~(RFKILL_HARD_BLOCK_SIGNAL |
- RFKILL_HARD_BLOCK_NOT_OWNER),
- "hw_state reason not supported: 0x%lx", reason))
- return rfkill_blocked(rfkill);
-
spin_lock_irqsave(&rfkill->lock, flags);
prev = !!(rfkill->hard_block_reasons & reason);
if (blocked) {
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 84529886c2e6..c268c2b011f4 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -3,6 +3,7 @@
* Copyright (c) 2011, NVIDIA Corporation.
*/
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -72,6 +73,20 @@ static int rfkill_gpio_acpi_probe(struct device *dev,
return devm_acpi_dev_add_driver_gpios(dev, acpi_rfkill_default_gpios);
}
+/* List of DMI matches for devices on which rfkill-gpio should not load,
+ * to avoid firmware bugs.
+ */
+static const struct dmi_system_id rfkill_gpio_deny_table[] = {
+ {
+ /* Lenovo Yoga Tab 3 Pro YT3-X90, bogus "BCM4752" device in DSDT */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"),
+ },
+ },
+ { }
+};
+
static int rfkill_gpio_probe(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill;
@@ -81,6 +96,9 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
const char *type_name;
int ret;
+ if (dmi_check_system(rfkill_gpio_deny_table))
+ return -ENODEV;
+
rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL);
if (!rfkill)
return -ENOMEM;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 08de24658f4f..80d682f89b23 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -856,7 +856,6 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
struct rxrpc_connection *conn,
struct sockaddr_rxrpc *peer_srx,
struct sk_buff *skb);
-void rxrpc_accept_incoming_calls(struct rxrpc_local *);
int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
/*
@@ -969,7 +968,6 @@ void rxrpc_connect_client_calls(struct rxrpc_local *local);
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
-void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
void rxrpc_discard_expired_client_conns(struct rxrpc_local *local);
void rxrpc_clean_up_local_conns(struct rxrpc_local *);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 3ba8e7e739b5..2197eb625658 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -44,8 +44,6 @@ static DEFINE_MUTEX(zones_mutex);
struct zones_ht_key {
struct net *net;
u16 zone;
- /* Note : pad[] must be the last field. */
- u8 pad[];
};
struct tcf_ct_flow_table {
@@ -62,7 +60,7 @@ struct tcf_ct_flow_table {
static const struct rhashtable_params zones_params = {
.head_offset = offsetof(struct tcf_ct_flow_table, node),
.key_offset = offsetof(struct tcf_ct_flow_table, key),
- .key_len = offsetof(struct zones_ht_key, pad),
+ .key_len = offsetofend(struct zones_ht_key, zone),
.automatic_shrinking = true,
};
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 22f4b1e8ade9..383bf18b6862 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -96,6 +96,7 @@ out:
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
+ skb_reset_mac_len(skb);
return action;
drop:
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 9602dafe32e6..f2f9b75008bb 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -361,8 +361,24 @@ static const u8 besteffort[] = {
static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
static const u8 bulk_order[] = {1, 0, 2, 3};
+/* There is a big difference in timing between the accurate values placed in the
+ * cache and the approximations given by a single Newton step for small count
+ * values, particularly when stepping from count 1 to 2 or vice versa. Hence,
+ * these values are calculated using eight Newton steps, using the
+ * implementation below. Above 16, a single Newton step gives sufficient
+ * accuracy in either direction, given the precision stored.
+ *
+ * The magnitude of the error when stepping up to count 2 is such as to give the
+ * value that *should* have been produced at count 4.
+ */
+
#define REC_INV_SQRT_CACHE (16)
-static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
+static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
+ ~0, ~0, 3037000500, 2479700525,
+ 2147483647, 1920767767, 1753413056, 1623345051,
+ 1518500250, 1431655765, 1358187914, 1294981364,
+ 1239850263, 1191209601, 1147878294, 1108955788
+};
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
@@ -388,47 +404,14 @@ static void cobalt_newton_step(struct cobalt_vars *vars)
static void cobalt_invsqrt(struct cobalt_vars *vars)
{
if (vars->count < REC_INV_SQRT_CACHE)
- vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
+ vars->rec_inv_sqrt = inv_sqrt_cache[vars->count];
else
cobalt_newton_step(vars);
}
-/* There is a big difference in timing between the accurate values placed in
- * the cache and the approximations given by a single Newton step for small
- * count values, particularly when stepping from count 1 to 2 or vice versa.
- * Above 16, a single Newton step gives sufficient accuracy in either
- * direction, given the precision stored.
- *
- * The magnitude of the error when stepping up to count 2 is such as to give
- * the value that *should* have been produced at count 4.
- */
-
-static void cobalt_cache_init(void)
-{
- struct cobalt_vars v;
-
- memset(&v, 0, sizeof(v));
- v.rec_inv_sqrt = ~0U;
- cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
-
- for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
- cobalt_newton_step(&v);
- cobalt_newton_step(&v);
- cobalt_newton_step(&v);
- cobalt_newton_step(&v);
-
- cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
- }
-}
-
static void cobalt_vars_init(struct cobalt_vars *vars)
{
memset(vars, 0, sizeof(*vars));
-
- if (!cobalt_rec_inv_sqrt_cache[0]) {
- cobalt_cache_init();
- cobalt_rec_inv_sqrt_cache[0] = ~0;
- }
}
/* CoDel control_law is t + interval/sqrt(count)
@@ -786,12 +769,15 @@ skip_hash:
* queue, accept the collision, update the host tags.
*/
q->way_collisions++;
- if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
- q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
- q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
- }
allocate_src = cake_dsrc(flow_mode);
allocate_dst = cake_ddst(flow_mode);
+
+ if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
+ if (allocate_src)
+ q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+ if (allocate_dst)
+ q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+ }
found:
/* reserve queue for future packets in same flow */
reduced_hash = outer_hash + k;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 238974725679..19a49af5a9e5 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -663,7 +663,9 @@ begin:
pband = &q->band_flows[q->band_nr];
pband->credit = min(pband->credit + pband->quantum,
pband->quantum);
- goto begin;
+ if (pband->credit > 0)
+ goto begin;
+ retry = 0;
}
if (q->time_next_delayed_flow != ~0ULL)
qdisc_watchdog_schedule_range_ns(&q->watchdog,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index edc72962ae63..39382ee1e331 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -446,12 +446,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct netem_sched_data *q = qdisc_priv(sch);
/* We don't fill cb now as skb_unshare() may invalidate it */
struct netem_skb_cb *cb;
- struct sk_buff *skb2;
+ struct sk_buff *skb2 = NULL;
struct sk_buff *segs = NULL;
unsigned int prev_len = qdisc_pkt_len(skb);
int count = 1;
- int rc = NET_XMIT_SUCCESS;
- int rc_drop = NET_XMIT_DROP;
/* Do not fool qdisc_drop_all() */
skb->prev = NULL;
@@ -480,19 +478,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
skb_orphan_partial(skb);
/*
- * If we need to duplicate packet, then re-insert at top of the
- * qdisc tree, since parent queuer expects that only one
- * skb will be queued.
+ * If we need to duplicate packet, then clone it before
+ * original is modified.
*/
- if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
- struct Qdisc *rootq = qdisc_root_bh(sch);
- u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
-
- q->duplicate = 0;
- rootq->enqueue(skb2, rootq, to_free);
- q->duplicate = dupsave;
- rc_drop = NET_XMIT_SUCCESS;
- }
+ if (count > 1)
+ skb2 = skb_clone(skb, GFP_ATOMIC);
/*
* Randomized packet corruption.
@@ -504,7 +494,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (skb_is_gso(skb)) {
skb = netem_segment(skb, sch, to_free);
if (!skb)
- return rc_drop;
+ goto finish_segs;
+
segs = skb->next;
skb_mark_not_on_list(skb);
qdisc_skb_cb(skb)->pkt_len = skb->len;
@@ -530,7 +521,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* re-link segs, so that qdisc_drop_all() frees them all */
skb->next = segs;
qdisc_drop_all(skb, sch, to_free);
- return rc_drop;
+ if (skb2)
+ __qdisc_drop(skb2, to_free);
+ return NET_XMIT_DROP;
+ }
+
+ /*
+ * If doing duplication then re-insert at top of the
+ * qdisc tree, since parent queuer expects that only one
+ * skb will be queued.
+ */
+ if (skb2) {
+ struct Qdisc *rootq = qdisc_root_bh(sch);
+ u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
+
+ q->duplicate = 0;
+ rootq->enqueue(skb2, rootq, to_free);
+ q->duplicate = dupsave;
+ skb2 = NULL;
}
qdisc_qstats_backlog_inc(sch, skb);
@@ -601,9 +609,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
finish_segs:
+ if (skb2)
+ __qdisc_drop(skb2, to_free);
+
if (segs) {
unsigned int len, last_len;
- int nb;
+ int rc, nb;
len = skb ? skb->len : 0;
nb = skb ? 1 : 0;
@@ -731,11 +742,10 @@ deliver:
err = qdisc_enqueue(skb, q->qdisc, &to_free);
kfree_skb_list(to_free);
- if (err != NET_XMIT_SUCCESS &&
- net_xmit_drop_count(err)) {
- qdisc_qstats_drop(sch);
- qdisc_tree_reduce_backlog(sch, 1,
- pkt_len);
+ if (err != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(err))
+ qdisc_qstats_drop(sch);
+ qdisc_tree_reduce_backlog(sch, 1, pkt_len);
}
goto tfifo_dequeue;
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index cc2df9f8c14a..8498d0606b24 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1952,7 +1952,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}
- rcu_assign_pointer(q->admin_sched, new_admin);
+ /* Not going to race against advance_sched(), but still */
+ admin = rcu_replace_pointer(q->admin_sched, new_admin,
+ lockdep_rtnl_is_held());
if (admin)
call_rcu(&admin->rcu, taprio_free_sched_cb);
} else {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5a7436a13b74..39ca5403d4d7 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -44,6 +44,7 @@
#include <net/inet_common.h>
#include <net/inet_ecn.h>
#include <net/udp_tunnel.h>
+#include <net/inet_dscp.h>
#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
@@ -435,7 +436,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
- fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_tos = tos & INET_DSCP_MASK;
fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 5adf0c0a6c1a..7d315a18612b 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2260,12 +2260,6 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
}
}
- /* Update socket peer label if first association. */
- if (security_sctp_assoc_request(new_asoc, chunk->head_skb ?: chunk->skb)) {
- sctp_association_free(new_asoc);
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- }
-
/* Set temp so that it won't be added into hashtable */
new_asoc->temp = 1;
@@ -2274,6 +2268,22 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
*/
action = sctp_tietags_compare(new_asoc, asoc);
+ /* In cases C and E the association doesn't enter the ESTABLISHED
+ * state, so there is no need to call security_sctp_assoc_request().
+ */
+ switch (action) {
+ case 'A': /* Association restart. */
+ case 'B': /* Collision case B. */
+ case 'D': /* Collision case D. */
+ /* Update socket peer label if first association. */
+ if (security_sctp_assoc_request((struct sctp_association *)asoc,
+ chunk->head_skb ?: chunk->skb)) {
+ sctp_association_free(new_asoc);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ }
+ break;
+ }
+
switch (action) {
case 'A': /* Association restart. */
retval = sctp_sf_do_dupcook_a(net, ep, asoc, chunk, commands,
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8e3093938cd2..0316217b7687 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1466,10 +1466,6 @@ connect_abort:
static int smc_connect_check_aclc(struct smc_init_info *ini,
struct smc_clc_msg_accept_confirm *aclc)
{
- if (aclc->hdr.typev1 != SMC_TYPE_R &&
- aclc->hdr.typev1 != SMC_TYPE_D)
- return SMC_CLC_DECL_MODEUNSUPP;
-
if (aclc->hdr.version >= SMC_V2) {
if ((aclc->hdr.typev1 == SMC_TYPE_R &&
!smcr_indicated(ini->smc_type_v2)) ||
@@ -1523,10 +1519,6 @@ static int __smc_connect(struct smc_sock *smc)
ini->smcd_version &= ~SMC_V1;
ini->smcr_version = 0;
ini->smc_type_v1 = SMC_TYPE_N;
- if (!ini->smcd_version) {
- rc = SMC_CLC_DECL_GETVLANERR;
- goto fallback;
- }
}
rc = smc_find_proposal_devices(smc, ini);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 34b781e463c4..ad77d6b6b8d3 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -284,6 +284,9 @@ struct smc_connection {
struct smc_sock { /* smc sock container */
struct sock sk;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6_pinfo *pinet6;
+#endif
struct socket *clcsock; /* internal tcp socket */
void (*clcsk_state_change)(struct sock *sk);
/* original stat_change fct. */
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 467effb50cd6..5625fda2960b 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -145,6 +145,8 @@ struct smc_clc_v2_extension {
);
u8 user_eids[][SMC_MAX_EID_LEN];
};
+static_assert(offsetof(struct smc_clc_v2_extension, user_eids) == sizeof(struct smc_clc_v2_extension_fixed),
+ "struct member likely outside of struct_group_tagged()");
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
__be32 outgoing_subnet; /* subnet mask */
@@ -169,6 +171,8 @@ struct smc_clc_smcd_v2_extension {
);
struct smc_clc_smcd_gid_chid gidchid[];
};
+static_assert(offsetof(struct smc_clc_smcd_v2_extension, gidchid) == sizeof(struct smc_clc_smcd_v2_extension_fixed),
+ "struct member likely outside of struct_group_tagged()");
struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
struct smc_clc_msg_hdr hdr;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3b95828d9976..4e694860ece4 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -221,6 +221,35 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
write_unlock_bh(&lgr->conns_lock);
}
+static void smc_lgr_buf_list_add(struct smc_link_group *lgr,
+ bool is_rmb,
+ struct list_head *buf_list,
+ struct smc_buf_desc *buf_desc)
+{
+ list_add(&buf_desc->list, buf_list);
+ if (is_rmb) {
+ lgr->alloc_rmbs += buf_desc->len;
+ lgr->alloc_rmbs +=
+ lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
+ } else {
+ lgr->alloc_sndbufs += buf_desc->len;
+ }
+}
+
+static void smc_lgr_buf_list_del(struct smc_link_group *lgr,
+ bool is_rmb,
+ struct smc_buf_desc *buf_desc)
+{
+ list_del(&buf_desc->list);
+ if (is_rmb) {
+ lgr->alloc_rmbs -= buf_desc->len;
+ lgr->alloc_rmbs -=
+ lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
+ } else {
+ lgr->alloc_sndbufs -= buf_desc->len;
+ }
+}
+
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
{
struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
@@ -363,6 +392,10 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_R_SNDBUF_ALLOC, lgr->alloc_sndbufs))
+ goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_R_RMB_ALLOC, lgr->alloc_rmbs))
+ goto errattr;
if (lgr->smc_version > SMC_V1) {
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
if (!v2_attrs)
@@ -541,6 +574,10 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_D_SNDBUF_ALLOC, lgr->alloc_sndbufs))
+ goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_D_DMB_ALLOC, lgr->alloc_rmbs))
+ goto errattr;
memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
@@ -1138,7 +1175,7 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
lock = is_rmb ? &lgr->rmbs_lock :
&lgr->sndbufs_lock;
down_write(lock);
- list_del(&buf_desc->list);
+ smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
up_write(lock);
smc_buf_free(lgr, is_rmb, buf_desc);
@@ -1166,22 +1203,30 @@ static void smcd_buf_detach(struct smc_connection *conn)
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ bool is_smcd = lgr->is_smcd;
+ int bufsize;
+
if (conn->sndbuf_desc) {
- if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
+ bufsize = conn->sndbuf_desc->len;
+ if (!is_smcd && conn->sndbuf_desc->is_vm) {
smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
} else {
- memzero_explicit(conn->sndbuf_desc->cpu_addr, conn->sndbuf_desc->len);
+ memzero_explicit(conn->sndbuf_desc->cpu_addr, bufsize);
WRITE_ONCE(conn->sndbuf_desc->used, 0);
}
+ SMC_STAT_RMB_SIZE(smc, is_smcd, false, false, bufsize);
}
if (conn->rmb_desc) {
- if (!lgr->is_smcd) {
+ bufsize = conn->rmb_desc->len;
+ if (!is_smcd) {
smcr_buf_unuse(conn->rmb_desc, true, lgr);
} else {
- memzero_explicit(conn->rmb_desc->cpu_addr,
- conn->rmb_desc->len + sizeof(struct smcd_cdc_msg));
+ bufsize += sizeof(struct smcd_cdc_msg);
+ memzero_explicit(conn->rmb_desc->cpu_addr, bufsize);
WRITE_ONCE(conn->rmb_desc->used, 0);
}
+ SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize);
}
}
@@ -1377,7 +1422,7 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
buf_list = &lgr->sndbufs[i];
list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
list) {
- list_del(&buf_desc->list);
+ smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
smc_buf_free(lgr, is_rmb, buf_desc);
}
}
@@ -2250,7 +2295,7 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
}
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
- bool is_rmb, int bufsize)
+ int bufsize)
{
struct smc_buf_desc *buf_desc;
@@ -2390,7 +2435,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
if (buf_desc) {
buf_desc->is_dma_need_sync = 0;
- SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
+ SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
break; /* found reusable slot */
}
@@ -2398,7 +2443,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_smcd)
buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
else
- buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
+ buf_desc = smcr_new_buf_create(lgr, bufsize);
if (PTR_ERR(buf_desc) == -ENOMEM)
break;
@@ -2411,10 +2456,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
}
SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
- SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
+ SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
buf_desc->used = 1;
down_write(lock);
- list_add(&buf_desc->list, buf_list);
+ smc_lgr_buf_list_add(lgr, is_rmb, buf_list, buf_desc);
up_write(lock);
break; /* found */
}
@@ -2496,7 +2541,8 @@ create_rmb:
rc = __smc_buf_create(smc, is_smcd, true);
if (rc && smc->conn.sndbuf_desc) {
down_write(&smc->conn.lgr->sndbufs_lock);
- list_del(&smc->conn.sndbuf_desc->list);
+ smc_lgr_buf_list_del(smc->conn.lgr, false,
+ smc->conn.sndbuf_desc);
up_write(&smc->conn.lgr->sndbufs_lock);
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
smc->conn.sndbuf_desc = NULL;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index d93cf51dbd7c..0db4e5f79ac4 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -281,6 +281,8 @@ struct smc_link_group {
struct rw_semaphore sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
struct rw_semaphore rmbs_lock; /* protects rx buffers */
+ u64 alloc_sndbufs; /* stats of tx buffers */
+ u64 alloc_rmbs; /* stats of rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c
index bece346dd8e9..a5b2041600f9 100644
--- a/net/smc/smc_inet.c
+++ b/net/smc/smc_inet.c
@@ -60,6 +60,11 @@ static struct inet_protosw smc_inet_protosw = {
};
#if IS_ENABLED(CONFIG_IPV6)
+struct smc6_sock {
+ struct smc_sock smc;
+ struct ipv6_pinfo inet6;
+};
+
static struct proto smc_inet6_prot = {
.name = "INET6_SMC",
.owner = THIS_MODULE,
@@ -67,9 +72,10 @@ static struct proto smc_inet6_prot = {
.hash = smc_hash_sk,
.unhash = smc_unhash_sk,
.release_cb = smc_release_cb,
- .obj_size = sizeof(struct smc_sock),
+ .obj_size = sizeof(struct smc6_sock),
.h.smc_hash = &smc_v6_hashinfo,
.slab_flags = SLAB_TYPESAFE_BY_RCU,
+ .ipv6_pinfo_offset = offsetof(struct smc6_sock, inet6),
};
static const struct proto_ops smc_inet6_stream_ops = {
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
index 6dd4292dae56..04dc6808d2e1 100644
--- a/net/smc/smc_loopback.h
+++ b/net/smc/smc_loopback.h
@@ -15,7 +15,6 @@
#define _SMC_LOOPBACK_H
#include <linux/device.h>
-#include <linux/err.h>
#include <net/smc.h>
#if IS_ENABLED(CONFIG_SMC_LO)
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 2adb92b8c469..1dd362326c0a 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -887,9 +887,6 @@ int smc_pnet_net_init(struct net *net)
smc_pnet_create_pnetids_list(net);
- /* disable handshake limitation by default */
- net->smc.limit_smc_hs = 0;
-
return 0;
}
diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
index ca14c0f3a07d..e71b17d1e21c 100644
--- a/net/smc/smc_stats.c
+++ b/net/smc/smc_stats.c
@@ -218,6 +218,12 @@ static int smc_nl_fill_stats_tech_data(struct sk_buff *skb,
smc_tech->tx_bytes,
SMC_NLA_STATS_PAD))
goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_STATS_T_RX_RMB_USAGE,
+ smc_tech->rx_rmbuse))
+ goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_STATS_T_TX_RMB_USAGE,
+ smc_tech->tx_rmbuse))
+ goto errattr;
if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_CNT,
smc_tech->rx_cnt,
SMC_NLA_STATS_PAD))
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index e19177ce4092..571f9d9e7814 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -79,6 +79,8 @@ struct smc_stats_tech {
u64 tx_bytes;
u64 rx_cnt;
u64 tx_cnt;
+ u64 rx_rmbuse;
+ u64 tx_rmbuse;
};
struct smc_stats {
@@ -135,38 +137,46 @@ do { \
} \
while (0)
-#define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _len) \
+#define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _is_add, _len) \
do { \
+ typeof(_smc_stats) stats = (_smc_stats); \
+ typeof(_is_add) is_a = (_is_add); \
typeof(_len) _l = (_len); \
typeof(_tech) t = (_tech); \
int _pos; \
int m = SMC_BUF_MAX - 1; \
if (_l <= 0) \
break; \
- _pos = fls((_l - 1) >> 13); \
- _pos = (_pos <= m) ? _pos : m; \
- this_cpu_inc((*(_smc_stats)).smc[t].k ## _rmbsize.buf[_pos]); \
+ if (is_a) { \
+ _pos = fls((_l - 1) >> 13); \
+ _pos = (_pos <= m) ? _pos : m; \
+ this_cpu_inc((*stats).smc[t].k ## _rmbsize.buf[_pos]); \
+ this_cpu_add((*stats).smc[t].k ## _rmbuse, _l); \
+ } else { \
+ this_cpu_sub((*stats).smc[t].k ## _rmbuse, _l); \
+ } \
} \
while (0)
#define SMC_STAT_RMB_SUB(_smc_stats, type, t, key) \
this_cpu_inc((*(_smc_stats)).smc[t].rmb ## _ ## key.type ## _cnt)
-#define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _len) \
+#define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _is_add, _len) \
do { \
struct net *_net = sock_net(&(_smc)->sk); \
struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
+ typeof(_is_add) is_add = (_is_add); \
typeof(_is_smcd) is_d = (_is_smcd); \
typeof(_is_rx) is_r = (_is_rx); \
typeof(_len) l = (_len); \
if ((is_d) && (is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, is_add, l); \
if ((is_d) && !(is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, is_add, l); \
if (!(is_d) && (is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, is_add, l); \
if (!(is_d) && !(is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, is_add, l); \
} \
while (0)
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 13f2bc092db1..2fab6456f765 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -90,6 +90,15 @@ static struct ctl_table smc_table[] = {
.extra1 = &conns_per_lgr_min,
.extra2 = &conns_per_lgr_max,
},
+ {
+ .procname = "limit_smc_hs",
+ .data = &init_net.smc.limit_smc_hs,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
};
int __net_init smc_sysctl_net_init(struct net *net)
@@ -121,6 +130,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
+ /* disable handshake limitation by default */
+ net->smc.limit_smc_hs = 0;
return 0;
diff --git a/net/socket.c b/net/socket.c
index fcbdd5bc47ac..8d8b84fa404a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -946,11 +946,17 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
memset(&tss, 0, sizeof(tss));
tsflags = READ_ONCE(sk->sk_tsflags);
- if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+ if ((tsflags & SOF_TIMESTAMPING_SOFTWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
+ skb_is_err_queue(skb) ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) &&
ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
empty = 0;
if (shhwtstamps &&
- (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_HARDWARE ||
+ skb_is_err_queue(skb) ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
if_index = 0;
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
@@ -2362,7 +2368,7 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
int do_sock_getsockopt(struct socket *sock, bool compat, int level,
int optname, sockptr_t optval, sockptr_t optlen)
{
- int max_optlen __maybe_unused;
+ int max_optlen __maybe_unused = 0;
const struct proto_ops *ops;
int err;
@@ -2371,7 +2377,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
return err;
if (!compat)
- max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
+ copy_from_sockptr(&max_optlen, optlen, sizeof(int));
ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET) {
diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c
index a938c19c3490..8507cd4d8921 100644
--- a/net/sunrpc/xprtrdma/ib_client.c
+++ b/net/sunrpc/xprtrdma/ib_client.c
@@ -62,10 +62,11 @@ int rpcrdma_rn_register(struct ib_device *device,
if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags))
return -ENETUNREACH;
- kref_get(&rd->rd_kref);
if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL) < 0)
return -ENOMEM;
+ kref_get(&rd->rd_kref);
rn->rn_done = done;
+ trace_rpcrdma_client_register(device, rn);
return 0;
}
@@ -91,6 +92,7 @@ void rpcrdma_rn_unregister(struct ib_device *device,
if (!rd)
return;
+ trace_rpcrdma_client_unregister(device, rn);
xa_erase(&rd->rd_xa, rn->rn_index);
kref_put(&rd->rd_kref, rpcrdma_rn_release);
}
@@ -111,7 +113,7 @@ static int rpcrdma_add_one(struct ib_device *device)
return -ENOMEM;
kref_init(&rd->rd_kref);
- xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC);
rd->rd_device = device;
init_completion(&rd->rd_done);
ib_set_client_data(device, &rpcrdma_ib_client, rd);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 593846d25214..114fef65f92e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -320,8 +320,8 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
{
struct tipc_msg *hdr, *_hdr;
struct sk_buff_head tmpq;
+ u16 cong_link_cnt = 0;
struct sk_buff *_skb;
- u16 cong_link_cnt;
int rc = 0;
/* Is a cluster supporting with new capabilities ? */
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 5a526ebafeb4..ae1ddbf71853 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -163,8 +163,12 @@ static int bearer_name_validate(const char *name,
/* return bearer name components, if necessary */
if (name_parts) {
- strcpy(name_parts->media_name, media_name);
- strcpy(name_parts->if_name, if_name);
+ if (strscpy(name_parts->media_name, media_name,
+ TIPC_MAX_MEDIA_NAME) < 0)
+ return 0;
+ if (strscpy(name_parts->if_name, if_name,
+ TIPC_MAX_IF_NAME) < 0)
+ return 0;
}
return 1;
}
@@ -322,7 +326,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
if (!b)
return -ENOMEM;
- strcpy(b->name, name);
+ strscpy(b->name, name);
b->media = m;
res = m->enable_media(net, b, attr);
if (res) {
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 77a3d016cade..e2f19627e43d 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -149,7 +149,7 @@ static int dom_size(int peers)
while ((i * i) < peers)
i++;
- return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
+ return min(i, MAX_MON_DOMAIN);
}
static void map_set(u64 *up_map, int i, unsigned int v)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1a0cd06f0eae..65dcbb54f55d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1009,12 +1009,11 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
struct tipc_member *mbr = NULL;
struct net *net = sock_net(sk);
u32 node, port, exclude;
- struct list_head dsts;
+ LIST_HEAD(dsts);
int lookups = 0;
int dstcnt, rc;
bool cong;
- INIT_LIST_HEAD(&dsts);
ua->sa.type = msg_nametype(hdr);
ua->scope = msg_lookup_scope(hdr);
@@ -1161,10 +1160,9 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
- struct list_head dsts;
u32 dstcnt, exclude;
+ LIST_HEAD(dsts);
- INIT_LIST_HEAD(&dsts);
ua->sa.type = msg_nametype(hdr);
ua->scope = msg_lookup_scope(hdr);
exclude = tipc_group_exclude(grp);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 305a412785f5..bbf26cc4f6ee 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1201,7 +1201,7 @@ trim_sgl:
if (!num_async) {
goto send_end;
- } else if (num_zc) {
+ } else if (num_zc || eor) {
int err;
/* Wait for pending encryptions to get completed */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0be0dcb07f7b..001ccc55ef0f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -693,10 +693,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_state_unlock(sk);
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- kfree_skb(u->oob_skb);
- u->oob_skb = NULL;
- }
+ u->oob_skb = NULL;
#endif
wake_up_interruptible_all(&u->peer_wait);
@@ -2226,13 +2223,9 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
}
maybe_add_creds(skb, sock, other);
- skb_get(skb);
-
scm_stat_add(other, skb);
spin_lock(&other->sk_receive_queue.lock);
- if (ousk->oob_skb)
- consume_skb(ousk->oob_skb);
WRITE_ONCE(ousk->oob_skb, skb);
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
@@ -2640,8 +2633,6 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK))
WRITE_ONCE(u->oob_skb, NULL);
- else
- skb_get(oob_skb);
spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
@@ -2651,8 +2642,6 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK))
UNIXCB(oob_skb).consumed += 1;
- consume_skb(oob_skb);
-
mutex_unlock(&u->iolock);
if (chunk < 0)
@@ -2665,56 +2654,52 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
int flags, int copied)
{
+ struct sk_buff *read_skb = NULL, *unread_skb = NULL;
struct unix_sock *u = unix_sk(sk);
- if (!unix_skb_len(skb)) {
- struct sk_buff *unlinked_skb = NULL;
+ if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
+ return skb;
- spin_lock(&sk->sk_receive_queue.lock);
+ spin_lock(&sk->sk_receive_queue.lock);
+ if (!unix_skb_len(skb)) {
if (copied && (!u->oob_skb || skb == u->oob_skb)) {
skb = NULL;
} else if (flags & MSG_PEEK) {
skb = skb_peek_next(skb, &sk->sk_receive_queue);
} else {
- unlinked_skb = skb;
+ read_skb = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
- __skb_unlink(unlinked_skb, &sk->sk_receive_queue);
+ __skb_unlink(read_skb, &sk->sk_receive_queue);
}
- spin_unlock(&sk->sk_receive_queue.lock);
+ if (!skb)
+ goto unlock;
+ }
- consume_skb(unlinked_skb);
- } else {
- struct sk_buff *unlinked_skb = NULL;
+ if (skb != u->oob_skb)
+ goto unlock;
- spin_lock(&sk->sk_receive_queue.lock);
+ if (copied) {
+ skb = NULL;
+ } else if (!(flags & MSG_PEEK)) {
+ WRITE_ONCE(u->oob_skb, NULL);
- if (skb == u->oob_skb) {
- if (copied) {
- skb = NULL;
- } else if (!(flags & MSG_PEEK)) {
- if (sock_flag(sk, SOCK_URGINLINE)) {
- WRITE_ONCE(u->oob_skb, NULL);
- consume_skb(skb);
- } else {
- __skb_unlink(skb, &sk->sk_receive_queue);
- WRITE_ONCE(u->oob_skb, NULL);
- unlinked_skb = skb;
- skb = skb_peek(&sk->sk_receive_queue);
- }
- } else if (!sock_flag(sk, SOCK_URGINLINE)) {
- skb = skb_peek_next(skb, &sk->sk_receive_queue);
- }
+ if (!sock_flag(sk, SOCK_URGINLINE)) {
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ unread_skb = skb;
+ skb = skb_peek(&sk->sk_receive_queue);
}
+ } else if (!sock_flag(sk, SOCK_URGINLINE)) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ }
- spin_unlock(&sk->sk_receive_queue.lock);
+unlock:
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ consume_skb(read_skb);
+ kfree_skb(unread_skb);
- if (unlinked_skb) {
- WARN_ON_ONCE(skb_unref(unlinked_skb));
- kfree_skb(unlinked_skb);
- }
- }
return skb;
}
#endif
@@ -2756,7 +2741,6 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
unix_state_unlock(sk);
if (drop) {
- WARN_ON_ONCE(skb_unref(skb));
kfree_skb(skb);
return -EAGAIN;
}
@@ -3192,9 +3176,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
skb = skb_peek(&sk->sk_receive_queue);
if (skb) {
struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
+ struct sk_buff *next_skb;
+
+ next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (skb == oob_skb ||
- (!oob_skb && !unix_skb_len(skb)))
+ (!unix_skb_len(skb) &&
+ (!oob_skb || next_skb == oob_skb)))
answ = 1;
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 06d94ad999e9..0068e758be4d 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -337,18 +337,6 @@ static bool unix_vertex_dead(struct unix_vertex *vertex)
return true;
}
-static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
-{
- skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist);
-
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- WARN_ON_ONCE(skb_unref(u->oob_skb));
- u->oob_skb = NULL;
- }
-#endif
-}
-
static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
{
struct unix_vertex *vertex;
@@ -371,11 +359,11 @@ static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist
struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
spin_lock(&embryo_queue->lock);
- unix_collect_queue(unix_sk(skb->sk), hitlist);
+ skb_queue_splice_init(embryo_queue, hitlist);
spin_unlock(&embryo_queue->lock);
}
} else {
- unix_collect_queue(u, hitlist);
+ skb_queue_splice_init(queue, hitlist);
}
spin_unlock(&queue->lock);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 0ff9b2dd86ba..35681adedd9a 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -112,6 +112,7 @@
#include <net/sock.h>
#include <net/af_vsock.h>
#include <uapi/linux/vm_sockets.h>
+#include <uapi/asm-generic/ioctls.h>
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
@@ -1295,6 +1296,57 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
+static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
+ int __user *arg)
+{
+ struct sock *sk = sock->sk;
+ struct vsock_sock *vsk;
+ int ret;
+
+ vsk = vsock_sk(sk);
+
+ switch (cmd) {
+ case SIOCOUTQ: {
+ ssize_t n_bytes;
+
+ if (!vsk->transport || !vsk->transport->unsent_bytes) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) {
+ ret = -EINVAL;
+ break;
+ }
+
+ n_bytes = vsk->transport->unsent_bytes(vsk);
+ if (n_bytes < 0) {
+ ret = n_bytes;
+ break;
+ }
+
+ ret = put_user(n_bytes, arg);
+ break;
+ }
+ default:
+ ret = -ENOIOCTLCMD;
+ }
+
+ return ret;
+}
+
+static int vsock_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret;
+
+ lock_sock(sock->sk);
+ ret = vsock_do_ioctl(sock, cmd, (int __user *)arg);
+ release_sock(sock->sk);
+
+ return ret;
+}
+
static const struct proto_ops vsock_dgram_ops = {
.family = PF_VSOCK,
.owner = THIS_MODULE,
@@ -1305,7 +1357,7 @@ static const struct proto_ops vsock_dgram_ops = {
.accept = sock_no_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = sock_no_listen,
.shutdown = vsock_shutdown,
.sendmsg = vsock_dgram_sendmsg,
@@ -2294,7 +2346,7 @@ static const struct proto_ops vsock_stream_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
@@ -2316,7 +2368,7 @@ static const struct proto_ops vsock_seqpacket_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 64a07acfef12..e0160da4ef43 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -311,7 +311,7 @@ static void virtio_transport_tx_work(struct work_struct *work)
virtqueue_disable_cb(vq);
while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
- consume_skb(skb);
+ virtio_transport_consume_skb_sent(skb, true);
added = true;
}
} while (!virtqueue_enable_cb(vq));
@@ -540,6 +540,8 @@ static struct virtio_transport virtio_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
+ .unsent_bytes = virtio_transport_unsent_bytes,
+
.read_skb = virtio_transport_read_skb,
},
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 16ff976a86e3..884ee128851e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -463,6 +463,26 @@ void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
+void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume)
+{
+ struct sock *s = skb->sk;
+
+ if (s && skb->len) {
+ struct vsock_sock *vs = vsock_sk(s);
+ struct virtio_vsock_sock *vvs;
+
+ vvs = vs->trans;
+
+ spin_lock_bh(&vvs->tx_lock);
+ vvs->bytes_unsent -= skb->len;
+ spin_unlock_bh(&vvs->tx_lock);
+ }
+
+ if (consume)
+ consume_skb(skb);
+}
+EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent);
+
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
u32 ret;
@@ -475,6 +495,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
if (ret > credit)
ret = credit;
vvs->tx_cnt += ret;
+ vvs->bytes_unsent += ret;
spin_unlock_bh(&vvs->tx_lock);
return ret;
@@ -488,6 +509,7 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
spin_lock_bh(&vvs->tx_lock);
vvs->tx_cnt -= credit;
+ vvs->bytes_unsent -= credit;
spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
@@ -1090,6 +1112,19 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);
+ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ size_t ret;
+
+ spin_lock_bh(&vvs->tx_lock);
+ ret = vvs->bytes_unsent;
+ spin_unlock_bh(&vvs->tx_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);
+
static int virtio_transport_reset(struct vsock_sock *vsk,
struct sk_buff *skb)
{
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 6dea6119f5b2..6e78927a598e 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -98,6 +98,8 @@ static struct virtio_transport loopback_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
+ .unsent_bytes = virtio_transport_unsent_bytes,
+
.read_skb = virtio_transport_read_skb,
},
@@ -123,6 +125,10 @@ static void vsock_loopback_work(struct work_struct *work)
spin_unlock_bh(&vsock->pkt_queue.lock);
while ((skb = __skb_dequeue(&pkts))) {
+ /* Decrement the bytes_unsent counter without deallocating skb
+ * It is freed by the receiver.
+ */
+ virtio_transport_consume_skb_sent(skb, false);
virtio_transport_deliver_tap_pkt(skb);
virtio_transport_recv_pkt(&loopback_transport, skb);
}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 4d5d351bd0b5..661adfc77644 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -165,11 +165,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
if (!wdev->netdev)
continue;
- wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = false;
err = dev_change_net_namespace(wdev->netdev, net, "wlan%d");
if (err)
break;
- wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = true;
}
if (err) {
@@ -181,11 +181,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
list) {
if (!wdev->netdev)
continue;
- wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = false;
err = dev_change_net_namespace(wdev->netdev, net,
"wlan%d");
WARN_ON(err);
- wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = true;
}
return err;
@@ -1473,7 +1473,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
SET_NETDEV_DEVTYPE(dev, &wiphy_type);
wdev->netdev = dev;
/* can only change netns with wiphy */
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
cfg80211_init_wdev(wdev);
break;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 41c8c0e3ba2e..3b3e3cd7027a 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -170,6 +170,12 @@ static inline int for_each_rdev_check_rtnl(void)
if (for_each_rdev_check_rtnl()) {} else \
list_for_each_entry(rdev, &cfg80211_rdev_list, list)
+enum bss_source_type {
+ BSS_SOURCE_DIRECT = 0,
+ BSS_SOURCE_MBSSID,
+ BSS_SOURCE_STA_PROFILE,
+};
+
struct cfg80211_internal_bss {
struct list_head list;
struct list_head hidden_list;
@@ -191,6 +197,8 @@ struct cfg80211_internal_bss {
*/
u8 parent_bssid[ETH_ALEN] __aligned(2);
+ enum bss_source_type bss_source;
+
/* must be last because of priv member */
struct cfg80211_bss pub;
};
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 34e5acff3935..1e3ed29f7cfc 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -94,7 +94,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
lockdep_assert_held(&rdev->wiphy.mtx);
- if (wdev->cac_started)
+ if (wdev->links[0].cac_started)
return -EBUSY;
if (wdev->u.ibss.ssid_len)
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index d66a913027e0..64c447040786 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -34,7 +34,7 @@ MODULE_LICENSE("GPL");
struct lib80211_crypto_alg {
struct list_head list;
- struct lib80211_crypto_ops *ops;
+ const struct lib80211_crypto_ops *ops;
};
static LIST_HEAD(lib80211_crypto_algs);
@@ -161,7 +161,7 @@ void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info,
}
EXPORT_SYMBOL(lib80211_crypt_delayed_deinit);
-int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops)
+int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops)
{
unsigned long flags;
struct lib80211_crypto_alg *alg;
@@ -183,7 +183,7 @@ int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops)
}
EXPORT_SYMBOL(lib80211_register_crypto_ops);
-int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops)
+int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops)
{
struct lib80211_crypto_alg *alg;
unsigned long flags;
@@ -206,7 +206,7 @@ int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops)
}
EXPORT_SYMBOL(lib80211_unregister_crypto_ops);
-struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name)
+const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name)
{
struct lib80211_crypto_alg *alg;
unsigned long flags;
@@ -234,7 +234,7 @@ static void lib80211_crypt_null_deinit(void *priv)
{
}
-static struct lib80211_crypto_ops lib80211_crypt_null = {
+static const struct lib80211_crypto_ops lib80211_crypt_null = {
.name = "NULL",
.init = lib80211_crypt_null_init,
.deinit = lib80211_crypt_null_deinit,
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index cca5e1cf089e..5aad139130e1 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -418,7 +418,7 @@ static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv)
ccmp->dot11RSNAStatsCCMPDecryptErrors);
}
-static struct lib80211_crypto_ops lib80211_crypt_ccmp = {
+static const struct lib80211_crypto_ops lib80211_crypt_ccmp = {
.name = "CCMP",
.init = lib80211_ccmp_init,
.deinit = lib80211_ccmp_deinit,
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 5c8cdf7681e3..63e68e5e121e 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -705,7 +705,7 @@ static void lib80211_tkip_print_stats(struct seq_file *m, void *priv)
tkip->dot11RSNAStatsTKIPLocalMICFailures);
}
-static struct lib80211_crypto_ops lib80211_crypt_tkip = {
+static const struct lib80211_crypto_ops lib80211_crypt_tkip = {
.name = "TKIP",
.init = lib80211_tkip_init,
.deinit = lib80211_tkip_deinit,
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index 6ab9957b8f96..3b148c7bef85 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -226,7 +226,7 @@ static void lib80211_wep_print_stats(struct seq_file *m, void *priv)
seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
}
-static struct lib80211_crypto_ops lib80211_crypt_wep = {
+static const struct lib80211_crypto_ops lib80211_crypt_wep = {
.name = "WEP",
.init = lib80211_wep_init,
.deinit = lib80211_wep_deinit,
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index aaca65b66af4..2c6654075ca9 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -127,7 +127,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
if (!rdev->ops->join_mesh)
return -EOPNOTSUPP;
- if (wdev->cac_started)
+ if (wdev->links[0].cac_started)
return -EBUSY;
if (!setup->chandef.chan) {
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 4052041a19ea..4dac81854721 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1110,26 +1110,28 @@ EXPORT_SYMBOL(__cfg80211_radar_event);
void cfg80211_cac_event(struct net_device *netdev,
const struct cfg80211_chan_def *chandef,
- enum nl80211_radar_event event, gfp_t gfp)
+ enum nl80211_radar_event event, gfp_t gfp,
+ unsigned int link_id)
{
struct wireless_dev *wdev = netdev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
unsigned long timeout;
- /* not yet supported */
- if (wdev->valid_links)
+ if (WARN_ON(wdev->valid_links &&
+ !(wdev->valid_links & BIT(link_id))))
return;
- trace_cfg80211_cac_event(netdev, event);
+ trace_cfg80211_cac_event(netdev, event, link_id);
- if (WARN_ON(!wdev->cac_started && event != NL80211_RADAR_CAC_STARTED))
+ if (WARN_ON(!wdev->links[link_id].cac_started &&
+ event != NL80211_RADAR_CAC_STARTED))
return;
switch (event) {
case NL80211_RADAR_CAC_FINISHED:
- timeout = wdev->cac_start_time +
- msecs_to_jiffies(wdev->cac_time_ms);
+ timeout = wdev->links[link_id].cac_start_time +
+ msecs_to_jiffies(wdev->links[link_id].cac_time_ms);
WARN_ON(!time_after_eq(jiffies, timeout));
cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE);
memcpy(&rdev->cac_done_chandef, chandef,
@@ -1138,10 +1140,10 @@ void cfg80211_cac_event(struct net_device *netdev,
cfg80211_sched_dfs_chan_update(rdev);
fallthrough;
case NL80211_RADAR_CAC_ABORTED:
- wdev->cac_started = false;
+ wdev->links[link_id].cac_started = false;
break;
case NL80211_RADAR_CAC_STARTED:
- wdev->cac_started = true;
+ wdev->links[link_id].cac_started = true;
break;
default:
WARN_ON(1);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7397a372c78e..9ab777e0bd4d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6066,7 +6066,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->start_ap)
return -EOPNOTSUPP;
- if (wdev->cac_started)
+ if (wdev->links[link_id].cac_started)
return -EBUSY;
if (wdev->links[link_id].ap.beacon_interval)
@@ -9778,7 +9778,8 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
return ERR_PTR(-ENOMEM);
if (n_ssids)
- request->ssids = (void *)&request->channels[n_channels];
+ request->ssids = (void *)request +
+ struct_size(request, channels, n_channels);
request->n_ssids = n_ssids;
if (ie_len) {
if (n_ssids)
@@ -10072,6 +10073,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int link_id = nl80211_link_id(info->attrs);
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_chan_def chandef;
enum nl80211_dfs_regions dfs_region;
@@ -10121,7 +10123,20 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
goto unlock;
}
- if (cfg80211_beaconing_iface_active(wdev) || wdev->cac_started) {
+ if (cfg80211_beaconing_iface_active(wdev)) {
+ /* During MLO other link(s) can beacon, only the current link
+ * can not already beacon
+ */
+ if (wdev->valid_links &&
+ !wdev->links[link_id].ap.beacon_interval) {
+ /* nothing */
+ } else {
+ err = -EBUSY;
+ goto unlock;
+ }
+ }
+
+ if (wdev->links[link_id].cac_started) {
err = -EBUSY;
goto unlock;
}
@@ -10141,12 +10156,26 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
if (WARN_ON(!cac_time_ms))
cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
- err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms);
+ err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms,
+ link_id);
if (!err) {
- wdev->links[0].ap.chandef = chandef;
- wdev->cac_started = true;
- wdev->cac_start_time = jiffies;
- wdev->cac_time_ms = cac_time_ms;
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ wdev->links[0].ap.chandef = chandef;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ wdev->u.ibss.chandef = chandef;
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ wdev->u.mesh.chandef = chandef;
+ break;
+ default:
+ break;
+ }
+ wdev->links[link_id].cac_started = true;
+ wdev->links[link_id].cac_start_time = jiffies;
+ wdev->links[link_id].cac_time_ms = cac_time_ms;
}
unlock:
wiphy_unlock(wiphy);
@@ -10494,17 +10523,21 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
NL80211_BSS_CHAIN_SIGNAL))
goto nla_put_failure;
- switch (rdev->wiphy.signal_type) {
- case CFG80211_SIGNAL_TYPE_MBM:
- if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal))
- goto nla_put_failure;
- break;
- case CFG80211_SIGNAL_TYPE_UNSPEC:
- if (nla_put_u8(msg, NL80211_BSS_SIGNAL_UNSPEC, res->signal))
- goto nla_put_failure;
- break;
- default:
- break;
+ if (intbss->bss_source != BSS_SOURCE_STA_PROFILE) {
+ switch (rdev->wiphy.signal_type) {
+ case CFG80211_SIGNAL_TYPE_MBM:
+ if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM,
+ res->signal))
+ goto nla_put_failure;
+ break;
+ case CFG80211_SIGNAL_TYPE_UNSPEC:
+ if (nla_put_u8(msg, NL80211_BSS_SIGNAL_UNSPEC,
+ res->signal))
+ goto nla_put_failure;
+ break;
+ default:
+ break;
+ }
}
switch (wdev->iftype) {
@@ -16498,10 +16531,10 @@ nl80211_set_ttlm(struct sk_buff *skb, struct genl_info *info)
SELECTOR(__sel, NETDEV_UP_NOTMX, \
NL80211_FLAG_NEED_NETDEV_UP | \
NL80211_FLAG_NO_WIPHY_MTX) \
- SELECTOR(__sel, NETDEV_UP_NOTMX_NOMLO, \
+ SELECTOR(__sel, NETDEV_UP_NOTMX_MLO, \
NL80211_FLAG_NEED_NETDEV_UP | \
NL80211_FLAG_NO_WIPHY_MTX | \
- NL80211_FLAG_MLO_UNSUPPORTED) \
+ NL80211_FLAG_MLO_VALID_LINK_ID) \
SELECTOR(__sel, NETDEV_UP_CLEAR, \
NL80211_FLAG_NEED_NETDEV_UP | \
NL80211_FLAG_CLEAR_SKB) \
@@ -17396,7 +17429,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NO_WIPHY_MTX |
- NL80211_FLAG_MLO_UNSUPPORTED),
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index ec3f4aa1c807..f5adbf6b5c84 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1200,26 +1200,27 @@ static inline int
rdev_start_radar_detection(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms)
+ u32 cac_time_ms, int link_id)
{
int ret = -EOPNOTSUPP;
trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef,
- cac_time_ms);
+ cac_time_ms, link_id);
if (rdev->ops->start_radar_detection)
ret = rdev->ops->start_radar_detection(&rdev->wiphy, dev,
- chandef, cac_time_ms);
+ chandef, cac_time_ms,
+ link_id);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline void
rdev_end_cac(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+ struct net_device *dev, unsigned int link_id)
{
- trace_rdev_end_cac(&rdev->wiphy, dev);
+ trace_rdev_end_cac(&rdev->wiphy, dev, link_id);
if (rdev->ops->end_cac)
- rdev->ops->end_cac(&rdev->wiphy, dev);
+ rdev->ops->end_cac(&rdev->wiphy, dev, link_id);
trace_rdev_return_void(&rdev->wiphy);
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4a27f3823e25..6489ba943a63 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -4229,6 +4229,8 @@ EXPORT_SYMBOL(regulatory_pre_cac_allowed);
static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev)
{
struct wireless_dev *wdev;
+ unsigned int link_id;
+
/* If we finished CAC or received radar, we should end any
* CAC running on the same channels.
* the check !cfg80211_chandef_dfs_usable contain 2 options:
@@ -4241,16 +4243,17 @@ static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev)
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
struct cfg80211_chan_def *chandef;
- if (!wdev->cac_started)
- continue;
+ for_each_valid_link(wdev, link_id) {
+ if (!wdev->links[link_id].cac_started)
+ continue;
- /* FIXME: radar detection is tied to link 0 for now */
- chandef = wdev_chandef(wdev, 0);
- if (!chandef)
- continue;
+ chandef = wdev_chandef(wdev, link_id);
+ if (!chandef)
+ continue;
- if (!cfg80211_chandef_dfs_usable(&rdev->wiphy, chandef))
- rdev_end_cac(rdev, wdev->netdev);
+ if (!cfg80211_chandef_dfs_usable(&rdev->wiphy, chandef))
+ rdev_end_cac(rdev, wdev->netdev, link_id);
+ }
}
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 64eeed82d43d..59a90bf3c0d6 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1910,6 +1910,7 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
known->pub.bssid_index = new->pub.bssid_index;
known->pub.use_for &= new->pub.use_for;
known->pub.cannot_use_reasons = new->pub.cannot_use_reasons;
+ known->bss_source = new->bss_source;
return true;
}
@@ -2008,10 +2009,10 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
return found;
free_ies:
- ies = (void *)rcu_dereference(tmp->pub.beacon_ies);
+ ies = (void *)rcu_access_pointer(tmp->pub.beacon_ies);
if (ies)
kfree_rcu(ies, rcu_head);
- ies = (void *)rcu_dereference(tmp->pub.proberesp_ies);
+ ies = (void *)rcu_access_pointer(tmp->pub.proberesp_ies);
if (ies)
kfree_rcu(ies, rcu_head);
@@ -2149,11 +2150,7 @@ struct cfg80211_inform_single_bss_data {
const u8 *ie;
size_t ielen;
- enum {
- BSS_SOURCE_DIRECT = 0,
- BSS_SOURCE_MBSSID,
- BSS_SOURCE_STA_PROFILE,
- } bss_source;
+ enum bss_source_type bss_source;
/* Set if reporting bss_source != BSS_SOURCE_DIRECT */
struct cfg80211_bss *source_bss;
u8 max_bssid_indicator;
@@ -2268,6 +2265,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
IEEE80211_MAX_CHAINS);
tmp.pub.use_for = data->use_for;
tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
+ tmp.bss_source = data->bss_source;
switch (data->bss_source) {
case BSS_SOURCE_MBSSID:
@@ -2907,6 +2905,9 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
struct element *reporter_rnr = NULL;
struct ieee80211_multi_link_elem *ml_elem;
struct cfg80211_mle *mle;
+ const struct element *ssid_elem;
+ const u8 *ssid = NULL;
+ size_t ssid_len = 0;
u16 control;
u8 ml_common_len;
u8 *new_ie = NULL;
@@ -2961,6 +2962,13 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
bss_change_count,
gfp);
+ ssid_elem = cfg80211_find_elem(WLAN_EID_SSID, tx_data->ie,
+ tx_data->ielen);
+ if (ssid_elem) {
+ ssid = ssid_elem->data;
+ ssid_len = ssid_elem->datalen;
+ }
+
for (i = 0; i < ARRAY_SIZE(mle->sta_prof) && mle->sta_prof[i]; i++) {
const struct ieee80211_neighbor_ap_info *ap_info;
enum nl80211_band band;
@@ -3042,6 +3050,23 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
freq = ieee80211_channel_to_freq_khz(ap_info->channel, band);
data.channel = ieee80211_get_channel_khz(wiphy, freq);
+ /* Skip if BSS entry generated from MBSSID or DIRECT source
+ * frame data available already.
+ */
+ bss = cfg80211_get_bss(wiphy, data.channel, data.bssid, ssid,
+ ssid_len, IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY);
+ if (bss) {
+ struct cfg80211_internal_bss *ibss = bss_from_pub(bss);
+
+ if (data.capability == bss->capability &&
+ ibss->bss_source != BSS_SOURCE_STA_PROFILE) {
+ cfg80211_put_bss(wiphy, bss);
+ continue;
+ }
+ cfg80211_put_bss(wiphy, bss);
+ }
+
if (use_for == NL80211_BSS_USE_FOR_MLD_LINK &&
!(wiphy->flags & WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY)) {
use_for = 0;
@@ -3467,8 +3492,8 @@ int cfg80211_wext_siwscan(struct net_device *dev,
n_channels = ieee80211_get_num_supported_channels(wiphy);
}
- creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) +
- n_channels * sizeof(void *),
+ creq = kzalloc(struct_size(creq, channels, n_channels) +
+ sizeof(struct cfg80211_ssid),
GFP_ATOMIC);
if (!creq)
return -ENOMEM;
@@ -3476,7 +3501,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
creq->wiphy = wiphy;
creq->wdev = dev->ieee80211_ptr;
/* SSIDs come after channels */
- creq->ssids = (void *)&creq->channels[n_channels];
+ creq->ssids = (void *)creq + struct_size(creq, channels, n_channels);
creq->n_channels = n_channels;
creq->n_ssids = 1;
creq->scan_start = jiffies;
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d9d7bf8bb5c1..431da30817a6 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -115,7 +115,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
n_channels = i;
}
request->n_channels = n_channels;
- request->ssids = (void *)&request->channels[n_channels];
+ request->ssids = (void *)request +
+ struct_size(request, channels, n_channels);
request->n_ssids = 1;
memcpy(request->ssids[0].ssid, wdev->conn->params.ssid,
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 5c26f065bd68..97c21b627791 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -805,9 +805,22 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa,
TP_ARGS(wiphy, netdev)
);
-DEFINE_EVENT(wiphy_netdev_evt, rdev_end_cac,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev),
- TP_ARGS(wiphy, netdev)
+TRACE_EVENT(rdev_end_cac,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ unsigned int link_id),
+ TP_ARGS(wiphy, netdev, link_id),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(unsigned int, link_id)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->link_id = link_id;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id)
);
DECLARE_EVENT_CLASS(station_add_change,
@@ -2652,24 +2665,26 @@ TRACE_EVENT(rdev_external_auth,
TRACE_EVENT(rdev_start_radar_detection,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms),
- TP_ARGS(wiphy, netdev, chandef, cac_time_ms),
+ u32 cac_time_ms, int link_id),
+ TP_ARGS(wiphy, netdev, chandef, cac_time_ms, link_id),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
CHAN_DEF_ENTRY
__field(u32, cac_time_ms)
+ __field(int, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->cac_time_ms = cac_time_ms;
+ __entry->link_id = link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
- ", cac_time_ms=%u",
+ ", cac_time_ms=%u, link_id=%d",
WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
- __entry->cac_time_ms)
+ __entry->cac_time_ms, __entry->link_id)
);
TRACE_EVENT(rdev_set_mcast_rate,
@@ -3483,18 +3498,21 @@ TRACE_EVENT(cfg80211_radar_event,
);
TRACE_EVENT(cfg80211_cac_event,
- TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt),
- TP_ARGS(netdev, evt),
+ TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt,
+ unsigned int link_id),
+ TP_ARGS(netdev, evt, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
__field(enum nl80211_radar_event, evt)
+ __field(unsigned int, link_id)
),
TP_fast_assign(
NETDEV_ASSIGN;
__entry->evt = evt;
+ __entry->link_id = link_id;
),
- TP_printk(NETDEV_PR_FMT ", event: %d",
- NETDEV_PR_ARG, __entry->evt)
+ TP_printk(NETDEV_PR_FMT ", event: %d, link_id=%u",
+ NETDEV_PR_ARG, __entry->evt, __entry->link_id)
);
DECLARE_EVENT_CLASS(cfg80211_rx_evt,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 9a7c3adc8a3b..f49b55724f83 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -998,10 +998,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
* Diffserv Service Classes no update is needed:
* - Standard: DF
* - Low Priority Data: CS1
- * - Multimedia Streaming: AF31, AF32, AF33
* - Multimedia Conferencing: AF41, AF42, AF43
* - Network Control Traffic: CS7
* - Real-Time Interactive: CS4
+ * - Signaling: CS5
*/
switch (dscp >> 2) {
case 10:
@@ -1026,9 +1026,11 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
/* Broadcasting video: CS3 */
ret = 4;
break;
- case 40:
- /* Signaling: CS5 */
- ret = 5;
+ case 26:
+ case 28:
+ case 30:
+ /* Multimedia Streaming: AF31, AF32, AF33 */
+ ret = 4;
break;
case 44:
/* Voice Admit: VA */
@@ -2435,8 +2437,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
if (params->num_different_channels > c->num_different_channels)
continue;
- limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
- GFP_KERNEL);
+ limits = kmemdup_array(c->limits, c->n_limits, sizeof(*limits),
+ GFP_KERNEL);
if (!limits)
return -ENOMEM;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index c0e0204b9630..521a2938e50a 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -211,6 +211,11 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_pool;
}
+ if (dev_get_min_mp_channel_count(netdev)) {
+ err = -EBUSY;
+ goto err_unreg_pool;
+ }
+
bpf.command = XDP_SETUP_XSK_POOL;
bpf.xsk.pool = pool;
bpf.xsk.queue_id = queue_id;
@@ -623,20 +628,31 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
return nb_entries;
}
-u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+static u32 xp_alloc_slow(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+ u32 max)
{
- u32 nb_entries1 = 0, nb_entries2;
+ int i;
- if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) {
+ for (i = 0; i < max; i++) {
struct xdp_buff *buff;
- /* Slow path */
buff = xp_alloc(pool);
- if (buff)
- *xdp = buff;
- return !!buff;
+ if (unlikely(!buff))
+ return i;
+ *xdp = buff;
+ xdp++;
}
+ return max;
+}
+
+u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+ u32 nb_entries1 = 0, nb_entries2;
+
+ if (unlikely(pool->dev && dma_dev_need_sync(pool->dev)))
+ return xp_alloc_slow(pool, xdp, max);
+
if (unlikely(pool->free_list_cnt)) {
nb_entries1 = xp_alloc_reused(pool, xdp, max);
if (nb_entries1 == max)
@@ -656,9 +672,17 @@ EXPORT_SYMBOL(xp_alloc_batch);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
+ u32 req_count, avail_count;
+
if (pool->free_list_cnt >= count)
return true;
- return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
+
+ req_count = count - pool->free_list_cnt;
+ avail_count = xskq_cons_nb_entries(pool->fq, req_count);
+ if (!avail_count)
+ pool->fq->queue_empty_descs++;
+
+ return avail_count >= req_count;
}
EXPORT_SYMBOL(xp_can_alloc);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 6f2d1621c992..406b20dfee8d 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -306,11 +306,6 @@ static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max)
return entries >= max ? max : entries;
}
-static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
-{
- return xskq_cons_nb_entries(q, cnt) >= cnt;
-}
-
static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
if (q->cached_prod == q->cached_cons)
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 9a44d363ba62..f123b7c9ec82 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -328,12 +328,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
/* User explicitly requested packet offload mode and configured
* policy in addition to the XFRM state. So be civil to users,
* and return an error instead of taking fallback path.
- *
- * This WARN_ON() can be seen as a documentation for driver
- * authors to do not return -EOPNOTSUPP in packet offload mode.
*/
- WARN_ON(err == -EOPNOTSUPP && is_packet_offload);
- if (err != -EOPNOTSUPP || is_packet_offload) {
+ if ((err != -EOPNOTSUPP && !is_packet_offload) || is_packet_offload) {
NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state");
return err;
}
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index e50e4bf993fa..98f1e2b67c76 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -769,7 +769,7 @@ static int xfrmi_dev_init(struct net_device *dev)
if (err)
return err;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->features |= XFRMI_FEATURES;
dev->hw_features |= XFRMI_FEATURES;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c56c61b0c12e..914bac03b52a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -45,6 +45,7 @@
#ifdef CONFIG_XFRM_ESPINTCP
#include <net/espintcp.h>
#endif
+#include <net/inet_dscp.h>
#include "xfrm_hash.h"
@@ -109,7 +110,11 @@ struct xfrm_pol_inexact_node {
* 4. saddr:any list from saddr tree
*
* This result set then needs to be searched for the policy with
- * the lowest priority. If two results have same prio, youngest one wins.
+ * the lowest priority. If two candidates have the same priority, the
+ * struct xfrm_policy pos member with the lower number is used.
+ *
+ * This replicates previous single-list-search algorithm which would
+ * return first matching policy in the (ordered-by-priority) list.
*/
struct xfrm_pol_inexact_key {
@@ -196,8 +201,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net,
static struct xfrm_policy *
xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
bool excl);
-static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
- struct xfrm_policy *policy);
static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
@@ -410,7 +413,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
- INIT_HLIST_NODE(&policy->bydst_inexact_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@@ -1228,26 +1230,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
return ERR_PTR(-EEXIST);
}
- chain = &net->xfrm.policy_inexact[dir];
- xfrm_policy_insert_inexact_list(chain, policy);
-
if (delpol)
__xfrm_policy_inexact_prune_bin(bin, false);
return delpol;
}
+static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy)
+{
+ int dir;
+
+ if (policy->walk.dead)
+ return true;
+
+ dir = xfrm_policy_id2dir(policy->index);
+ return dir >= XFRM_POLICY_MAX;
+}
+
static void xfrm_hash_rebuild(struct work_struct *work)
{
struct net *net = container_of(work, struct net,
xfrm.policy_hthresh.work);
- unsigned int hmask;
struct xfrm_policy *pol;
struct xfrm_policy *policy;
struct hlist_head *chain;
- struct hlist_head *odst;
struct hlist_node *newpos;
- int i;
int dir;
unsigned seq;
u8 lbits4, rbits4, lbits6, rbits6;
@@ -1274,13 +1281,10 @@ static void xfrm_hash_rebuild(struct work_struct *work)
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
- if (policy->walk.dead)
+ if (xfrm_policy_is_dead_or_sk(policy))
continue;
dir = xfrm_policy_id2dir(policy->index);
- if (dir >= XFRM_POLICY_MAX)
- continue;
-
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
if (policy->family == AF_INET) {
dbits = rbits4;
@@ -1311,23 +1315,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
goto out_unlock;
}
- /* reset the bydst and inexact table in all directions */
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(policy, n,
- &net->xfrm.policy_inexact[dir],
- bydst_inexact_list) {
- hlist_del_rcu(&policy->bydst);
- hlist_del_init(&policy->bydst_inexact_list);
- }
-
- hmask = net->xfrm.policy_bydst[dir].hmask;
- odst = net->xfrm.policy_bydst[dir].table;
- for (i = hmask; i >= 0; i--) {
- hlist_for_each_entry_safe(policy, n, odst + i, bydst)
- hlist_del_rcu(&policy->bydst);
- }
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
/* dir out => dst = remote, src = local */
net->xfrm.policy_bydst[dir].dbits4 = rbits4;
@@ -1345,14 +1333,13 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
- if (policy->walk.dead)
+ if (xfrm_policy_is_dead_or_sk(policy))
continue;
- dir = xfrm_policy_id2dir(policy->index);
- if (dir >= XFRM_POLICY_MAX) {
- /* skip socket policies */
- continue;
- }
+
+ hlist_del_rcu(&policy->bydst);
+
newpos = NULL;
+ dir = xfrm_policy_id2dir(policy->index);
chain = policy_hash_bysel(net, &policy->selector,
policy->family, dir);
@@ -1519,42 +1506,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = {
.automatic_shrinking = true,
};
-static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
- struct xfrm_policy *policy)
-{
- struct xfrm_policy *pol, *delpol = NULL;
- struct hlist_node *newpos = NULL;
- int i = 0;
-
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- if (pol->type == policy->type &&
- pol->if_id == policy->if_id &&
- !selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(&policy->mark, pol) &&
- xfrm_sec_ctx_match(pol->security, policy->security) &&
- !WARN_ON(delpol)) {
- delpol = pol;
- if (policy->priority > pol->priority)
- continue;
- } else if (policy->priority >= pol->priority) {
- newpos = &pol->bydst_inexact_list;
- continue;
- }
- if (delpol)
- break;
- }
-
- if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
- hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
- else
- hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
-
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- pol->pos = i;
- i++;
- }
-}
-
static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
struct xfrm_policy *policy,
bool excl)
@@ -2294,10 +2245,52 @@ out:
return pol;
}
+static u32 xfrm_gen_pos_slow(struct net *net)
+{
+ struct xfrm_policy *policy;
+ u32 i = 0;
+
+ /* oldest entry is last in list */
+ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ if (!xfrm_policy_is_dead_or_sk(policy))
+ policy->pos = ++i;
+ }
+
+ return i;
+}
+
+static u32 xfrm_gen_pos(struct net *net)
+{
+ const struct xfrm_policy *policy;
+ u32 i = 0;
+
+ /* most recently added policy is at the head of the list */
+ list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
+ if (xfrm_policy_is_dead_or_sk(policy))
+ continue;
+
+ if (policy->pos == UINT_MAX)
+ return xfrm_gen_pos_slow(net);
+
+ i = policy->pos + 1;
+ break;
+ }
+
+ return i;
+}
+
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
struct net *net = xp_net(pol);
+ switch (dir) {
+ case XFRM_POLICY_IN:
+ case XFRM_POLICY_FWD:
+ case XFRM_POLICY_OUT:
+ pol->pos = xfrm_gen_pos(net);
+ break;
+ }
+
list_add(&pol->walk.all, &net->xfrm.policy_all);
net->xfrm.policy_count[dir]++;
xfrm_pol_hold(pol);
@@ -2314,7 +2307,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) {
hlist_del_rcu(&pol->bydst);
- hlist_del_init(&pol->bydst_inexact_list);
hlist_del(&pol->byidx);
}
@@ -2561,7 +2553,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
static int xfrm_get_tos(const struct flowi *fl, int family)
{
if (family == AF_INET)
- return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos;
+ return fl->u.ip4.flowi4_tos & INET_DSCP_MASK;
return 0;
}
@@ -4437,63 +4429,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif
#ifdef CONFIG_XFRM_MIGRATE
-static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
- const struct xfrm_selector *sel_tgt)
-{
- if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
- if (sel_tgt->family == sel_cmp->family &&
- xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
- sel_cmp->family) &&
- xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
- sel_cmp->family) &&
- sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
- sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
- return true;
- }
- } else {
- if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
- return true;
- }
- }
- return false;
-}
-
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
u8 dir, u8 type, struct net *net, u32 if_id)
{
- struct xfrm_policy *pol, *ret = NULL;
- struct hlist_head *chain;
- u32 priority = ~0U;
+ struct xfrm_policy *pol;
+ struct flowi fl;
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
- hlist_for_each_entry(pol, chain, bydst) {
- if ((if_id == 0 || pol->if_id == if_id) &&
- xfrm_migrate_selector_match(sel, &pol->selector) &&
- pol->type == type) {
- ret = pol;
- priority = ret->priority;
- break;
- }
- }
- chain = &net->xfrm.policy_inexact[dir];
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- if ((pol->priority >= priority) && ret)
- break;
+ memset(&fl, 0, sizeof(fl));
- if ((if_id == 0 || pol->if_id == if_id) &&
- xfrm_migrate_selector_match(sel, &pol->selector) &&
- pol->type == type) {
- ret = pol;
+ fl.flowi_proto = sel->proto;
+
+ switch (sel->family) {
+ case AF_INET:
+ fl.u.ip4.saddr = sel->saddr.a4;
+ fl.u.ip4.daddr = sel->daddr.a4;
+ if (sel->proto == IPSEC_ULPROTO_ANY)
break;
- }
+ fl.u.flowi4_oif = sel->ifindex;
+ fl.u.ip4.fl4_sport = sel->sport;
+ fl.u.ip4.fl4_dport = sel->dport;
+ break;
+ case AF_INET6:
+ fl.u.ip6.saddr = sel->saddr.in6;
+ fl.u.ip6.daddr = sel->daddr.in6;
+ if (sel->proto == IPSEC_ULPROTO_ANY)
+ break;
+ fl.u.flowi6_oif = sel->ifindex;
+ fl.u.ip6.fl4_sport = sel->sport;
+ fl.u.ip6.fl4_dport = sel->dport;
+ break;
+ default:
+ return ERR_PTR(-EAFNOSUPPORT);
}
- xfrm_pol_hold(ret);
+ rcu_read_lock();
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id);
+ if (IS_ERR_OR_NULL(pol))
+ goto out_unlock;
- return ret;
+ if (!xfrm_pol_hold_rcu(pol))
+ pol = NULL;
+out_unlock:
+ rcu_read_unlock();
+ return pol;
}
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
@@ -4630,9 +4609,9 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
/* Stage 1 - find policy */
pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id);
- if (!pol) {
+ if (IS_ERR_OR_NULL(pol)) {
NL_SET_ERR_MSG(extack, "Target policy not found");
- err = -ENOENT;
+ err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT;
goto out;
}