summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.h3
-rw-r--r--net/appletalk/aarp.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/bluetooth/smp.c6
-rw-r--r--net/bridge/br_cfm.c2
-rw-r--r--net/bridge/br_fdb.c23
-rw-r--r--net/bridge/br_mdb.c23
-rw-r--r--net/bridge/br_private.h4
-rw-r--r--net/bridge/br_stp.c4
-rw-r--r--net/bridge/br_vlan.c19
-rw-r--r--net/bridge/br_vlan_tunnel.c38
-rw-r--r--net/caif/cfcnfg.c2
-rw-r--r--net/can/bcm.c62
-rw-r--r--net/can/isotp.c61
-rw-r--r--net/can/j1939/transport.c54
-rw-r--r--net/can/raw.c62
-rw-r--r--net/core/dev.c27
-rw-r--r--net/core/devlink.c17
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/core/net_namespace.c20
-rw-r--r--net/core/rtnetlink.c8
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/core/sock.c7
-rw-r--r--net/core/sock_reuseport.c15
-rw-r--r--net/dsa/dsa2.c22
-rw-r--r--net/dsa/dsa_priv.h8
-rw-r--r--net/dsa/port.c62
-rw-r--r--net/dsa/slave.c132
-rw-r--r--net/dsa/switch.c89
-rw-r--r--net/ethtool/eeprom.c15
-rw-r--r--net/ethtool/ioctl.c10
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/strset.c2
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/ah4.c1
-rw-r--r--net/ipv4/cipso_ipv4.c1
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c1
-rw-r--r--net/ipv4/esp4_offload.c4
-rw-r--r--net/ipv4/icmp.c70
-rw-r--r--net/ipv4/igmp.c1
-rw-r--r--net/ipv4/inet_connection_sock.c15
-rw-r--r--net/ipv4/ip_gre.c7
-rw-r--r--net/ipv4/ip_output.c32
-rw-r--r--net/ipv4/ipcomp.c1
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ping.c12
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/udp.c10
-rw-r--r--net/ipv4/xfrm4_tunnel.c1
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/ah6.c2
-rw-r--r--net/ipv6/esp6.c2
-rw-r--r--net/ipv6/esp6_offload.c1
-rw-r--r--net/ipv6/icmp.c21
-rw-r--r--net/ipv6/ip6_output.c40
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/ipcomp6.c2
-rw-r--r--net/ipv6/mip6.c99
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c22
-rw-r--r--net/ipv6/seg6_local.c94
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/ipv6/xfrm6_output.c7
-rw-r--r--net/ipv6/xfrm6_tunnel.c1
-rw-r--r--net/kcm/kcmsock.c5
-rw-r--r--net/key/af_key.c6
-rw-r--r--net/mac80211/cfg.c45
-rw-r--r--net/mac80211/chan.c108
-rw-r--r--net/mac80211/debugfs.c81
-rw-r--r--net/mac80211/debugfs_netdev.c33
-rw-r--r--net/mac80211/debugfs_sta.c24
-rw-r--r--net/mac80211/driver-ops.h26
-rw-r--r--net/mac80211/he.c8
-rw-r--r--net/mac80211/ht.c18
-rw-r--r--net/mac80211/ieee80211_i.h196
-rw-r--r--net/mac80211/iface.c253
-rw-r--r--net/mac80211/led.c12
-rw-r--r--net/mac80211/main.c39
-rw-r--r--net/mac80211/mesh.h2
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mesh_pathtbl.c2
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/mlme.c256
-rw-r--r--net/mac80211/rate.c13
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c36
-rw-r--r--net/mac80211/rx.c63
-rw-r--r--net/mac80211/scan.c21
-rw-r--r--net/mac80211/sta_info.c83
-rw-r--r--net/mac80211/sta_info.h11
-rw-r--r--net/mac80211/status.c26
-rw-r--r--net/mac80211/tdls.c28
-rw-r--r--net/mac80211/trace.h33
-rw-r--r--net/mac80211/tx.c518
-rw-r--r--net/mac80211/util.c59
-rw-r--r--net/mptcp/ctrl.c32
-rw-r--r--net/mptcp/mib.c2
-rw-r--r--net/mptcp/mib.h2
-rw-r--r--net/mptcp/mptcp_diag.c1
-rw-r--r--net/mptcp/options.c169
-rw-r--r--net/mptcp/pm.c1
-rw-r--r--net/mptcp/pm_netlink.c13
-rw-r--r--net/mptcp/protocol.c248
-rw-r--r--net/mptcp/protocol.h36
-rw-r--r--net/mptcp/subflow.c235
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/nf_conntrack_proto.c16
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c14
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c23
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c6
-rw-r--r--net/netfilter/nf_synproxy_core.c5
-rw-r--r--net/netfilter/nf_tables_api.c85
-rw-r--r--net/netfilter/nf_tables_core.c1
-rw-r--r--net/netfilter/nfnetlink_hook.c4
-rw-r--r--net/netfilter/nft_exthdr.c7
-rw-r--r--net/netfilter/nft_last.c87
-rw-r--r--net/openvswitch/Makefile3
-rw-r--r--net/openvswitch/actions.c4
-rw-r--r--net/openvswitch/datapath.c4
-rw-r--r--net/openvswitch/openvswitch_trace.c10
-rw-r--r--net/openvswitch/openvswitch_trace.h158
-rw-r--r--net/packet/af_packet.c41
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rds/recv.c2
-rw-r--r--net/sched/act_ct.c21
-rw-r--r--net/sched/cls_flower.c3
-rw-r--r--net/sched/sch_cake.c18
-rw-r--r--net/sched/sch_generic.c23
-rw-r--r--net/sctp/associola.c6
-rw-r--r--net/sctp/debug.c1
-rw-r--r--net/sctp/input.c132
-rw-r--r--net/sctp/ipv6.c112
-rw-r--r--net/sctp/output.c33
-rw-r--r--net/sctp/outqueue.c13
-rw-r--r--net/sctp/protocol.c21
-rw-r--r--net/sctp/sm_make_chunk.c31
-rw-r--r--net/sctp/sm_sideeffect.c37
-rw-r--r--net/sctp/sm_statefuns.c40
-rw-r--r--net/sctp/sm_statetable.c43
-rw-r--r--net/sctp/socket.c123
-rw-r--r--net/sctp/sysctl.c35
-rw-r--r--net/sctp/transport.c150
-rw-r--r--net/smc/smc_stats.c6
-rw-r--r--net/smc/smc_tx.c5
-rw-r--r--net/socket.c65
-rw-r--r--net/sunrpc/clnt.c7
-rw-r--r--net/sunrpc/xprt.c40
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c27
-rw-r--r--net/sunrpc/xprtrdma/transport.c12
-rw-r--r--net/sunrpc/xprtrdma/verbs.c18
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h1
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/switchdev/switchdev.c25
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/msg.c27
-rw-r--r--net/tipc/msg.h3
-rw-r--r--net/unix/af_unix.c195
-rw-r--r--net/vmw_vsock/af_vsock.c18
-rw-r--r--net/vmw_vsock/virtio_transport.c4
-rw-r--r--net/vmw_vsock/virtio_transport_common.c7
-rw-r--r--net/wireless/Makefile2
-rw-r--r--net/wireless/chan.c43
-rw-r--r--net/wireless/core.c63
-rw-r--r--net/wireless/core.h3
-rw-r--r--net/wireless/nl80211.c22
-rw-r--r--net/wireless/pmsr.c28
-rw-r--r--net/wireless/rdev-ops.h12
-rw-r--r--net/wireless/reg.c5
-rw-r--r--net/wireless/scan.c22
-rw-r--r--net/wireless/sysfs.c4
-rw-r--r--net/wireless/trace.h36
-rw-r--r--net/wireless/util.c3
-rw-r--r--net/wireless/wext-compat.c8
-rw-r--r--net/wireless/wext-spy.c14
-rw-r--r--net/xfrm/xfrm_hash.h7
-rw-r--r--net/xfrm/xfrm_input.c6
-rw-r--r--net/xfrm/xfrm_output.c124
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--net/xfrm/xfrm_replay.c171
-rw-r--r--net/xfrm/xfrm_state.c67
185 files changed, 4379 insertions, 2098 deletions
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index e3f6ff05a528..1a705a4ef7fa 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -108,7 +108,8 @@ static inline netdev_features_t vlan_tnl_features(struct net_device *real_dev)
netdev_features_t ret;
ret = real_dev->hw_enc_features &
- (NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO | NETIF_F_GSO_ENCAP_ALL);
+ (NETIF_F_CSUM_MASK | NETIF_F_GSO_SOFTWARE |
+ NETIF_F_GSO_ENCAP_ALL);
if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK))
return (ret & ~NETIF_F_CSUM_MASK) | NETIF_F_HW_CSUM;
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index be18af481d7d..c7236daa2415 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -768,7 +768,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
if (a && a->status & ATIF_PROBE) {
a->status |= ATIF_PROBE_FAIL;
/*
- * we do not respond to probe or request packets for
+ * we do not respond to probe or request packets of
* this address while we are probing this address
*/
goto unlock;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 680def809838..12022378f892 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -409,8 +409,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
if (WARN_ON(!forw_packet->if_outgoing))
return;
- if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
+ if (forw_packet->if_outgoing->soft_iface != soft_iface) {
+ pr_warn("%s: soft interface switch for queued OGM\n", __func__);
return;
+ }
if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
return;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 93144e0c7efa..4d93c6c32a71 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -3229,7 +3229,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
{
struct l2cap_chan *chan;
- bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan);
+ BT_DBG("pchan %p", pchan);
chan = l2cap_chan_create();
if (!chan)
@@ -3250,7 +3250,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
*/
atomic_set(&chan->nesting, L2CAP_NESTING_SMP);
- bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan);
+ BT_DBG("created chan %p", chan);
return chan;
}
@@ -3354,7 +3354,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
{
struct smp_dev *smp;
- bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan);
+ BT_DBG("chan %p", chan);
smp = chan->data;
if (smp) {
diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c
index 001064f7583d..a3c755d0a09d 100644
--- a/net/bridge/br_cfm.c
+++ b/net/bridge/br_cfm.c
@@ -142,7 +142,7 @@ static void br_cfm_notify(int event, const struct net_bridge_port *port)
{
u32 filter = RTEXT_FILTER_CFM_STATUS;
- return br_info_notify(event, port->br, NULL, filter);
+ br_info_notify(event, port->br, NULL, filter);
}
static void cc_peer_enable(struct br_cfm_peer_mep *peer_mep)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 698b79747d32..16f9434fdb5d 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -727,8 +727,9 @@ static inline size_t fdb_nlmsg_size(void)
}
static int br_fdb_replay_one(struct notifier_block *nb,
- struct net_bridge_fdb_entry *fdb,
- struct net_device *dev)
+ const struct net_bridge_fdb_entry *fdb,
+ struct net_device *dev, unsigned long action,
+ const void *ctx)
{
struct switchdev_notifier_fdb_info item;
int err;
@@ -737,17 +738,20 @@ static int br_fdb_replay_one(struct notifier_block *nb,
item.vid = fdb->key.vlan_id;
item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
+ item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
item.info.dev = dev;
+ item.info.ctx = ctx;
- err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
+ err = nb->notifier_call(nb, action, &item);
return notifier_to_errno(err);
}
-int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
- struct notifier_block *nb)
+int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
+ const void *ctx, bool adding, struct notifier_block *nb)
{
struct net_bridge_fdb_entry *fdb;
struct net_bridge *br;
+ unsigned long action;
int err = 0;
if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
@@ -755,17 +759,22 @@ int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
br = netdev_priv(br_dev);
+ if (adding)
+ action = SWITCHDEV_FDB_ADD_TO_DEVICE;
+ else
+ action = SWITCHDEV_FDB_DEL_TO_DEVICE;
+
rcu_read_lock();
hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
- struct net_bridge_port *dst = READ_ONCE(fdb->dst);
+ const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
struct net_device *dst_dev;
dst_dev = dst ? dst->dev : br->dev;
if (dst_dev != br_dev && dst_dev != dev)
continue;
- err = br_fdb_replay_one(nb, fdb, dst_dev);
+ err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx);
if (err)
break;
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 3f839a8cc9fb..17a720b4473f 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -567,19 +567,21 @@ static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
}
static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
- struct switchdev_obj_port_mdb *mdb,
+ const struct switchdev_obj_port_mdb *mdb,
+ unsigned long action, const void *ctx,
struct netlink_ext_ack *extack)
{
struct switchdev_notifier_port_obj_info obj_info = {
.info = {
.dev = dev,
.extack = extack,
+ .ctx = ctx,
},
.obj = &mdb->obj,
};
int err;
- err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+ err = nb->notifier_call(nb, action, &obj_info);
return notifier_to_errno(err);
}
@@ -603,11 +605,13 @@ static int br_mdb_queue_one(struct list_head *mdb_list,
}
int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
- struct notifier_block *nb, struct netlink_ext_ack *extack)
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- struct net_bridge_mdb_entry *mp;
+ const struct net_bridge_mdb_entry *mp;
struct switchdev_obj *obj, *tmp;
struct net_bridge *br;
+ unsigned long action;
LIST_HEAD(mdb_list);
int err = 0;
@@ -632,8 +636,8 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
rcu_read_lock();
hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
- struct net_bridge_port_group __rcu **pp;
- struct net_bridge_port_group *p;
+ struct net_bridge_port_group __rcu * const *pp;
+ const struct net_bridge_port_group *p;
if (mp->host_joined) {
err = br_mdb_queue_one(&mdb_list,
@@ -662,9 +666,14 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
rcu_read_unlock();
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
list_for_each_entry(obj, &mdb_list, list) {
err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj),
- extack);
+ action, ctx, extack);
if (err)
goto out_free_mdb;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index ec661130c2d0..a684d0cfc58c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -90,8 +90,8 @@ struct bridge_mcast_stats {
#endif
struct br_tunnel_info {
- __be64 tunnel_id;
- struct metadata_dst *tunnel_dst;
+ __be64 tunnel_id;
+ struct metadata_dst __rcu *tunnel_dst;
};
/* private vlan flags */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 3dafb6143cff..1d80f34a139c 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -639,9 +639,9 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
return 0;
}
-clock_t br_get_ageing_time(struct net_device *br_dev)
+clock_t br_get_ageing_time(const struct net_device *br_dev)
{
- struct net_bridge *br;
+ const struct net_bridge *br;
if (!netif_is_bridge_master(br_dev))
return 0;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index da3256a3eed0..a08e9f193009 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -113,9 +113,7 @@ static void __vlan_add_list(struct net_bridge_vlan *v)
headp = &vg->vlan_list;
list_for_each_prev(hpos, headp) {
vent = list_entry(hpos, struct net_bridge_vlan, vlist);
- if (v->vid < vent->vid)
- continue;
- else
+ if (v->vid >= vent->vid)
break;
}
list_add_rcu(&v->vlist, hpos);
@@ -1809,28 +1807,32 @@ out_kfree:
static int br_vlan_replay_one(struct notifier_block *nb,
struct net_device *dev,
struct switchdev_obj_port_vlan *vlan,
+ const void *ctx, unsigned long action,
struct netlink_ext_ack *extack)
{
struct switchdev_notifier_port_obj_info obj_info = {
.info = {
.dev = dev,
.extack = extack,
+ .ctx = ctx,
},
.obj = &vlan->obj,
};
int err;
- err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+ err = nb->notifier_call(nb, action, &obj_info);
return notifier_to_errno(err);
}
int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
- struct notifier_block *nb, struct netlink_ext_ack *extack)
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
struct net_bridge_port *p;
struct net_bridge *br;
+ unsigned long action;
int err = 0;
u16 pvid;
@@ -1857,6 +1859,11 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
if (!vg)
return 0;
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
pvid = br_get_pvid(vg);
list_for_each_entry(v, &vg->vlan_list, vlist) {
@@ -1870,7 +1877,7 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
if (!br_vlan_should_use(v))
continue;
- err = br_vlan_replay_one(nb, dev, &vlan, extack);
+ err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack);
if (err)
return err;
}
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 0d3a8c01552e..01017448ebde 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -41,26 +41,33 @@ static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl,
br_vlan_tunnel_rht_params);
}
+static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan)
+{
+ struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst);
+
+ WRITE_ONCE(vlan->tinfo.tunnel_id, 0);
+ RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL);
+ dst_release(&tdst->dst);
+}
+
void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
struct net_bridge_vlan *vlan)
{
- if (!vlan->tinfo.tunnel_dst)
+ if (!rcu_access_pointer(vlan->tinfo.tunnel_dst))
return;
rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode,
br_vlan_tunnel_rht_params);
- vlan->tinfo.tunnel_id = 0;
- dst_release(&vlan->tinfo.tunnel_dst->dst);
- vlan->tinfo.tunnel_dst = NULL;
+ vlan_tunnel_info_release(vlan);
}
static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
struct net_bridge_vlan *vlan, u32 tun_id)
{
- struct metadata_dst *metadata = NULL;
+ struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst);
__be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
int err;
- if (vlan->tinfo.tunnel_dst)
+ if (metadata)
return -EEXIST;
metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
@@ -69,8 +76,8 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
return -EINVAL;
metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE;
- vlan->tinfo.tunnel_dst = metadata;
- vlan->tinfo.tunnel_id = key;
+ rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata);
+ WRITE_ONCE(vlan->tinfo.tunnel_id, key);
err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode,
br_vlan_tunnel_rht_params);
@@ -79,9 +86,7 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
return 0;
out:
- dst_release(&vlan->tinfo.tunnel_dst->dst);
- vlan->tinfo.tunnel_dst = NULL;
- vlan->tinfo.tunnel_id = 0;
+ vlan_tunnel_info_release(vlan);
return err;
}
@@ -182,12 +187,15 @@ int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
struct net_bridge_vlan *vlan)
{
+ struct metadata_dst *tunnel_dst;
+ __be64 tunnel_id;
int err;
- if (!vlan || !vlan->tinfo.tunnel_id)
+ if (!vlan)
return 0;
- if (unlikely(!skb_vlan_tag_present(skb)))
+ tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id);
+ if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb)))
return 0;
skb_dst_drop(skb);
@@ -195,7 +203,9 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
if (err)
return err;
- skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst));
+ tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
+ if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
+ skb_dst_set(skb, &tunnel_dst->dst);
return 0;
}
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index cac30e676ac9..23267c8db7c4 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -480,7 +480,7 @@ got_phyid:
phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
if (!phyinfo) {
res = -ENOMEM;
- goto out_err;
+ goto out;
}
phy_layer->id = phyid;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 909b9e684e04..f3e4d9528fa3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -125,7 +125,7 @@ struct bcm_sock {
struct sock sk;
int bound;
int ifindex;
- struct notifier_block notifier;
+ struct list_head notifier;
struct list_head rx_ops;
struct list_head tx_ops;
unsigned long dropped_usr_msgs;
@@ -133,6 +133,10 @@ struct bcm_sock {
char procname [32]; /* inode number in decimal with \0 */
};
+static LIST_HEAD(bcm_notifier_list);
+static DEFINE_SPINLOCK(bcm_notifier_lock);
+static struct bcm_sock *bcm_busy_notifier;
+
static inline struct bcm_sock *bcm_sk(const struct sock *sk)
{
return (struct bcm_sock *)sk;
@@ -402,6 +406,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
if (!op->count && (op->flags & TX_COUNTEVT)) {
/* create notification to user */
+ memset(&msg_head, 0, sizeof(msg_head));
msg_head.opcode = TX_EXPIRED;
msg_head.flags = op->flags;
msg_head.count = op->count;
@@ -439,6 +444,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
/* this element is not throttled anymore */
data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV);
+ memset(&head, 0, sizeof(head));
head.opcode = RX_CHANGED;
head.flags = op->flags;
head.count = op->count;
@@ -560,6 +566,7 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
}
/* create notification to user */
+ memset(&msg_head, 0, sizeof(msg_head));
msg_head.opcode = RX_TIMEOUT;
msg_head.flags = op->flags;
msg_head.count = op->count;
@@ -1378,20 +1385,15 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
/*
* notification handler for netdevice status changes
*/
-static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
- void *ptr)
+static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
+ struct net_device *dev)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
struct sock *sk = &bo->sk;
struct bcm_op *op;
int notify_enodev = 0;
if (!net_eq(dev_net(dev), sock_net(sk)))
- return NOTIFY_DONE;
-
- if (dev->type != ARPHRD_CAN)
- return NOTIFY_DONE;
+ return;
switch (msg) {
@@ -1426,7 +1428,28 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
sk->sk_error_report(sk);
}
}
+}
+static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (dev->type != ARPHRD_CAN)
+ return NOTIFY_DONE;
+ if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+ return NOTIFY_DONE;
+ if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */
+ return NOTIFY_DONE;
+
+ spin_lock(&bcm_notifier_lock);
+ list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) {
+ spin_unlock(&bcm_notifier_lock);
+ bcm_notify(bcm_busy_notifier, msg, dev);
+ spin_lock(&bcm_notifier_lock);
+ }
+ bcm_busy_notifier = NULL;
+ spin_unlock(&bcm_notifier_lock);
return NOTIFY_DONE;
}
@@ -1446,9 +1469,9 @@ static int bcm_init(struct sock *sk)
INIT_LIST_HEAD(&bo->rx_ops);
/* set notifier */
- bo->notifier.notifier_call = bcm_notifier;
-
- register_netdevice_notifier(&bo->notifier);
+ spin_lock(&bcm_notifier_lock);
+ list_add_tail(&bo->notifier, &bcm_notifier_list);
+ spin_unlock(&bcm_notifier_lock);
return 0;
}
@@ -1471,7 +1494,14 @@ static int bcm_release(struct socket *sock)
/* remove bcm_ops, timer, rx_unregister(), etc. */
- unregister_netdevice_notifier(&bo->notifier);
+ spin_lock(&bcm_notifier_lock);
+ while (bcm_busy_notifier == bo) {
+ spin_unlock(&bcm_notifier_lock);
+ schedule_timeout_uninterruptible(1);
+ spin_lock(&bcm_notifier_lock);
+ }
+ list_del(&bo->notifier);
+ spin_unlock(&bcm_notifier_lock);
lock_sock(sk);
@@ -1692,6 +1722,10 @@ static struct pernet_operations canbcm_pernet_ops __read_mostly = {
.exit = canbcm_pernet_exit,
};
+static struct notifier_block canbcm_notifier = {
+ .notifier_call = bcm_notifier
+};
+
static int __init bcm_module_init(void)
{
int err;
@@ -1705,12 +1739,14 @@ static int __init bcm_module_init(void)
}
register_pernet_subsys(&canbcm_pernet_ops);
+ register_netdevice_notifier(&canbcm_notifier);
return 0;
}
static void __exit bcm_module_exit(void)
{
can_proto_unregister(&bcm_can_proto);
+ unregister_netdevice_notifier(&canbcm_notifier);
unregister_pernet_subsys(&canbcm_pernet_ops);
}
diff --git a/net/can/isotp.c b/net/can/isotp.c
index f995eaef5d7b..bd49299319a1 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -143,10 +143,14 @@ struct isotp_sock {
u32 force_tx_stmin;
u32 force_rx_stmin;
struct tpcon rx, tx;
- struct notifier_block notifier;
+ struct list_head notifier;
wait_queue_head_t wait;
};
+static LIST_HEAD(isotp_notifier_list);
+static DEFINE_SPINLOCK(isotp_notifier_lock);
+static struct isotp_sock *isotp_busy_notifier;
+
static inline struct isotp_sock *isotp_sk(const struct sock *sk)
{
return (struct isotp_sock *)sk;
@@ -1015,7 +1019,14 @@ static int isotp_release(struct socket *sock)
/* wait for complete transmission of current pdu */
wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
- unregister_netdevice_notifier(&so->notifier);
+ spin_lock(&isotp_notifier_lock);
+ while (isotp_busy_notifier == so) {
+ spin_unlock(&isotp_notifier_lock);
+ schedule_timeout_uninterruptible(1);
+ spin_lock(&isotp_notifier_lock);
+ }
+ list_del(&so->notifier);
+ spin_unlock(&isotp_notifier_lock);
lock_sock(sk);
@@ -1319,21 +1330,16 @@ static int isotp_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
-static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
- void *ptr)
+static void isotp_notify(struct isotp_sock *so, unsigned long msg,
+ struct net_device *dev)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier);
struct sock *sk = &so->sk;
if (!net_eq(dev_net(dev), sock_net(sk)))
- return NOTIFY_DONE;
-
- if (dev->type != ARPHRD_CAN)
- return NOTIFY_DONE;
+ return;
if (so->ifindex != dev->ifindex)
- return NOTIFY_DONE;
+ return;
switch (msg) {
case NETDEV_UNREGISTER:
@@ -1359,7 +1365,28 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
sk->sk_error_report(sk);
break;
}
+}
+static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (dev->type != ARPHRD_CAN)
+ return NOTIFY_DONE;
+ if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+ return NOTIFY_DONE;
+ if (unlikely(isotp_busy_notifier)) /* Check for reentrant bug. */
+ return NOTIFY_DONE;
+
+ spin_lock(&isotp_notifier_lock);
+ list_for_each_entry(isotp_busy_notifier, &isotp_notifier_list, notifier) {
+ spin_unlock(&isotp_notifier_lock);
+ isotp_notify(isotp_busy_notifier, msg, dev);
+ spin_lock(&isotp_notifier_lock);
+ }
+ isotp_busy_notifier = NULL;
+ spin_unlock(&isotp_notifier_lock);
return NOTIFY_DONE;
}
@@ -1396,8 +1423,9 @@ static int isotp_init(struct sock *sk)
init_waitqueue_head(&so->wait);
- so->notifier.notifier_call = isotp_notifier;
- register_netdevice_notifier(&so->notifier);
+ spin_lock(&isotp_notifier_lock);
+ list_add_tail(&so->notifier, &isotp_notifier_list);
+ spin_unlock(&isotp_notifier_lock);
return 0;
}
@@ -1444,6 +1472,10 @@ static const struct can_proto isotp_can_proto = {
.prot = &isotp_proto,
};
+static struct notifier_block canisotp_notifier = {
+ .notifier_call = isotp_notifier
+};
+
static __init int isotp_module_init(void)
{
int err;
@@ -1453,6 +1485,8 @@ static __init int isotp_module_init(void)
err = can_proto_register(&isotp_can_proto);
if (err < 0)
pr_err("can: registration of isotp protocol failed %pe\n", ERR_PTR(err));
+ else
+ register_netdevice_notifier(&canisotp_notifier);
return err;
}
@@ -1460,6 +1494,7 @@ static __init int isotp_module_init(void)
static __exit void isotp_module_exit(void)
{
can_proto_unregister(&isotp_can_proto);
+ unregister_netdevice_notifier(&canisotp_notifier);
}
module_init(isotp_module_init);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index e09d087ba240..c3946c355882 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -330,6 +330,9 @@ static void j1939_session_skb_drop_old(struct j1939_session *session)
if ((do_skcb->offset + do_skb->len) < offset_start) {
__skb_unlink(do_skb, &session->skb_queue);
+ /* drop ref taken in j1939_session_skb_queue() */
+ skb_unref(do_skb);
+
kfree_skb(do_skb);
}
spin_unlock_irqrestore(&session->skb_queue.lock, flags);
@@ -349,12 +352,13 @@ void j1939_session_skb_queue(struct j1939_session *session,
skcb->flags |= J1939_ECU_LOCAL_SRC;
+ skb_get(skb);
skb_queue_tail(&session->skb_queue, skb);
}
static struct
-sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
- unsigned int offset_start)
+sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session,
+ unsigned int offset_start)
{
struct j1939_priv *priv = session->priv;
struct j1939_sk_buff_cb *do_skcb;
@@ -371,6 +375,10 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
skb = do_skb;
}
}
+
+ if (skb)
+ skb_get(skb);
+
spin_unlock_irqrestore(&session->skb_queue.lock, flags);
if (!skb)
@@ -381,12 +389,12 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
return skb;
}
-static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+static struct sk_buff *j1939_session_skb_get(struct j1939_session *session)
{
unsigned int offset_start;
offset_start = session->pkt.dpo * 7;
- return j1939_session_skb_find_by_offset(session, offset_start);
+ return j1939_session_skb_get_by_offset(session, offset_start);
}
/* see if we are receiver
@@ -776,7 +784,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
int ret = 0;
u8 dat[8];
- se_skb = j1939_session_skb_find_by_offset(session, session->pkt.tx * 7);
+ se_skb = j1939_session_skb_get_by_offset(session, session->pkt.tx * 7);
if (!se_skb)
return -ENOBUFS;
@@ -801,7 +809,8 @@ static int j1939_session_tx_dat(struct j1939_session *session)
netdev_err_once(priv->ndev,
"%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
__func__, session, skcb->offset, se_skb->len , session->pkt.tx);
- return -EOVERFLOW;
+ ret = -EOVERFLOW;
+ goto out_free;
}
if (!len) {
@@ -835,6 +844,12 @@ static int j1939_session_tx_dat(struct j1939_session *session)
if (pkt_done)
j1939_tp_set_rxtimeout(session, 250);
+ out_free:
+ if (ret)
+ kfree_skb(se_skb);
+ else
+ consume_skb(se_skb);
+
return ret;
}
@@ -1007,7 +1022,7 @@ static int j1939_xtp_txnext_receiver(struct j1939_session *session)
static int j1939_simple_txnext(struct j1939_session *session)
{
struct j1939_priv *priv = session->priv;
- struct sk_buff *se_skb = j1939_session_skb_find(session);
+ struct sk_buff *se_skb = j1939_session_skb_get(session);
struct sk_buff *skb;
int ret;
@@ -1015,8 +1030,10 @@ static int j1939_simple_txnext(struct j1939_session *session)
return 0;
skb = skb_clone(se_skb, GFP_ATOMIC);
- if (!skb)
- return -ENOMEM;
+ if (!skb) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
can_skb_set_owner(skb, se_skb->sk);
@@ -1024,12 +1041,18 @@ static int j1939_simple_txnext(struct j1939_session *session)
ret = j1939_send_one(priv, skb);
if (ret)
- return ret;
+ goto out_free;
j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
j1939_sk_queue_activate_next(session);
- return 0;
+ out_free:
+ if (ret)
+ kfree_skb(se_skb);
+ else
+ consume_skb(se_skb);
+
+ return ret;
}
static bool j1939_session_deactivate_locked(struct j1939_session *session)
@@ -1170,9 +1193,10 @@ static void j1939_session_completed(struct j1939_session *session)
struct sk_buff *skb;
if (!session->transmission) {
- skb = j1939_session_skb_find(session);
+ skb = j1939_session_skb_get(session);
/* distribute among j1939 receivers */
j1939_sk_recv(session->priv, skb);
+ consume_skb(skb);
}
j1939_session_deactivate_activate_next(session);
@@ -1744,7 +1768,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
{
struct j1939_priv *priv = session->priv;
struct j1939_sk_buff_cb *skcb;
- struct sk_buff *se_skb;
+ struct sk_buff *se_skb = NULL;
const u8 *dat;
u8 *tpdat;
int offset;
@@ -1786,7 +1810,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
goto out_session_cancel;
}
- se_skb = j1939_session_skb_find_by_offset(session, packet * 7);
+ se_skb = j1939_session_skb_get_by_offset(session, packet * 7);
if (!se_skb) {
netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
session);
@@ -1848,11 +1872,13 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
j1939_tp_set_rxtimeout(session, 250);
}
session->last_cmd = 0xff;
+ consume_skb(se_skb);
j1939_session_put(session);
return;
out_session_cancel:
+ kfree_skb(se_skb);
j1939_session_timers_cancel(session);
j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
j1939_session_put(session);
diff --git a/net/can/raw.c b/net/can/raw.c
index 139d9471ddcf..ac96fc210025 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -83,7 +83,7 @@ struct raw_sock {
struct sock sk;
int bound;
int ifindex;
- struct notifier_block notifier;
+ struct list_head notifier;
int loopback;
int recv_own_msgs;
int fd_frames;
@@ -95,6 +95,10 @@ struct raw_sock {
struct uniqframe __percpu *uniq;
};
+static LIST_HEAD(raw_notifier_list);
+static DEFINE_SPINLOCK(raw_notifier_lock);
+static struct raw_sock *raw_busy_notifier;
+
/* Return pointer to store the extra msg flags for raw_recvmsg().
* We use the space of one unsigned int beyond the 'struct sockaddr_can'
* in skb->cb.
@@ -263,21 +267,16 @@ static int raw_enable_allfilters(struct net *net, struct net_device *dev,
return err;
}
-static int raw_notifier(struct notifier_block *nb,
- unsigned long msg, void *ptr)
+static void raw_notify(struct raw_sock *ro, unsigned long msg,
+ struct net_device *dev)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
struct sock *sk = &ro->sk;
if (!net_eq(dev_net(dev), sock_net(sk)))
- return NOTIFY_DONE;
-
- if (dev->type != ARPHRD_CAN)
- return NOTIFY_DONE;
+ return;
if (ro->ifindex != dev->ifindex)
- return NOTIFY_DONE;
+ return;
switch (msg) {
case NETDEV_UNREGISTER:
@@ -305,7 +304,28 @@ static int raw_notifier(struct notifier_block *nb,
sk->sk_error_report(sk);
break;
}
+}
+
+static int raw_notifier(struct notifier_block *nb, unsigned long msg,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (dev->type != ARPHRD_CAN)
+ return NOTIFY_DONE;
+ if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+ return NOTIFY_DONE;
+ if (unlikely(raw_busy_notifier)) /* Check for reentrant bug. */
+ return NOTIFY_DONE;
+ spin_lock(&raw_notifier_lock);
+ list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) {
+ spin_unlock(&raw_notifier_lock);
+ raw_notify(raw_busy_notifier, msg, dev);
+ spin_lock(&raw_notifier_lock);
+ }
+ raw_busy_notifier = NULL;
+ spin_unlock(&raw_notifier_lock);
return NOTIFY_DONE;
}
@@ -334,9 +354,9 @@ static int raw_init(struct sock *sk)
return -ENOMEM;
/* set notifier */
- ro->notifier.notifier_call = raw_notifier;
-
- register_netdevice_notifier(&ro->notifier);
+ spin_lock(&raw_notifier_lock);
+ list_add_tail(&ro->notifier, &raw_notifier_list);
+ spin_unlock(&raw_notifier_lock);
return 0;
}
@@ -351,7 +371,14 @@ static int raw_release(struct socket *sock)
ro = raw_sk(sk);
- unregister_netdevice_notifier(&ro->notifier);
+ spin_lock(&raw_notifier_lock);
+ while (raw_busy_notifier == ro) {
+ spin_unlock(&raw_notifier_lock);
+ schedule_timeout_uninterruptible(1);
+ spin_lock(&raw_notifier_lock);
+ }
+ list_del(&ro->notifier);
+ spin_unlock(&raw_notifier_lock);
lock_sock(sk);
@@ -889,6 +916,10 @@ static const struct can_proto raw_can_proto = {
.prot = &raw_proto,
};
+static struct notifier_block canraw_notifier = {
+ .notifier_call = raw_notifier
+};
+
static __init int raw_module_init(void)
{
int err;
@@ -898,6 +929,8 @@ static __init int raw_module_init(void)
err = can_proto_register(&raw_can_proto);
if (err < 0)
pr_err("can: registration of raw protocol failed\n");
+ else
+ register_netdevice_notifier(&canraw_notifier);
return err;
}
@@ -905,6 +938,7 @@ static __init int raw_module_init(void)
static __exit void raw_module_exit(void)
{
can_proto_unregister(&raw_can_proto);
+ unregister_netdevice_notifier(&canraw_notifier);
}
module_init(raw_module_init);
diff --git a/net/core/dev.c b/net/core/dev.c
index 50531a2d0b20..991d09b67bd9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
+ if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
+ qdisc_run_begin(q)) {
+ /* Retest nolock_qdisc_is_empty() within the protection
+ * of q->seqlock to protect from racing with requeuing.
+ */
+ if (unlikely(!nolock_qdisc_is_empty(q))) {
+ rc = q->enqueue(skb, q, &to_free) &
+ NET_XMIT_MASK;
+ __qdisc_run(q);
+ qdisc_run_end(q);
+
+ goto no_lock_out;
+ }
+
+ qdisc_bstats_cpu_update(q, skb);
+ if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
+ !nolock_qdisc_is_empty(q))
+ __qdisc_run(q);
+
+ qdisc_run_end(q);
+ return NET_XMIT_SUCCESS;
+ }
+
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- if (likely(!netif_xmit_frozen_or_stopped(txq)))
- qdisc_run(q);
+ qdisc_run(q);
+no_lock_out:
if (unlikely(to_free))
kfree_skb_list(to_free);
return rc;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 566ddd147633..8fdd04f00fd7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2709,23 +2709,16 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
struct devlink_rate *devlink_rate;
- u16 old_mode;
- int err;
-
- if (!devlink->ops->eswitch_mode_get)
- return -EOPNOTSUPP;
- err = devlink->ops->eswitch_mode_get(devlink, &old_mode);
- if (err)
- return err;
-
- if (old_mode == mode)
- return 0;
+ /* Take the lock to sync with devlink_rate_nodes_destroy() */
+ mutex_lock(&devlink->lock);
list_for_each_entry(devlink_rate, &devlink->rate_list, list)
if (devlink_rate_is_node(devlink_rate)) {
+ mutex_unlock(&devlink->lock);
NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
return -EBUSY;
}
+ mutex_unlock(&devlink->lock);
return 0;
}
@@ -9275,6 +9268,8 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
mutex_lock(&devlink->lock);
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+ if (devlink_rate->parent)
+ refcount_dec(&devlink_rate->parent->refcnt);
list_del(&devlink_rate->list);
devlink_port->devlink_rate = NULL;
mutex_unlock(&devlink->lock);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2b2f333bcdfe..53e85c70c6e5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -238,6 +238,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
write_lock(&n->lock);
if ((n->nud_state == NUD_FAILED) ||
+ (n->nud_state == NUD_NOARP) ||
(tbl->is_multicast &&
tbl->is_multicast(n->primary_key)) ||
time_after(tref, n->updated))
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 43b6ac4c4439..9b5a767eddd5 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -641,6 +641,18 @@ void __put_net(struct net *net)
}
EXPORT_SYMBOL_GPL(__put_net);
+/**
+ * get_net_ns - increment the refcount of the network namespace
+ * @ns: common namespace (net)
+ *
+ * Returns the net's common namespace.
+ */
+struct ns_common *get_net_ns(struct ns_common *ns)
+{
+ return &get_net(container_of(ns, struct net, ns))->ns;
+}
+EXPORT_SYMBOL_GPL(get_net_ns);
+
struct net *get_net_ns_by_fd(int fd)
{
struct file *file;
@@ -660,14 +672,8 @@ struct net *get_net_ns_by_fd(int fd)
fput(file);
return net;
}
-
-#else
-struct net *get_net_ns_by_fd(int fd)
-{
- return ERR_PTR(-EINVAL);
-}
-#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
+#endif
struct net *get_net_ns_by_pid(pid_t pid)
{
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5baa86bca876..745965e49f78 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4850,10 +4850,12 @@ static int rtnl_bridge_notify(struct net_device *dev)
if (err < 0)
goto errout;
- if (!skb->len) {
- err = -EINVAL;
+ /* Notification info is only filled for bridge ports, not the bridge
+ * device itself. Therefore, a zero notification length is valid and
+ * should not result in an error.
+ */
+ if (!skb->len)
goto errout;
- }
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
return 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a0b1d4847efe..2531ac4ffa69 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1258,6 +1258,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
struct sock *sk = skb->sk;
struct sk_buff_head *q;
unsigned long flags;
+ bool is_zerocopy;
u32 lo, hi;
u16 len;
@@ -1272,6 +1273,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
len = uarg->len;
lo = uarg->id;
hi = uarg->id + len - 1;
+ is_zerocopy = uarg->zerocopy;
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
@@ -1279,7 +1281,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
serr->ee.ee_data = hi;
serr->ee.ee_info = lo;
- if (!uarg->zerocopy)
+ if (!is_zerocopy)
serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
q = &sk->sk_error_queue;
diff --git a/net/core/sock.c b/net/core/sock.c
index ddfa88082a2b..a2337b37eba6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1635,6 +1635,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_bound_dev_if;
break;
+ case SO_NETNS_COOKIE:
+ lv = sizeof(u64);
+ if (len != lv)
+ return -EINVAL;
+ v.val64 = sock_net(sk)->net_cookie;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index de5ee3ae86d5..3f00a28fe762 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -6,6 +6,7 @@
* selecting the socket index from the array of available sockets.
*/
+#include <net/ip.h>
#include <net/sock_reuseport.h>
#include <linux/bpf.h>
#include <linux/idr.h>
@@ -536,7 +537,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
socks = READ_ONCE(reuse->num_socks);
if (unlikely(!socks))
- goto out;
+ goto failure;
/* paired with smp_wmb() in __reuseport_add_sock() */
smp_rmb();
@@ -546,13 +547,13 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
goto select_by_hash;
- goto out;
+ goto failure;
}
if (!skb) {
skb = alloc_skb(0, GFP_ATOMIC);
if (!skb)
- goto out;
+ goto failure;
allocated = true;
}
@@ -565,12 +566,18 @@ select_by_hash:
if (!nsk)
nsk = reuseport_select_sock_by_hash(reuse, hash, socks);
- if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
+ if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) {
nsk = NULL;
+ goto failure;
+ }
out:
rcu_read_unlock();
return nsk;
+
+failure:
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+ goto out;
}
EXPORT_SYMBOL(reuseport_migrate_sock);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index b71e87909f0e..9000a8c84baf 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -219,21 +219,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst)
kref_put(&dst->refcount, dsa_tree_release);
}
-static bool dsa_port_is_dsa(struct dsa_port *port)
-{
- return port->type == DSA_PORT_TYPE_DSA;
-}
-
-static bool dsa_port_is_cpu(struct dsa_port *port)
-{
- return port->type == DSA_PORT_TYPE_CPU;
-}
-
-static bool dsa_port_is_user(struct dsa_port *dp)
-{
- return dp->type == DSA_PORT_TYPE_USER;
-}
-
static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
struct device_node *dn)
{
@@ -1259,6 +1244,13 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
if (!ds->dst)
return -ENOMEM;
+ if (dsa_switch_find(ds->dst->index, ds->index)) {
+ dev_err(ds->dev,
+ "A DSA switch with index %d already exists in tree %d\n",
+ ds->index, ds->dst->index);
+ return -EEXIST;
+ }
+
return 0;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index b8b17474b72b..c8712942002f 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -84,7 +84,7 @@ struct dsa_notifier_vlan_info {
/* DSA_NOTIFIER_MTU */
struct dsa_notifier_mtu_info {
- bool propagate_upstream;
+ bool targeted_match;
int sw_index;
int port;
int mtu;
@@ -188,19 +188,23 @@ void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
struct netlink_ext_ack *extack);
+int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
+ struct netlink_ext_ack *extack);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
int dsa_port_lag_change(struct dsa_port *dp,
struct netdev_lag_lower_state_info *linfo);
int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
struct netdev_lag_upper_info *uinfo,
struct netlink_ext_ack *extack);
+int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev,
+ struct netlink_ext_ack *extack);
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct netlink_ext_ack *extack);
bool dsa_port_skip_vlan_configuration(struct dsa_port *dp);
int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock);
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
- bool propagate_upstream);
+ bool targeted_match);
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 6379d66a6bb3..46089dd2b2ec 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -194,26 +194,51 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
if (err && err != -EOPNOTSUPP)
return err;
- err = br_mdb_replay(br, brport_dev,
- &dsa_slave_switchdev_blocking_notifier,
- extack);
+ err = br_mdb_replay(br, brport_dev, dp, true,
+ &dsa_slave_switchdev_blocking_notifier, extack);
if (err && err != -EOPNOTSUPP)
return err;
- err = br_fdb_replay(br, brport_dev, &dsa_slave_switchdev_notifier);
+ err = br_fdb_replay(br, brport_dev, dp, true,
+ &dsa_slave_switchdev_notifier);
if (err && err != -EOPNOTSUPP)
return err;
- err = br_vlan_replay(br, brport_dev,
- &dsa_slave_switchdev_blocking_notifier,
- extack);
+ err = br_vlan_replay(br, brport_dev, dp, true,
+ &dsa_slave_switchdev_blocking_notifier, extack);
if (err && err != -EOPNOTSUPP)
return err;
return 0;
}
-static void dsa_port_switchdev_unsync(struct dsa_port *dp)
+static int dsa_port_switchdev_unsync_objs(struct dsa_port *dp,
+ struct net_device *br,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+ int err;
+
+ /* Delete the switchdev objects left on this port */
+ err = br_mdb_replay(br, brport_dev, dp, false,
+ &dsa_slave_switchdev_blocking_notifier, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ err = br_fdb_replay(br, brport_dev, dp, false,
+ &dsa_slave_switchdev_notifier);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ err = br_vlan_replay(br, brport_dev, dp, false,
+ &dsa_slave_switchdev_blocking_notifier, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
+static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
{
/* Configure the port for standalone mode (no address learning,
* flood everything).
@@ -279,6 +304,12 @@ out_rollback:
return err;
}
+int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
+ struct netlink_ext_ack *extack)
+{
+ return dsa_port_switchdev_unsync_objs(dp, br, extack);
+}
+
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -298,7 +329,7 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
if (err)
pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
- dsa_port_switchdev_unsync(dp);
+ dsa_port_switchdev_unsync_attrs(dp);
}
int dsa_port_lag_change(struct dsa_port *dp,
@@ -366,6 +397,15 @@ err_lag_join:
return err;
}
+int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag,
+ struct netlink_ext_ack *extack)
+{
+ if (dp->bridge_dev)
+ return dsa_port_pre_bridge_leave(dp, dp->bridge_dev, extack);
+
+ return 0;
+}
+
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
{
struct dsa_notifier_lag_info info = {
@@ -567,11 +607,11 @@ int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
}
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
- bool propagate_upstream)
+ bool targeted_match)
{
struct dsa_notifier_mtu_info info = {
.sw_index = dp->ds->index,
- .propagate_upstream = propagate_upstream,
+ .targeted_match = targeted_match,
.port = dp->index,
.mtu = new_mtu,
};
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 798944aa847a..898ed9cf756f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -271,13 +271,16 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return phylink_mii_ioctl(p->dp->pl, ifr, cmd);
}
-static int dsa_slave_port_attr_set(struct net_device *dev,
+static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
int ret;
+ if (ctx && ctx != dp)
+ return 0;
+
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
@@ -394,13 +397,16 @@ static int dsa_slave_vlan_add(struct net_device *dev,
return vlan_vid_add(master, htons(ETH_P_8021Q), vlan.vid);
}
-static int dsa_slave_port_obj_add(struct net_device *dev,
+static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
+ if (ctx && ctx != dp)
+ return 0;
+
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_MDB:
if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
@@ -469,12 +475,15 @@ static int dsa_slave_vlan_del(struct net_device *dev,
return 0;
}
-static int dsa_slave_port_obj_del(struct net_device *dev,
+static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
+ if (ctx && ctx != dp)
+ return 0;
+
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_MDB:
if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
@@ -1528,6 +1537,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp_iter;
struct dsa_port *cpu_dp;
int port = p->dp->index;
int largest_mtu = 0;
@@ -1535,31 +1545,31 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
int old_master_mtu;
int mtu_limit;
int cpu_mtu;
- int err, i;
+ int err;
if (!ds->ops->port_change_mtu)
return -EOPNOTSUPP;
- for (i = 0; i < ds->num_ports; i++) {
+ list_for_each_entry(dp_iter, &ds->dst->ports, list) {
int slave_mtu;
- if (!dsa_is_user_port(ds, i))
+ if (!dsa_port_is_user(dp_iter))
continue;
/* During probe, this function will be called for each slave
* device, while not all of them have been allocated. That's
* ok, it doesn't change what the maximum is, so ignore it.
*/
- if (!dsa_to_port(ds, i)->slave)
+ if (!dp_iter->slave)
continue;
/* Pretend that we already applied the setting, which we
* actually haven't (still haven't done all integrity checks)
*/
- if (i == port)
+ if (dp_iter == dp)
slave_mtu = new_mtu;
else
- slave_mtu = dsa_to_port(ds, i)->slave->mtu;
+ slave_mtu = dp_iter->slave->mtu;
if (largest_mtu < slave_mtu)
largest_mtu = slave_mtu;
@@ -1585,14 +1595,15 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
goto out_master_failed;
/* We only need to propagate the MTU of the CPU port to
- * upstream switches.
+ * upstream switches, so create a non-targeted notifier which
+ * updates all switches.
*/
- err = dsa_port_mtu_change(cpu_dp, cpu_mtu, true);
+ err = dsa_port_mtu_change(cpu_dp, cpu_mtu, false);
if (err)
goto out_cpu_failed;
}
- err = dsa_port_mtu_change(dp, new_mtu, false);
+ err = dsa_port_mtu_change(dp, new_mtu, true);
if (err)
goto out_port_failed;
@@ -1606,7 +1617,7 @@ out_port_failed:
if (new_master_mtu != old_master_mtu)
dsa_port_mtu_change(cpu_dp, old_master_mtu -
dsa_tag_protocol_overhead(cpu_dp->tag_ops),
- true);
+ false);
out_cpu_failed:
if (new_master_mtu != old_master_mtu)
dev_set_mtu(master, old_master_mtu);
@@ -2066,6 +2077,26 @@ static int dsa_slave_changeupper(struct net_device *dev,
return err;
}
+static int dsa_slave_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct netlink_ext_ack *extack;
+ int err = 0;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+
+ if (netif_is_bridge_master(info->upper_dev) && !info->linking)
+ err = dsa_port_pre_bridge_leave(dp, info->upper_dev, extack);
+ else if (netif_is_lag_master(info->upper_dev) && !info->linking)
+ err = dsa_port_pre_lag_leave(dp, info->upper_dev, extack);
+ /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be
+ * meaningfully enslaved to a bridge yet
+ */
+
+ return notifier_from_errno(err);
+}
+
static int
dsa_slave_lag_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
@@ -2092,6 +2123,35 @@ dsa_slave_lag_changeupper(struct net_device *dev,
return err;
}
+/* Same as dsa_slave_lag_changeupper() except that it calls
+ * dsa_slave_prechangeupper()
+ */
+static int
+dsa_slave_lag_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+ int err = NOTIFY_DONE;
+ struct dsa_port *dp;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ if (!dsa_slave_dev_check(lower))
+ continue;
+
+ dp = dsa_slave_to_port(lower);
+ if (!dp->lag_dev)
+ /* Software LAG */
+ continue;
+
+ err = dsa_slave_prechangeupper(lower, info);
+ if (notifier_to_errno(err))
+ break;
+ }
+
+ return err;
+}
+
static int
dsa_prevent_bridging_8021q_upper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
@@ -2155,6 +2215,32 @@ dsa_slave_check_8021q_upper(struct net_device *dev,
return NOTIFY_DONE;
}
+static int
+dsa_slave_prechangeupper_sanity_check(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int err;
+
+ if (!dsa_slave_dev_check(dev))
+ return dsa_prevent_bridging_8021q_upper(dev, info);
+
+ dp = dsa_slave_to_port(dev);
+ ds = dp->ds;
+
+ if (ds->ops->port_prechangeupper) {
+ err = ds->ops->port_prechangeupper(ds, dp->index, info);
+ if (err)
+ return notifier_from_errno(err);
+ }
+
+ if (is_vlan_dev(info->upper_dev))
+ return dsa_slave_check_8021q_upper(dev, info);
+
+ return NOTIFY_DONE;
+}
+
static int dsa_slave_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -2163,24 +2249,18 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
switch (event) {
case NETDEV_PRECHANGEUPPER: {
struct netdev_notifier_changeupper_info *info = ptr;
- struct dsa_switch *ds;
- struct dsa_port *dp;
int err;
- if (!dsa_slave_dev_check(dev))
- return dsa_prevent_bridging_8021q_upper(dev, ptr);
+ err = dsa_slave_prechangeupper_sanity_check(dev, info);
+ if (err != NOTIFY_DONE)
+ return err;
- dp = dsa_slave_to_port(dev);
- ds = dp->ds;
+ if (dsa_slave_dev_check(dev))
+ return dsa_slave_prechangeupper(dev, ptr);
- if (ds->ops->port_prechangeupper) {
- err = ds->ops->port_prechangeupper(ds, dp->index, info);
- if (err)
- return notifier_from_errno(err);
- }
+ if (netif_is_lag_master(dev))
+ return dsa_slave_lag_prechangeupper(dev, ptr);
- if (is_vlan_dev(info->upper_dev))
- return dsa_slave_check_8021q_upper(dev, ptr);
break;
}
case NETDEV_CHANGEUPPER:
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 9bf8e20ecdf3..c1e5afafe633 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -52,10 +52,13 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds,
static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port,
struct dsa_notifier_mtu_info *info)
{
- if (ds->index == info->sw_index)
- return (port == info->port) || dsa_is_dsa_port(ds, port);
+ if (ds->index == info->sw_index && port == info->port)
+ return true;
- if (!info->propagate_upstream)
+ /* Do not propagate to other switches in the tree if the notifier was
+ * targeted for a single switch.
+ */
+ if (info->targeted_match)
return false;
if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
@@ -232,36 +235,15 @@ static int dsa_switch_lag_leave(struct dsa_switch *ds,
return 0;
}
-static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_mdb_info *info)
-{
- if (ds->index == info->sw_index && port == info->port)
- return true;
-
- if (dsa_is_dsa_port(ds, port))
- return true;
-
- return false;
-}
-
static int dsa_switch_mdb_add(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
- int err = 0;
- int port;
+ int port = dsa_towards_port(ds, info->sw_index, info->port);
if (!ds->ops->port_mdb_add)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_mdb_match(ds, port, info)) {
- err = ds->ops->port_mdb_add(ds, port, info->mdb);
- if (err)
- break;
- }
- }
-
- return err;
+ return ds->ops->port_mdb_add(ds, port, info->mdb);
}
static int dsa_switch_mdb_del(struct dsa_switch *ds,
@@ -364,36 +346,16 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
return 0;
}
-static bool dsa_switch_mrp_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_mrp_info *info)
-{
- if (ds->index == info->sw_index && port == info->port)
- return true;
-
- if (dsa_is_dsa_port(ds, port))
- return true;
-
- return false;
-}
-
static int dsa_switch_mrp_add(struct dsa_switch *ds,
struct dsa_notifier_mrp_info *info)
{
- int err = 0;
- int port;
-
if (!ds->ops->port_mrp_add)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_mrp_match(ds, port, info)) {
- err = ds->ops->port_mrp_add(ds, port, info->mrp);
- if (err)
- break;
- }
- }
+ if (ds->index == info->sw_index)
+ return ds->ops->port_mrp_add(ds, info->port, info->mrp);
- return err;
+ return 0;
}
static int dsa_switch_mrp_del(struct dsa_switch *ds,
@@ -408,39 +370,18 @@ static int dsa_switch_mrp_del(struct dsa_switch *ds,
return 0;
}
-static bool
-dsa_switch_mrp_ring_role_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_mrp_ring_role_info *info)
-{
- if (ds->index == info->sw_index && port == info->port)
- return true;
-
- if (dsa_is_dsa_port(ds, port))
- return true;
-
- return false;
-}
-
static int
dsa_switch_mrp_add_ring_role(struct dsa_switch *ds,
struct dsa_notifier_mrp_ring_role_info *info)
{
- int err = 0;
- int port;
-
if (!ds->ops->port_mrp_add)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_mrp_ring_role_match(ds, port, info)) {
- err = ds->ops->port_mrp_add_ring_role(ds, port,
- info->mrp);
- if (err)
- break;
- }
- }
+ if (ds->index == info->sw_index)
+ return ds->ops->port_mrp_add_ring_role(ds, info->port,
+ info->mrp);
- return err;
+ return 0;
}
static int
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 2a6733a6449a..7e6b37a54add 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -95,7 +95,7 @@ static int get_module_eeprom_by_page(struct net_device *dev,
if (dev->sfp_bus)
return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack);
- if (ops->get_module_info)
+ if (ops->get_module_eeprom_by_page)
return ops->get_module_eeprom_by_page(dev, page_data, extack);
return -EOPNOTSUPP;
@@ -159,9 +159,6 @@ static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr *
request->offset = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_OFFSET]);
request->length = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_LENGTH]);
- if (!request->length)
- return -EINVAL;
-
/* The following set of conditions limit the API to only dump 1/2
* EEPROM page without crossing low page boundary located at offset 128.
* This means user may only request dumps of length limited to 128 from
@@ -180,10 +177,6 @@ static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr *
NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH],
"reading cross half page boundary is illegal");
return -EINVAL;
- } else if (request->offset >= ETH_MODULE_EEPROM_PAGE_LEN * 2) {
- NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_OFFSET],
- "offset is out of bounds");
- return -EINVAL;
} else if (request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN * 2) {
NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH],
"reading cross page boundary is illegal");
@@ -236,8 +229,10 @@ const struct ethnl_request_ops ethnl_module_eeprom_request_ops = {
const struct nla_policy ethnl_module_eeprom_get_policy[] = {
[ETHTOOL_A_MODULE_EEPROM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
- [ETHTOOL_A_MODULE_EEPROM_OFFSET] = { .type = NLA_U32 },
- [ETHTOOL_A_MODULE_EEPROM_LENGTH] = { .type = NLA_U32 },
+ [ETHTOOL_A_MODULE_EEPROM_OFFSET] =
+ NLA_POLICY_MAX(NLA_U32, ETH_MODULE_EEPROM_PAGE_LEN * 2 - 1),
+ [ETHTOOL_A_MODULE_EEPROM_LENGTH] =
+ NLA_POLICY_RANGE(NLA_U32, 1, ETH_MODULE_EEPROM_PAGE_LEN),
[ETHTOOL_A_MODULE_EEPROM_PAGE] = { .type = NLA_U8 },
[ETHTOOL_A_MODULE_EEPROM_BANK] = { .type = NLA_U8 },
[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS] =
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 3fa7a394eabf..baa5d10043cb 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr,
if (eeprom.offset + eeprom.len > total_len)
return -EINVAL;
- data = kmalloc(PAGE_SIZE, GFP_USER);
+ data = kzalloc(PAGE_SIZE, GFP_USER);
if (!data)
return -ENOMEM;
@@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
- data = kmalloc(PAGE_SIZE, GFP_USER);
+ data = kzalloc(PAGE_SIZE, GFP_USER);
if (!data)
return -ENOMEM;
@@ -1765,7 +1765,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
return -EFAULT;
test.len = test_len;
- data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
+ data = kcalloc(test_len, sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
@@ -2293,7 +2293,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
ret = ethtool_tunable_valid(&tuna);
if (ret)
return ret;
- data = kmalloc(tuna.len, GFP_USER);
+ data = kzalloc(tuna.len, GFP_USER);
if (!data)
return -ENOMEM;
ret = ops->get_tunable(dev, &tuna, data);
@@ -2485,7 +2485,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
ret = ethtool_phy_tunable_valid(&tuna);
if (ret)
return ret;
- data = kmalloc(tuna.len, GFP_USER);
+ data = kzalloc(tuna.len, GFP_USER);
if (!data)
return -ENOMEM;
if (phy_drv_tunable) {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 90b10966b16b..3e25a47fd482 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -380,7 +380,7 @@ extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_T
extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1];
extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
-extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA + 1];
+extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1];
extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1];
int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index b3029fff715d..2d51b7ab4dc5 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -353,6 +353,8 @@ static int strset_reply_size(const struct ethnl_req_info *req_base,
int len = 0;
int ret;
+ len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */
+
for (i = 0; i < ETH_SS_COUNT; i++) {
const struct strset_info *set_info = &data->sets[i];
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 750f388a4a68..54648181dd56 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -575,7 +575,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
return err;
}
- if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+ if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
return -EAGAIN;
return sk->sk_prot->connect(sk, uaddr, addr_len);
}
@@ -803,7 +803,7 @@ int inet_send_prepare(struct sock *sk)
sock_rps_record_flow(sk);
/* We may need to bind the socket. */
- if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+ if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind &&
inet_autobind(sk))
return -EAGAIN;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 36ed85bf2ad5..2d2d08aa787d 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -554,7 +554,6 @@ static int ah4_rcv_cb(struct sk_buff *skb, int err)
static const struct xfrm_type ah_type =
{
- .description = "AH4",
.owner = THIS_MODULE,
.proto = IPPROTO_AH,
.flags = XFRM_TYPE_REPLAY_PROT,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index d6e3a92841e3..099259fc826a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -471,6 +471,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
kfree(doi_def->map.std->lvl.local);
kfree(doi_def->map.std->cat.cipso);
kfree(doi_def->map.std->cat.local);
+ kfree(doi_def->map.std);
break;
}
kfree(doi_def);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 50deeff48c8b..73721a4448bd 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
- BUG();
+ return -EINVAL;
if (tb[IFLA_INET_CONF]) {
nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 35803ab7ac80..f5362b9d75eb 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1198,7 +1198,6 @@ static int esp4_rcv_cb(struct sk_buff *skb, int err)
static const struct xfrm_type esp_type =
{
- .description = "ESP4",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.flags = XFRM_TYPE_REPLAY_PROT,
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 33687cf58286..8e4e9aa12130 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -33,12 +33,11 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
struct xfrm_state *x;
__be32 seq;
__be32 spi;
- int err;
if (!pskb_pull(skb, offset))
return NULL;
- if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
+ if (xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq) != 0)
goto out;
xo = xfrm_offload(skb);
@@ -343,7 +342,6 @@ static const struct net_offload esp4_offload = {
};
static const struct xfrm_type_offload esp_type_offload = {
- .description = "ESP4 OFFLOAD",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.input_tail = esp_input_tail,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 2e09d62d59e3..c695d294a5df 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -759,6 +759,13 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
icmp_param.data_len = room;
icmp_param.head_len = sizeof(struct icmphdr);
+ /* if we don't have a source address at this point, fall back to the
+ * dummy address instead of sending out a packet with a source address
+ * of 0.0.0.0
+ */
+ if (!fl4.saddr)
+ fl4.saddr = htonl(INADDR_DUMMY);
+
icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
ende:
ip_rt_put(rt);
@@ -986,14 +993,8 @@ static bool icmp_redirect(struct sk_buff *skb)
static bool icmp_echo(struct sk_buff *skb)
{
- struct icmp_ext_hdr *ext_hdr, _ext_hdr;
- struct icmp_ext_echo_iio *iio, _iio;
struct icmp_bxm icmp_param;
- struct net_device *dev;
- char buff[IFNAMSIZ];
struct net *net;
- u16 ident_len;
- u8 status;
net = dev_net(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echos? */
@@ -1006,20 +1007,46 @@ static bool icmp_echo(struct sk_buff *skb)
icmp_param.data_len = skb->len;
icmp_param.head_len = sizeof(struct icmphdr);
- if (icmp_param.data.icmph.type == ICMP_ECHO) {
+ if (icmp_param.data.icmph.type == ICMP_ECHO)
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
- goto send_reply;
- }
- if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+ else if (!icmp_build_probe(skb, &icmp_param.data.icmph))
return true;
+
+ icmp_reply(&icmp_param, skb);
+ return true;
+}
+
+/* Helper for icmp_echo and icmpv6_echo_reply.
+ * Searches for net_device that matches PROBE interface identifier
+ * and builds PROBE reply message in icmphdr.
+ *
+ * Returns false if PROBE responses are disabled via sysctl
+ */
+
+bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
+{
+ struct icmp_ext_hdr *ext_hdr, _ext_hdr;
+ struct icmp_ext_echo_iio *iio, _iio;
+ struct net *net = dev_net(skb->dev);
+ struct net_device *dev;
+ char buff[IFNAMSIZ];
+ u16 ident_len;
+ u8 status;
+
+ if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+ return false;
+
/* We currently only support probing interfaces on the proxy node
* Check to ensure L-bit is set
*/
- if (!(ntohs(icmp_param.data.icmph.un.echo.sequence) & 1))
- return true;
+ if (!(ntohs(icmphdr->un.echo.sequence) & 1))
+ return false;
/* Clear status bits in reply message */
- icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00);
- icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY;
+ icmphdr->un.echo.sequence &= htons(0xFF00);
+ if (icmphdr->type == ICMP_EXT_ECHO)
+ icmphdr->type = ICMP_EXT_ECHOREPLY;
+ else
+ icmphdr->type = ICMPV6_EXT_ECHO_REPLY;
ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr);
/* Size of iio is class_type dependent.
* Only check header here and assign length based on ctype in the switch statement
@@ -1080,8 +1107,8 @@ static bool icmp_echo(struct sk_buff *skb)
goto send_mal_query;
}
if (!dev) {
- icmp_param.data.icmph.code = ICMP_EXT_CODE_NO_IF;
- goto send_reply;
+ icmphdr->code = ICMP_EXT_CODE_NO_IF;
+ return true;
}
/* Fill bits in reply message */
if (dev->flags & IFF_UP)
@@ -1091,14 +1118,13 @@ static bool icmp_echo(struct sk_buff *skb)
if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list))
status |= ICMP_EXT_ECHOREPLY_IPV6;
dev_put(dev);
- icmp_param.data.icmph.un.echo.sequence |= htons(status);
-send_reply:
- icmp_reply(&icmp_param, skb);
- return true;
+ icmphdr->un.echo.sequence |= htons(status);
+ return true;
send_mal_query:
- icmp_param.data.icmph.code = ICMP_EXT_CODE_MAL_QUERY;
- goto send_reply;
+ icmphdr->code = ICMP_EXT_CODE_MAL_QUERY;
+ return true;
}
+EXPORT_SYMBOL_GPL(icmp_build_probe);
/*
* Handle ICMP Timestamp requests.
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7b272bbed2b4..6b3c558a4f23 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1801,6 +1801,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
in_dev->mc_list = i->next_rcu;
in_dev->mc_count--;
+ ip_mc_clear_src(i);
ip_ma_put(i);
}
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0eea878edc30..754013fa393b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -703,6 +703,8 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req,
nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
if (!nreq) {
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+
/* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
sock_put(sk);
return NULL;
@@ -876,9 +878,10 @@ static void reqsk_timer_handler(struct timer_list *t)
if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
/* delete timer */
inet_csk_reqsk_queue_drop(sk_listener, nreq);
- goto drop;
+ goto no_ownership;
}
+ __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
reqsk_migrate_reset(oreq);
reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
reqsk_put(oreq);
@@ -887,17 +890,19 @@ static void reqsk_timer_handler(struct timer_list *t)
return;
}
-drop:
/* Even if we can clone the req, we may need not retransmit any more
* SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
* CPU may win the "own_req" race so that inet_ehash_insert() fails.
*/
if (nreq) {
+ __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
+no_ownership:
reqsk_migrate_reset(nreq);
reqsk_queue_removed(queue, nreq);
__reqsk_free(nreq);
}
+drop:
inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
}
@@ -1135,11 +1140,13 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
refcount_set(&nreq->rsk_refcnt, 1);
if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS);
reqsk_migrate_reset(req);
reqsk_put(req);
return child;
}
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
reqsk_migrate_reset(nreq);
__reqsk_free(nreq);
} else if (inet_csk_reqsk_queue_add(sk, req, child)) {
@@ -1188,8 +1195,12 @@ void inet_csk_listen_stop(struct sock *sk)
refcount_set(&nreq->rsk_refcnt, 1);
if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
+ __NET_INC_STATS(sock_net(nsk),
+ LINUX_MIB_TCPMIGRATEREQSUCCESS);
reqsk_migrate_reset(req);
} else {
+ __NET_INC_STATS(sock_net(nsk),
+ LINUX_MIB_TCPMIGRATEREQFAILURE);
reqsk_migrate_reset(nreq);
__reqsk_free(nreq);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a68bf4c6fe9b..12dca0c85f3c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -107,6 +107,8 @@ module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
+static const struct header_ops ipgre_header_ops;
+
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
u32 id, u32 index,
@@ -364,7 +366,10 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
raw_proto, false) < 0)
goto drop;
- if (tunnel->dev->type != ARPHRD_NONE)
+ /* Special case for ipgre_header_parse(), which expects the
+ * mac_header to point to the outer IP header.
+ */
+ if (tunnel->dev->header_ops == &ipgre_header_ops)
skb_pop_mac_header(skb);
else
skb_reset_mac_header(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c3efc7d658f6..8d8a8da3ae7e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1054,7 +1054,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
- unsigned int alloclen;
+ unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
@@ -1074,35 +1074,39 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
+ alloc_extra = hh_len + 15;
+ alloc_extra += exthdrlen;
+
+ /* The last fragment gets additional space at tail.
+ * Note, with MSG_MORE we overallocate on fragments,
+ * because we have no idea what fragment will be
+ * the last.
+ */
+ if (datalen == length + fraggap)
+ alloc_extra += rt->dst.trailer_len;
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else if (!paged)
+ else if (!paged &&
+ (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+ !(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
- alloclen += exthdrlen;
-
- /* The last fragment gets additional space at tail.
- * Note, with MSG_MORE we overallocate on fragments,
- * because we have no idea what fragment will be
- * the last.
- */
- if (datalen == length + fraggap)
- alloclen += rt->dst.trailer_len;
+ alloclen += alloc_extra;
if (transhdrlen) {
- skb = sock_alloc_send_skb(sk,
- alloclen + hh_len + 15,
+ skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = alloc_skb(alloclen + hh_len + 15,
+ skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index b42683212c65..2e69e81e1f5d 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -152,7 +152,6 @@ static int ipcomp4_rcv_cb(struct sk_buff *skb, int err)
}
static const struct xfrm_type ipcomp_type = {
- .description = "IPCOMP4",
.owner = THIS_MODULE,
.proto = IPPROTO_COMP,
.init_state = ipcomp4_init_state,
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index d5bfa087c23a..266c65577ba6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -242,6 +242,8 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
if (!tun_dst)
return 0;
}
+ skb_reset_mac_header(skb);
+
return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 1c9f71a37258..95a718397fd1 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -954,6 +954,7 @@ bool ping_rcv(struct sk_buff *skb)
struct sock *sk;
struct net *net = dev_net(skb->dev);
struct icmphdr *icmph = icmp_hdr(skb);
+ bool rc = false;
/* We assume the packet has already been checked by icmp_rcv */
@@ -968,14 +969,15 @@ bool ping_rcv(struct sk_buff *skb)
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
pr_debug("rcv on socket %p\n", sk);
- if (skb2)
- ping_queue_rcv_skb(sk, skb2);
+ if (skb2 && !ping_queue_rcv_skb(sk, skb2))
+ rc = true;
sock_put(sk);
- return true;
}
- pr_debug("no socket, dropping\n");
- return false;
+ if (!rc)
+ pr_debug("no socket, dropping\n");
+
+ return rc;
}
EXPORT_SYMBOL_GPL(ping_rcv);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6d46297a99f8..b0d3a09dc84e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -295,6 +295,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS),
SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
+ SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
+ SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a4c477475f4c..66aacb939d3e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2179,6 +2179,19 @@ martian_source:
return err;
}
+/* get device for dst_alloc with local routes */
+static struct net_device *ip_rt_get_dev(struct net *net,
+ const struct fib_result *res)
+{
+ struct fib_nh_common *nhc = res->fi ? res->nhc : NULL;
+ struct net_device *dev = NULL;
+
+ if (nhc)
+ dev = l3mdev_master_dev_rcu(nhc->nhc_dev);
+
+ return dev ? : net->loopback_dev;
+}
+
/*
* NOTE. We drop all the packets that has local source
* addresses, because every properly looped back packet
@@ -2335,7 +2348,7 @@ local_input:
}
}
- rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
+ rth = rt_dst_alloc(ip_rt_get_dev(net, res),
flags | RTCF_LOCAL, res->type,
IN_DEV_ORCONF(in_dev, NOPOLICY), false);
if (!rth)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f258a4c0da71..0a4f3f16140a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -786,6 +786,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
+ if (sk != req->rsk_listener)
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+
if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
inet_rsk(req)->acked = 1;
return NULL;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 15f5504adf5b..1307ad0d3b9e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2607,6 +2607,9 @@ void udp_destroy_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
bool slow = lock_sock_fast(sk);
+
+ /* protects from races with udp_abort() */
+ sock_set_flag(sk, SOCK_DEAD);
udp_flush_pending_frames(sk);
unlock_sock_fast(sk, slow);
if (static_branch_unlikely(&udp_encap_needed_key)) {
@@ -2857,10 +2860,17 @@ int udp_abort(struct sock *sk, int err)
{
lock_sock(sk);
+ /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
+ * with close()
+ */
+ if (sock_flag(sk, SOCK_DEAD))
+ goto out;
+
sk->sk_err = err;
sk->sk_error_report(sk);
__udp_disconnect(sk, 0);
+out:
release_sock(sk);
return 0;
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index fb0648e7fb32..f4555a88f86b 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -42,7 +42,6 @@ static void ipip_destroy(struct xfrm_state *x)
}
static const struct xfrm_type ipip_type = {
- .description = "IPIP",
.owner = THIS_MODULE,
.proto = IPPROTO_IPIP,
.init_state = ipip_init_state,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 048570900fdf..3bf685fe64b9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5827,7 +5827,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
- BUG();
+ return -EINVAL;
if (tb[IFLA_INET6_TOKEN]) {
err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]),
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 20d492da725a..828e62514260 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -755,7 +755,6 @@ static int ah6_rcv_cb(struct sk_buff *skb, int err)
}
static const struct xfrm_type ah6_type = {
- .description = "AH6",
.owner = THIS_MODULE,
.proto = IPPROTO_AH,
.flags = XFRM_TYPE_REPLAY_PROT,
@@ -763,7 +762,6 @@ static const struct xfrm_type ah6_type = {
.destructor = ah6_destroy,
.input = ah6_input,
.output = ah6_output,
- .hdr_offset = xfrm6_find_1stfragopt,
};
static struct xfrm6_protocol ah6_protocol = {
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 393ae2b78e7d..37c4b1726c5e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1243,7 +1243,6 @@ static int esp6_rcv_cb(struct sk_buff *skb, int err)
}
static const struct xfrm_type esp6_type = {
- .description = "ESP6",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.flags = XFRM_TYPE_REPLAY_PROT,
@@ -1251,7 +1250,6 @@ static const struct xfrm_type esp6_type = {
.destructor = esp6_destroy,
.input = esp6_input,
.output = esp6_output,
- .hdr_offset = xfrm6_find_1stfragopt,
};
static struct xfrm6_protocol esp6_protocol = {
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 40ed4fcf1cf4..a349d4798077 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -377,7 +377,6 @@ static const struct net_offload esp6_offload = {
};
static const struct xfrm_type_offload esp6_type_offload = {
- .description = "ESP6 OFFLOAD",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.input_tail = esp6_input_tail,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e8398ffb5e35..a7c31ab67c5d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -725,6 +725,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct ipcm6_cookie ipc6;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
bool acast;
+ u8 type;
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
@@ -740,8 +741,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
!(net->ipv6.sysctl.anycast_src_echo_reply && acast))
saddr = NULL;
+ if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
+ type = ICMPV6_EXT_ECHO_REPLY;
+ else
+ type = ICMPV6_ECHO_REPLY;
+
memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
- tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
+ tmp_hdr.icmp6_type = type;
memset(&fl6, 0, sizeof(fl6));
if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
@@ -752,7 +758,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (saddr)
fl6.saddr = *saddr;
fl6.flowi6_oif = icmp6_iif(skb);
- fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+ fl6.fl6_icmp_type = type;
fl6.flowi6_mark = mark;
fl6.flowi6_uid = sock_net_uid(net, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
@@ -783,13 +789,17 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
msg.skb = skb;
msg.offset = 0;
- msg.type = ICMPV6_ECHO_REPLY;
+ msg.type = type;
ipcm6_init_sk(&ipc6, np);
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
ipc6.sockc.mark = mark;
+ if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
+ if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
+ goto out_dst_release;
+
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), &ipc6, &fl6,
@@ -911,6 +921,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
icmpv6_echo_reply(skb);
break;
+ case ICMPV6_EXT_ECHO_REQUEST:
+ if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
+ net->ipv4.sysctl_icmp_echo_enable_probe)
+ icmpv6_echo_reply(skb);
+ break;
case ICMPV6_ECHO_REPLY:
success = ping_rcv(skb);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ff4f9ebcf7f6..984050f35c61 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1055,13 +1055,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* ip6_route_output will fail given src=any saddr, though, so
* that's why we try it again later.
*/
- if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+ if (ipv6_addr_any(&fl6->saddr)) {
struct fib6_info *from;
struct rt6_info *rt;
- bool had_dst = *dst != NULL;
- if (!had_dst)
- *dst = ip6_route_output(net, sk, fl6);
+ *dst = ip6_route_output(net, sk, fl6);
rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
rcu_read_lock();
@@ -1078,7 +1076,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* never existed and let the SA-enabled version take
* over.
*/
- if (!had_dst && (*dst)->error) {
+ if ((*dst)->error) {
dst_release(*dst);
*dst = NULL;
}
@@ -1555,7 +1553,7 @@ emsgsize:
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
- unsigned int alloclen;
+ unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
@@ -1582,17 +1580,28 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
+ alloc_extra = hh_len;
+ alloc_extra += dst_exthdrlen;
+ alloc_extra += rt->dst.trailer_len;
+
+ /* We just reserve space for fragment header.
+ * Note: this may be overallocation if the message
+ * (without MSG_MORE) fits into the MTU.
+ */
+ alloc_extra += sizeof(struct frag_hdr);
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else if (!paged)
+ else if (!paged &&
+ (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+ !(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
-
- alloclen += dst_exthdrlen;
+ alloclen += alloc_extra;
if (datalen != length + fraggap) {
/*
@@ -1602,30 +1611,21 @@ alloc_new_skb:
datalen += rt->dst.trailer_len;
}
- alloclen += rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
- /*
- * We just reserve space for fragment header.
- * Note: this may be overallocation if the message
- * (without MSG_MORE) fits into the MTU.
- */
- alloclen += sizeof(struct frag_hdr);
-
copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy < 0) {
err = -EINVAL;
goto error;
}
if (transhdrlen) {
- skb = sock_alloc_send_skb(sk,
- alloclen + hh_len,
+ skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = alloc_skb(alloclen + hh_len,
+ skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 288bafded998..0b8a38687ce4 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else {
skb->dev = tunnel->dev;
+ skb_reset_mac_header(skb);
}
skb_reset_network_header(skb);
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index daef890460b7..15f984be3570 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -172,14 +172,12 @@ static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
}
static const struct xfrm_type ipcomp6_type = {
- .description = "IPCOMP6",
.owner = THIS_MODULE,
.proto = IPPROTO_COMP,
.init_state = ipcomp6_init_state,
.destructor = ipcomp_destroy,
.input = ipcomp_input,
.output = ipcomp_output,
- .hdr_offset = xfrm6_find_1stfragopt,
};
static struct xfrm6_protocol ipcomp6_protocol = {
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 878fcec14949..aeb35d26e474 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -247,54 +247,6 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
return err;
}
-static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
- u8 **nexthdr)
-{
- u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr =
- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
- const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb_tail_pointer(skb) -
- skb_network_header(skb);
- int found_rhdr = 0;
-
- *nexthdr = &ipv6_hdr(skb)->nexthdr;
-
- while (offset + 1 <= packet_len) {
-
- switch (**nexthdr) {
- case NEXTHDR_HOP:
- break;
- case NEXTHDR_ROUTING:
- found_rhdr = 1;
- break;
- case NEXTHDR_DEST:
- /*
- * HAO MUST NOT appear more than once.
- * XXX: It is better to try to find by the end of
- * XXX: packet if HAO exists.
- */
- if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
- net_dbg_ratelimited("mip6: hao exists already, override\n");
- return offset;
- }
-
- if (found_rhdr)
- return offset;
-
- break;
- default:
- return offset;
- }
-
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
- }
-
- return offset;
-}
-
static int mip6_destopt_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
@@ -324,7 +276,6 @@ static void mip6_destopt_destroy(struct xfrm_state *x)
}
static const struct xfrm_type mip6_destopt_type = {
- .description = "MIP6DESTOPT",
.owner = THIS_MODULE,
.proto = IPPROTO_DSTOPTS,
.flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
@@ -333,7 +284,6 @@ static const struct xfrm_type mip6_destopt_type = {
.input = mip6_destopt_input,
.output = mip6_destopt_output,
.reject = mip6_destopt_reject,
- .hdr_offset = mip6_destopt_offset,
};
static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
@@ -383,53 +333,6 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
-static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
- u8 **nexthdr)
-{
- u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr =
- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
- const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb_tail_pointer(skb) -
- skb_network_header(skb);
- int found_rhdr = 0;
-
- *nexthdr = &ipv6_hdr(skb)->nexthdr;
-
- while (offset + 1 <= packet_len) {
-
- switch (**nexthdr) {
- case NEXTHDR_HOP:
- break;
- case NEXTHDR_ROUTING:
- if (offset + 3 <= packet_len) {
- struct ipv6_rt_hdr *rt;
- rt = (struct ipv6_rt_hdr *)(nh + offset);
- if (rt->type != 0)
- return offset;
- }
- found_rhdr = 1;
- break;
- case NEXTHDR_DEST:
- if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
- return offset;
-
- if (found_rhdr)
- return offset;
-
- break;
- default:
- return offset;
- }
-
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
- }
-
- return offset;
-}
-
static int mip6_rthdr_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
@@ -456,7 +359,6 @@ static void mip6_rthdr_destroy(struct xfrm_state *x)
}
static const struct xfrm_type mip6_rthdr_type = {
- .description = "MIP6RT",
.owner = THIS_MODULE,
.proto = IPPROTO_ROUTING,
.flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
@@ -464,7 +366,6 @@ static const struct xfrm_type mip6_rthdr_type = {
.destructor = mip6_rthdr_destroy,
.input = mip6_rthdr_input,
.output = mip6_rthdr_output,
- .hdr_offset = mip6_rthdr_offset,
};
static int __init mip6_init(void)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index e204163c7036..92f3235fa287 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
}
EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
+static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph)
+{
+ if (likely(next != IPPROTO_ICMPV6))
+ return false;
+
+ if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY)
+ return false;
+
+ return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL;
+}
+
void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
@@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
- if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
- nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
- nft_fib_store_result(dest, priv, nft_in(pkt));
- return;
+ if (nft_hook(pkt) == NF_INET_PRE_ROUTING ||
+ nft_hook(pkt) == NF_INET_INGRESS) {
+ if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) ||
+ nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
+ nft_fib_store_result(dest, priv, nft_in(pkt));
+ return;
+ }
}
*dest = 0;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 4ff38cb08f4b..60bf3b877957 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -87,10 +87,10 @@ struct seg6_end_dt_info {
int vrf_ifindex;
int vrf_table;
- /* tunneled packet proto and family (IPv4 or IPv6) */
- __be16 proto;
+ /* tunneled packet family (IPv4 or IPv6).
+ * Protocol and header length are inferred from family.
+ */
u16 family;
- int hdrlen;
};
struct pcpu_seg6_local_counters {
@@ -521,19 +521,6 @@ static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
info->net = net;
info->vrf_ifindex = vrf_ifindex;
- switch (family) {
- case AF_INET:
- info->proto = htons(ETH_P_IP);
- info->hdrlen = sizeof(struct iphdr);
- break;
- case AF_INET6:
- info->proto = htons(ETH_P_IPV6);
- info->hdrlen = sizeof(struct ipv6hdr);
- break;
- default:
- return -EINVAL;
- }
-
info->family = family;
info->mode = DT_VRF_MODE;
@@ -622,22 +609,44 @@ error:
}
static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
- struct seg6_local_lwt *slwt)
+ struct seg6_local_lwt *slwt, u16 family)
{
struct seg6_end_dt_info *info = &slwt->dt_info;
struct net_device *vrf;
+ __be16 protocol;
+ int hdrlen;
vrf = end_dt_get_vrf_rcu(skb, info);
if (unlikely(!vrf))
goto drop;
- skb->protocol = info->proto;
+ switch (family) {
+ case AF_INET:
+ protocol = htons(ETH_P_IP);
+ hdrlen = sizeof(struct iphdr);
+ break;
+ case AF_INET6:
+ protocol = htons(ETH_P_IPV6);
+ hdrlen = sizeof(struct ipv6hdr);
+ break;
+ case AF_UNSPEC:
+ fallthrough;
+ default:
+ goto drop;
+ }
+
+ if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
+ pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
+ goto drop;
+ }
+
+ skb->protocol = protocol;
skb_dst_drop(skb);
- skb_set_transport_header(skb, info->hdrlen);
+ skb_set_transport_header(skb, hdrlen);
- return end_dt_vrf_rcv(skb, info->family, vrf);
+ return end_dt_vrf_rcv(skb, family, vrf);
drop:
kfree_skb(skb);
@@ -656,7 +665,7 @@ static int input_action_end_dt4(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
- skb = end_dt_vrf_core(skb, slwt);
+ skb = end_dt_vrf_core(skb, slwt, AF_INET);
if (!skb)
/* packet has been processed and consumed by the VRF */
return 0;
@@ -739,7 +748,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
goto legacy_mode;
/* DT6_VRF_MODE */
- skb = end_dt_vrf_core(skb, slwt);
+ skb = end_dt_vrf_core(skb, slwt, AF_INET6);
if (!skb)
/* packet has been processed and consumed by the VRF */
return 0;
@@ -767,6 +776,36 @@ drop:
return -EINVAL;
}
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
+}
+
+static int input_action_end_dt46(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ unsigned int off = 0;
+ int nexthdr;
+
+ nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
+ if (unlikely(nexthdr < 0))
+ goto drop;
+
+ switch (nexthdr) {
+ case IPPROTO_IPIP:
+ return input_action_end_dt4(skb, slwt);
+ case IPPROTO_IPV6:
+ return input_action_end_dt6(skb, slwt);
+ }
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+#endif
+
/* push an SRH on top of the current one */
static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
@@ -969,6 +1008,17 @@ static struct seg6_action_desc seg6_action_table[] = {
.input = input_action_end_dt6,
},
{
+ .action = SEG6_LOCAL_ACTION_END_DT46,
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ .input = input_action_end_dt46,
+ .slwt_ops = {
+ .build_state = seg6_end_dt46_build,
+ },
+#endif
+ },
+ {
.action = SEG6_LOCAL_ACTION_END_B6,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
.optattrs = SEG6_F_LOCAL_COUNTERS,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e0a39b0bb4c1..df5bea818410 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -710,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb)
* old iph is no longer valid
*/
iph = (const struct iphdr *)skb_mac_header(skb);
+ skb_reset_mac_header(skb);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -780,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
tpi = &ipip_tpi;
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
+ skb_reset_mac_header(skb);
+
return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 199b080d418a..3fcd86f4dfdc 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1598,6 +1598,9 @@ void udpv6_destroy_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
lock_sock(sk);
+
+ /* protects from races with udp_abort() */
+ sock_set_flag(sk, SOCK_DEAD);
udp_v6_flush_pending_frames(sk);
release_sock(sk);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8b84d534b19d..57fa27c1cdf9 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -16,13 +16,6 @@
#include <net/ip6_route.h>
#include <net/xfrm.h>
-int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
- u8 **prevhdr)
-{
- return ip6_find_1stfragopt(skb, prevhdr);
-}
-EXPORT_SYMBOL(xfrm6_find_1stfragopt);
-
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
{
struct flowi6 fl6;
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f696d46e6910..2b31112c0856 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -291,7 +291,6 @@ static void xfrm6_tunnel_destroy(struct xfrm_state *x)
}
static const struct xfrm_type xfrm6_tunnel_type = {
- .description = "IP6IP6",
.owner = THIS_MODULE,
.proto = IPPROTO_IPV6,
.init_state = xfrm6_tunnel_init_state,
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 1c572c8daced..6201965bd822 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1066,11 +1066,6 @@ out_error:
goto partial_message;
}
- if (skb_has_frag_list(head)) {
- kfree_skb_list(skb_shinfo(head)->frag_list);
- skb_shinfo(head)->frag_list = NULL;
- }
-
if (head != kcm->seq_skb)
kfree_skb(head);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ef9b4ac03e7b..de24a7d474df 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -141,7 +141,6 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
struct sock *sk;
struct pfkey_sock *pfk;
- int err;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -150,10 +149,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
if (protocol != PF_KEY_V2)
return -EPROTONOSUPPORT;
- err = -ENOMEM;
sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern);
if (sk == NULL)
- goto out;
+ return -ENOMEM;
pfk = pfkey_sk(sk);
mutex_init(&pfk->dump_lock);
@@ -169,8 +167,6 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
pfkey_insert(sk);
return 0;
-out:
- return err;
}
static int pfkey_release(struct socket *sock)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7a99892e5aba..84cc7733ea66 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1442,6 +1442,38 @@ static void sta_apply_mesh_params(struct ieee80211_local *local,
#endif
}
+static void sta_apply_airtime_params(struct ieee80211_local *local,
+ struct sta_info *sta,
+ struct station_parameters *params)
+{
+ u8 ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ struct airtime_sched_info *air_sched = &local->airtime[ac];
+ struct airtime_info *air_info = &sta->airtime[ac];
+ struct txq_info *txqi;
+ u8 tid;
+
+ spin_lock_bh(&air_sched->lock);
+ for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
+ if (air_info->weight == params->airtime_weight ||
+ !sta->sta.txq[tid] ||
+ ac != ieee80211_ac_from_tid(tid))
+ continue;
+
+ airtime_weight_set(air_info, params->airtime_weight);
+
+ txqi = to_txq_info(sta->sta.txq[tid]);
+ if (RB_EMPTY_NODE(&txqi->schedule_order))
+ continue;
+
+ ieee80211_update_airtime_weight(local, air_sched,
+ 0, true);
+ }
+ spin_unlock_bh(&air_sched->lock);
+ }
+}
+
static int sta_apply_parameters(struct ieee80211_local *local,
struct sta_info *sta,
struct station_parameters *params)
@@ -1629,7 +1661,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
sta_apply_mesh_params(local, sta, params);
if (params->airtime_weight)
- sta->airtime_weight = params->airtime_weight;
+ sta_apply_airtime_params(local, sta, params);
+
/* set the STA state after all sta info from usermode has been set */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
@@ -1693,15 +1726,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
test_sta_flag(sta, WLAN_STA_ASSOC))
rate_control_rate_init(sta);
- err = sta_info_insert_rcu(sta);
- if (err) {
- rcu_read_unlock();
- return err;
- }
-
- rcu_read_unlock();
-
- return 0;
+ return sta_info_insert(sta);
}
static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 907bb1f748a1..76fc36a68750 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* mac80211 - channel management
+ * Copyright 2020 - 2021 Intel Corporation
*/
#include <linux/nl80211.h>
@@ -308,8 +309,8 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
* the max of min required widths of all the interfaces bound to this
* channel context.
*/
-void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+static u32 _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
enum nl80211_chan_width max_bw;
struct cfg80211_chan_def min_def;
@@ -326,7 +327,7 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
ctx->conf.def.width == NL80211_CHAN_WIDTH_16 ||
ctx->conf.radar_enabled) {
ctx->conf.min_def = ctx->conf.def;
- return;
+ return 0;
}
max_bw = ieee80211_get_chanctx_max_required_bw(local, &ctx->conf);
@@ -337,17 +338,21 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
ieee80211_chandef_downgrade(&min_def);
if (cfg80211_chandef_identical(&ctx->conf.min_def, &min_def))
- return;
+ return 0;
ctx->conf.min_def = min_def;
if (!ctx->driver_present)
- return;
+ return 0;
- drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_MIN_WIDTH);
+ return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH;
}
+/* calling this function is assuming that station vif is updated to
+ * lates changes by calling ieee80211_vif_update_chandef
+ */
static void ieee80211_chan_bw_change(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+ struct ieee80211_chanctx *ctx,
+ bool narrowed)
{
struct sta_info *sta;
struct ieee80211_supported_band *sband =
@@ -366,9 +371,16 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
continue;
new_sta_bw = ieee80211_sta_cur_vht_bw(sta);
+
+ /* nothing change */
if (new_sta_bw == sta->sta.bandwidth)
continue;
+ /* vif changed to narrow BW and narrow BW for station wasn't
+ * requested or vise versa */
+ if ((new_sta_bw < sta->sta.bandwidth) == !narrowed)
+ continue;
+
sta->sta.bandwidth = new_sta_bw;
rate_control_rate_update(local, sband, sta,
IEEE80211_RC_BW_CHANGED);
@@ -376,21 +388,34 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
rcu_read_unlock();
}
-static void ieee80211_change_chanctx(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *chandef)
+/*
+ * recalc the min required chan width of the channel context, which is
+ * the max of min required widths of all the interfaces bound to this
+ * channel context.
+ */
+void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
- enum nl80211_chan_width width;
+ u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx);
- if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
- ieee80211_recalc_chanctx_min_def(local, ctx);
+ if (!changed)
return;
- }
- WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
+ /* check is BW narrowed */
+ ieee80211_chan_bw_change(local, ctx, true);
- width = ctx->conf.def.width;
- ctx->conf.def = *chandef;
+ drv_change_chanctx(local, ctx, changed);
+
+ /* check is BW wider */
+ ieee80211_chan_bw_change(local, ctx, false);
+}
+
+static void ieee80211_change_chanctx(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_chanctx *old_ctx,
+ const struct cfg80211_chan_def *chandef)
+{
+ u32 changed;
/* expected to handle only 20/40/80/160 channel widths */
switch (chandef->width) {
@@ -405,19 +430,33 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
WARN_ON(1);
}
- if (chandef->width < width)
- ieee80211_chan_bw_change(local, ctx);
+ /* Check maybe BW narrowed - we do this _before_ calling recalc_chanctx_min_def
+ * due to maybe not returning from it, e.g in case new context was added
+ * first time with all parameters up to date.
+ */
+ ieee80211_chan_bw_change(local, old_ctx, true);
+
+ if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
+ ieee80211_recalc_chanctx_min_def(local, ctx);
+ return;
+ }
- drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH);
- ieee80211_recalc_chanctx_min_def(local, ctx);
+ WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
+
+ ctx->conf.def = *chandef;
+
+ /* check if min chanctx also changed */
+ changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
+ _ieee80211_recalc_chanctx_min_def(local, ctx);
+ drv_change_chanctx(local, ctx, changed);
if (!local->use_chanctx) {
local->_oper_chandef = *chandef;
ieee80211_hw_config(local, 0);
}
- if (chandef->width > width)
- ieee80211_chan_bw_change(local, ctx);
+ /* check is BW wider */
+ ieee80211_chan_bw_change(local, old_ctx, false);
}
static struct ieee80211_chanctx *
@@ -450,7 +489,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
if (!compat)
continue;
- ieee80211_change_chanctx(local, ctx, compat);
+ ieee80211_change_chanctx(local, ctx, ctx, compat);
return ctx;
}
@@ -679,7 +718,7 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
if (!compat)
return;
- ieee80211_change_chanctx(local, ctx, compat);
+ ieee80211_change_chanctx(local, ctx, ctx, compat);
}
static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
@@ -1107,13 +1146,12 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
if (WARN_ON(!chandef))
return -EINVAL;
- if (old_ctx->conf.def.width > new_ctx->conf.def.width)
- ieee80211_chan_bw_change(local, new_ctx);
+ if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
+ changed = BSS_CHANGED_BANDWIDTH;
- ieee80211_change_chanctx(local, new_ctx, chandef);
+ ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
- if (old_ctx->conf.def.width < new_ctx->conf.def.width)
- ieee80211_chan_bw_change(local, new_ctx);
+ ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef);
vif_chsw[0].vif = &sdata->vif;
vif_chsw[0].old_ctx = &old_ctx->conf;
@@ -1142,14 +1180,9 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
ieee80211_free_chanctx(local, old_ctx);
- if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
- changed = BSS_CHANGED_BANDWIDTH;
-
- ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
-
+ ieee80211_recalc_chanctx_min_def(local, new_ctx);
ieee80211_recalc_smps_chanctx(local, new_ctx);
ieee80211_recalc_radar_chanctx(local, new_ctx);
- ieee80211_recalc_chanctx_min_def(local, new_ctx);
if (changed)
ieee80211_bss_info_change_notify(sdata, changed);
@@ -1188,7 +1221,7 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
if (WARN_ON(!chandef))
return -EINVAL;
- ieee80211_change_chanctx(local, new_ctx, chandef);
+ ieee80211_change_chanctx(local, new_ctx, new_ctx, chandef);
list_del(&sdata->reserved_chanctx_list);
sdata->reserved_chanctx = NULL;
@@ -1505,7 +1538,6 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
ieee80211_recalc_smps_chanctx(local, ctx);
ieee80211_recalc_radar_chanctx(local, ctx);
ieee80211_recalc_chanctx_min_def(local, ctx);
- ieee80211_chan_bw_change(local, ctx);
list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs,
reserved_chanctx_list) {
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 9245c0421bda..8dbfe325ee66 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -4,7 +4,7 @@
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2019, 2021 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -216,14 +216,14 @@ static ssize_t aql_txq_limit_read(struct file *file,
"VI %u %u\n"
"BE %u %u\n"
"BK %u %u\n",
- local->aql_txq_limit_low[IEEE80211_AC_VO],
- local->aql_txq_limit_high[IEEE80211_AC_VO],
- local->aql_txq_limit_low[IEEE80211_AC_VI],
- local->aql_txq_limit_high[IEEE80211_AC_VI],
- local->aql_txq_limit_low[IEEE80211_AC_BE],
- local->aql_txq_limit_high[IEEE80211_AC_BE],
- local->aql_txq_limit_low[IEEE80211_AC_BK],
- local->aql_txq_limit_high[IEEE80211_AC_BK]);
+ local->airtime[IEEE80211_AC_VO].aql_txq_limit_low,
+ local->airtime[IEEE80211_AC_VO].aql_txq_limit_high,
+ local->airtime[IEEE80211_AC_VI].aql_txq_limit_low,
+ local->airtime[IEEE80211_AC_VI].aql_txq_limit_high,
+ local->airtime[IEEE80211_AC_BE].aql_txq_limit_low,
+ local->airtime[IEEE80211_AC_BE].aql_txq_limit_high,
+ local->airtime[IEEE80211_AC_BK].aql_txq_limit_low,
+ local->airtime[IEEE80211_AC_BK].aql_txq_limit_high);
return simple_read_from_buffer(user_buf, count, ppos,
buf, len);
}
@@ -255,11 +255,11 @@ static ssize_t aql_txq_limit_write(struct file *file,
if (ac >= IEEE80211_NUM_ACS)
return -EINVAL;
- q_limit_low_old = local->aql_txq_limit_low[ac];
- q_limit_high_old = local->aql_txq_limit_high[ac];
+ q_limit_low_old = local->airtime[ac].aql_txq_limit_low;
+ q_limit_high_old = local->airtime[ac].aql_txq_limit_high;
- local->aql_txq_limit_low[ac] = q_limit_low;
- local->aql_txq_limit_high[ac] = q_limit_high;
+ local->airtime[ac].aql_txq_limit_low = q_limit_low;
+ local->airtime[ac].aql_txq_limit_high = q_limit_high;
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
@@ -382,15 +382,62 @@ static const struct file_operations force_tx_status_ops = {
.llseek = default_llseek,
};
+static ssize_t airtime_read(struct file *file,
+ char __user *user_buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct ieee80211_local *local = file->private_data;
+ char buf[200];
+ u64 v_t[IEEE80211_NUM_ACS];
+ u64 wt[IEEE80211_NUM_ACS];
+ int len = 0, ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ spin_lock_bh(&local->airtime[ac].lock);
+ v_t[ac] = local->airtime[ac].v_t;
+ wt[ac] = local->airtime[ac].weight_sum;
+ spin_unlock_bh(&local->airtime[ac].lock);
+ }
+ len = scnprintf(buf, sizeof(buf),
+ "\tVO VI BE BK\n"
+ "Virt-t\t%-10llu %-10llu %-10llu %-10llu\n"
+ "Weight\t%-10llu %-10llu %-10llu %-10llu\n",
+ v_t[0],
+ v_t[1],
+ v_t[2],
+ v_t[3],
+ wt[0],
+ wt[1],
+ wt[2],
+ wt[3]);
+
+ return simple_read_from_buffer(user_buf, count, ppos,
+ buf, len);
+}
+
+static const struct file_operations airtime_ops = {
+ .read = airtime_read,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
#ifdef CONFIG_PM
static ssize_t reset_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
struct ieee80211_local *local = file->private_data;
+ int ret;
rtnl_lock();
+ wiphy_lock(local->hw.wiphy);
__ieee80211_suspend(&local->hw, NULL);
- __ieee80211_resume(&local->hw);
+ ret = __ieee80211_resume(&local->hw);
+ wiphy_unlock(local->hw.wiphy);
+
+ if (ret)
+ cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
rtnl_unlock();
return count;
@@ -625,7 +672,11 @@ void debugfs_hw_add(struct ieee80211_local *local)
if (local->ops->wake_tx_queue)
DEBUGFS_ADD_MODE(aqm, 0600);
- DEBUGFS_ADD_MODE(airtime_flags, 0600);
+ if (wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
+ DEBUGFS_ADD_MODE(airtime, 0600);
+ DEBUGFS_ADD_MODE(airtime_flags, 0600);
+ }
DEBUGFS_ADD(aql_txq_limit);
debugfs_create_u32("aql_threshold", 0600,
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 0ad3860852ff..db724fc10a5f 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -57,7 +57,6 @@ static ssize_t ieee80211_if_write(
return -EFAULT;
buf[count] = '\0';
- ret = -ENODEV;
rtnl_lock();
ret = (*write)(sdata, buf, count);
rtnl_unlock();
@@ -513,6 +512,34 @@ static ssize_t ieee80211_if_fmt_aqm(
}
IEEE80211_IF_FILE_R(aqm);
+static ssize_t ieee80211_if_fmt_airtime(
+ const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_txq *txq = sdata->vif.txq;
+ struct airtime_info *air_info;
+ int len;
+
+ if (!txq)
+ return 0;
+
+ spin_lock_bh(&local->airtime[txq->ac].lock);
+ air_info = to_airtime_info(txq);
+ len = scnprintf(buf,
+ buflen,
+ "RX: %llu us\nTX: %llu us\nWeight: %u\n"
+ "Virt-T: %lld us\n",
+ air_info->rx_airtime,
+ air_info->tx_airtime,
+ air_info->weight,
+ air_info->v_t);
+ spin_unlock_bh(&local->airtime[txq->ac].lock);
+
+ return len;
+}
+
+IEEE80211_IF_FILE_R(airtime);
+
IEEE80211_IF_FILE(multicast_to_unicast, u.ap.multicast_to_unicast, HEX);
/* IBSS attributes */
@@ -658,8 +685,10 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata)
if (sdata->local->ops->wake_tx_queue &&
sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
- sdata->vif.type != NL80211_IFTYPE_NAN)
+ sdata->vif.type != NL80211_IFTYPE_NAN) {
DEBUGFS_ADD(aqm);
+ DEBUGFS_ADD(airtime);
+ }
}
static void add_sta_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 936c9dfa86c8..8be28cfd6f64 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -202,7 +202,7 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
size_t bufsz = 400;
char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
u64 rx_airtime = 0, tx_airtime = 0;
- s64 deficit[IEEE80211_NUM_ACS];
+ u64 v_t[IEEE80211_NUM_ACS];
ssize_t rv;
int ac;
@@ -210,18 +210,18 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
return -ENOMEM;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->airtime[ac].lock);
rx_airtime += sta->airtime[ac].rx_airtime;
tx_airtime += sta->airtime[ac].tx_airtime;
- deficit[ac] = sta->airtime[ac].deficit;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ v_t[ac] = sta->airtime[ac].v_t;
+ spin_unlock_bh(&local->airtime[ac].lock);
}
p += scnprintf(p, bufsz + buf - p,
"RX: %llu us\nTX: %llu us\nWeight: %u\n"
- "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
- rx_airtime, tx_airtime, sta->airtime_weight,
- deficit[0], deficit[1], deficit[2], deficit[3]);
+ "Virt-T: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+ rx_airtime, tx_airtime, sta->airtime[0].weight,
+ v_t[0], v_t[1], v_t[2], v_t[3]);
rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
kfree(buf);
@@ -236,11 +236,11 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
int ac;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->airtime[ac].lock);
sta->airtime[ac].rx_airtime = 0;
sta->airtime[ac].tx_airtime = 0;
- sta->airtime[ac].deficit = sta->airtime_weight;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ sta->airtime[ac].v_t = 0;
+ spin_unlock_bh(&local->airtime[ac].lock);
}
return count;
@@ -263,10 +263,10 @@ static ssize_t sta_aql_read(struct file *file, char __user *userbuf,
return -ENOMEM;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->airtime[ac].lock);
q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ spin_unlock_bh(&local->airtime[ac].lock);
q_depth[ac] = atomic_read(&sta->airtime[ac].aql_tx_pending);
}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 604ca59937f0..bcb7cc06db3d 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2019 Intel Corporation
+* Copyright (C) 2018 - 2019, 2021 Intel Corporation
*/
#ifndef __MAC80211_DRIVER_OPS
@@ -821,7 +821,7 @@ drv_allow_buffered_frames(struct ieee80211_local *local,
static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- u16 duration)
+ struct ieee80211_prep_tx_info *info)
{
might_sleep();
@@ -829,9 +829,27 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
return;
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
- trace_drv_mgd_prepare_tx(local, sdata, duration);
+ trace_drv_mgd_prepare_tx(local, sdata, info->duration,
+ info->subtype, info->success);
if (local->ops->mgd_prepare_tx)
- local->ops->mgd_prepare_tx(&local->hw, &sdata->vif, duration);
+ local->ops->mgd_prepare_tx(&local->hw, &sdata->vif, info);
+ trace_drv_return_void(local);
+}
+
+static inline void drv_mgd_complete_tx(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_prep_tx_info *info)
+{
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+ WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
+
+ trace_drv_mgd_complete_tx(local, sdata, info->duration,
+ info->subtype, info->success);
+ if (local->ops->mgd_complete_tx)
+ local->ops->mgd_complete_tx(&local->hw, &sdata->vif, info);
trace_drv_return_void(local);
}
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 0c0b970835ce..c05af7018f79 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -111,7 +111,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
- struct ieee80211_sta_he_cap own_he_cap = sband->iftype_data->he_cap;
+ struct ieee80211_sta_he_cap own_he_cap;
struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
u8 he_ppe_size;
u8 mcs_nss_size;
@@ -120,9 +120,13 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
memset(he_cap, 0, sizeof(*he_cap));
- if (!he_cap_ie || !ieee80211_get_he_sta_cap(sband))
+ if (!he_cap_ie ||
+ !ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif)))
return;
+ own_he_cap = sband->iftype_data->he_cap;
+
/* Make sure size is OK */
mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem);
he_ppe_size =
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 3d62a80b5790..2eb7641f5556 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright 2017 Intel Deutschland GmbH
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -555,17 +555,15 @@ void ieee80211_request_smps(struct ieee80211_vif *vif,
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION &&
- vif->type != NL80211_IFTYPE_AP))
+ if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION))
return;
- if (vif->type == NL80211_IFTYPE_STATION) {
- if (sdata->u.mgd.driver_smps_mode == smps_mode)
- return;
- sdata->u.mgd.driver_smps_mode = smps_mode;
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->u.mgd.request_smps_work);
- }
+ if (sdata->u.mgd.driver_smps_mode == smps_mode)
+ return;
+
+ sdata->u.mgd.driver_smps_mode = smps_mode;
+ ieee80211_queue_work(&sdata->local->hw,
+ &sdata->u.mgd.request_smps_work);
}
/* this might change ... don't want non-open drivers using it */
EXPORT_SYMBOL_GPL(ieee80211_request_smps);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 214404a558fb..22549b95d1aa 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#ifndef IEEE80211_I_H
@@ -831,17 +831,16 @@ enum txq_info_flags {
* @def_flow: used as a fallback flow when a packet destined to @tin hashes to
* a fq_flow which is already owned by a different tin
* @def_cvars: codel vars for @def_flow
- * @frags: used to keep fragments created after dequeue
* @schedule_order: used with ieee80211_local->active_txqs
- * @schedule_round: counter to prevent infinite loops on TXQ scheduling
+ * @frags: used to keep fragments created after dequeue
*/
struct txq_info {
struct fq_tin tin;
struct codel_vars def_cvars;
struct codel_stats cstats;
+ struct rb_node schedule_order;
+
struct sk_buff_head frags;
- struct list_head schedule_order;
- u16 schedule_round;
unsigned long flags;
/* keep last! */
@@ -918,6 +917,8 @@ struct ieee80211_sub_if_data {
struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
struct mac80211_qos_map __rcu *qos_map;
+ struct airtime_info airtime[IEEE80211_NUM_ACS];
+
struct work_struct csa_finalize_work;
bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
struct cfg80211_chan_def csa_chandef;
@@ -1130,6 +1131,44 @@ enum mac80211_scan_state {
SCAN_ABORT,
};
+/**
+ * struct airtime_sched_info - state used for airtime scheduling and AQL
+ *
+ * @lock: spinlock that protects all the fields in this struct
+ * @active_txqs: rbtree of currently backlogged queues, sorted by virtual time
+ * @schedule_pos: the current position maintained while a driver walks the tree
+ * with ieee80211_next_txq()
+ * @active_list: list of struct airtime_info structs that were active within
+ * the last AIRTIME_ACTIVE_DURATION (100 ms), used to compute
+ * weight_sum
+ * @last_weight_update: used for rate limiting walking active_list
+ * @last_schedule_time: tracks the last time a transmission was scheduled; used
+ * for catching up v_t if no stations are eligible for
+ * transmission.
+ * @v_t: global virtual time; queues with v_t < this are eligible for
+ * transmission
+ * @weight_sum: total sum of all active stations used for dividing airtime
+ * @weight_sum_reciprocal: reciprocal of weight_sum (to avoid divisions in fast
+ * path - see comment above
+ * IEEE80211_RECIPROCAL_DIVISOR_64)
+ * @aql_txq_limit_low: AQL limit when total outstanding airtime
+ * is < IEEE80211_AQL_THRESHOLD
+ * @aql_txq_limit_high: AQL limit when total outstanding airtime
+ * is > IEEE80211_AQL_THRESHOLD
+ */
+struct airtime_sched_info {
+ spinlock_t lock;
+ struct rb_root_cached active_txqs;
+ struct rb_node *schedule_pos;
+ struct list_head active_list;
+ u64 last_weight_update;
+ u64 last_schedule_activity;
+ u64 v_t;
+ u64 weight_sum;
+ u64 weight_sum_reciprocal;
+ u32 aql_txq_limit_low;
+ u32 aql_txq_limit_high;
+};
DECLARE_STATIC_KEY_FALSE(aql_disable);
struct ieee80211_local {
@@ -1143,13 +1182,8 @@ struct ieee80211_local {
struct codel_params cparams;
/* protects active_txqs and txqi->schedule_order */
- spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
- struct list_head active_txqs[IEEE80211_NUM_ACS];
- u16 schedule_round[IEEE80211_NUM_ACS];
-
+ struct airtime_sched_info airtime[IEEE80211_NUM_ACS];
u16 airtime_flags;
- u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
- u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
u32 aql_threshold;
atomic_t aql_total_pending_airtime;
@@ -1414,10 +1448,6 @@ struct ieee80211_local {
/* extended capabilities provided by mac80211 */
u8 ext_capa[8];
-
- /* TDLS channel switch */
- struct work_struct tdls_chsw_work;
- struct sk_buff_head skb_queue_tdls_chsw;
};
static inline struct ieee80211_sub_if_data *
@@ -1442,7 +1472,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
rcu_read_lock();
chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (WARN_ON_ONCE(!chanctx_conf)) {
+ if (!chanctx_conf) {
rcu_read_unlock();
return NULL;
}
@@ -1567,6 +1597,125 @@ static inline bool txq_has_queue(struct ieee80211_txq *txq)
return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets);
}
+static inline struct airtime_info *to_airtime_info(struct ieee80211_txq *txq)
+{
+ struct ieee80211_sub_if_data *sdata;
+ struct sta_info *sta;
+
+ if (txq->sta) {
+ sta = container_of(txq->sta, struct sta_info, sta);
+ return &sta->airtime[txq->ac];
+ }
+
+ sdata = vif_to_sdata(txq->vif);
+ return &sdata->airtime[txq->ac];
+}
+
+/* To avoid divisions in the fast path, we keep pre-computed reciprocals for
+ * airtime weight calculations. There are two different weights to keep track
+ * of: The per-station weight and the sum of weights per phy.
+ *
+ * For the per-station weights (kept in airtime_info below), we use 32-bit
+ * reciprocals with a devisor of 2^19. This lets us keep the multiplications and
+ * divisions for the station weights as 32-bit operations at the cost of a bit
+ * of rounding error for high weights; but the choice of divisor keeps rounding
+ * errors <10% for weights <2^15, assuming no more than 8ms of airtime is
+ * reported at a time.
+ *
+ * For the per-phy sum of weights the values can get higher, so we use 64-bit
+ * operations for those with a 32-bit divisor, which should avoid any
+ * significant rounding errors.
+ */
+#define IEEE80211_RECIPROCAL_DIVISOR_64 0x100000000ULL
+#define IEEE80211_RECIPROCAL_SHIFT_64 32
+#define IEEE80211_RECIPROCAL_DIVISOR_32 0x80000U
+#define IEEE80211_RECIPROCAL_SHIFT_32 19
+
+static inline void airtime_weight_set(struct airtime_info *air_info, u16 weight)
+{
+ if (air_info->weight == weight)
+ return;
+
+ air_info->weight = weight;
+ if (weight) {
+ air_info->weight_reciprocal =
+ IEEE80211_RECIPROCAL_DIVISOR_32 / weight;
+ } else {
+ air_info->weight_reciprocal = 0;
+ }
+}
+
+static inline void airtime_weight_sum_set(struct airtime_sched_info *air_sched,
+ int weight_sum)
+{
+ if (air_sched->weight_sum == weight_sum)
+ return;
+
+ air_sched->weight_sum = weight_sum;
+ if (air_sched->weight_sum) {
+ air_sched->weight_sum_reciprocal = IEEE80211_RECIPROCAL_DIVISOR_64;
+ do_div(air_sched->weight_sum_reciprocal, air_sched->weight_sum);
+ } else {
+ air_sched->weight_sum_reciprocal = 0;
+ }
+}
+
+/* A problem when trying to enforce airtime fairness is that we want to divide
+ * the airtime between the currently *active* stations. However, basing this on
+ * the instantaneous queue state of stations doesn't work, as queues tend to
+ * oscillate very quickly between empty and occupied, leading to the scheduler
+ * thinking only a single station is active when deciding whether to allow
+ * transmission (and thus not throttling correctly).
+ *
+ * To fix this we use a timer-based notion of activity: a station is considered
+ * active if it has been scheduled within the last 100 ms; we keep a separate
+ * list of all the stations considered active in this manner, and lazily update
+ * the total weight of active stations from this list (filtering the stations in
+ * the list by their 'last active' time).
+ *
+ * We add one additional safeguard to guard against stations that manage to get
+ * scheduled every 100 ms but don't transmit a lot of data, and thus don't use
+ * up any airtime. Such stations would be able to get priority for an extended
+ * period of time if they do start transmitting at full capacity again, and so
+ * we add an explicit maximum for how far behind a station is allowed to fall in
+ * the virtual airtime domain. This limit is set to a relatively high value of
+ * 20 ms because the main mechanism for catching up idle stations is the active
+ * state as described above; i.e., the hard limit should only be hit in
+ * pathological cases.
+ */
+#define AIRTIME_ACTIVE_DURATION (100 * NSEC_PER_MSEC)
+#define AIRTIME_MAX_BEHIND 20000 /* 20 ms */
+
+static inline bool airtime_is_active(struct airtime_info *air_info, u64 now)
+{
+ return air_info->last_scheduled >= now - AIRTIME_ACTIVE_DURATION;
+}
+
+static inline void airtime_set_active(struct airtime_sched_info *air_sched,
+ struct airtime_info *air_info, u64 now)
+{
+ air_info->last_scheduled = now;
+ air_sched->last_schedule_activity = now;
+ list_move_tail(&air_info->list, &air_sched->active_list);
+}
+
+static inline bool airtime_catchup_v_t(struct airtime_sched_info *air_sched,
+ u64 v_t, u64 now)
+{
+ air_sched->v_t = v_t;
+ return true;
+}
+
+static inline void init_airtime_info(struct airtime_info *air_info,
+ struct airtime_sched_info *air_sched)
+{
+ atomic_set(&air_info->aql_tx_pending, 0);
+ air_info->aql_limit_low = air_sched->aql_txq_limit_low;
+ air_info->aql_limit_high = air_sched->aql_txq_limit_high;
+ airtime_weight_set(air_info, IEEE80211_DEFAULT_AIRTIME_WEIGHT);
+ INIT_LIST_HEAD(&air_info->list);
+}
+
static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
{
return ether_addr_equal(raddr, addr) ||
@@ -1809,6 +1958,14 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
u64 *cookie);
int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
const u8 *buf, size_t len);
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq);
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq,
+ bool purge);
+void ieee80211_update_airtime_weight(struct ieee80211_local *local,
+ struct airtime_sched_info *air_sched,
+ u64 now, bool force);
/* HT */
void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
@@ -1879,7 +2036,6 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta);
enum ieee80211_sta_rx_bandwidth
ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta);
-void ieee80211_sta_set_rx_nss(struct sta_info *sta);
void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt);
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
@@ -2287,9 +2443,13 @@ void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy,
struct net_device *dev,
const u8 *addr);
void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata);
-void ieee80211_tdls_chsw_work(struct work_struct *wk);
void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
const u8 *peer, u16 reason);
+void
+ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
+
+
const char *ieee80211_get_reason_code_string(u16 reason_code);
u16 ieee80211_encode_usf(int val);
u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 2e2f73a4aa73..1e5e9fc45523 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -476,14 +476,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
GFP_KERNEL);
}
- /* APs need special treatment */
if (sdata->vif.type == NL80211_IFTYPE_AP) {
- struct ieee80211_sub_if_data *vlan, *tmpsdata;
-
- /* down all dependent devices, that is VLANs */
- list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
- u.vlan.list)
- dev_close(vlan->dev);
WARN_ON(!list_empty(&sdata->u.ap.vlans));
} else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
/* remove all packets in parent bc_buf pointing to this dev */
@@ -641,6 +634,15 @@ static int ieee80211_stop(struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ /* close all dependent VLAN interfaces before locking wiphy */
+ if (sdata->vif.type == NL80211_IFTYPE_AP) {
+ struct ieee80211_sub_if_data *vlan, *tmpsdata;
+
+ list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
+ u.vlan.list)
+ dev_close(vlan->dev);
+ }
+
wiphy_lock(sdata->local->hw.wiphy);
ieee80211_do_stop(sdata, true);
wiphy_unlock(sdata->local->hw.wiphy);
@@ -1316,13 +1318,130 @@ static void ieee80211_if_setup_no_queue(struct net_device *dev)
dev->priv_flags |= IFF_NO_QUEUE;
}
+static void ieee80211_iface_process_skb(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (void *)skb->data;
+
+ if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_BACK) {
+ struct sta_info *sta;
+ int len = skb->len;
+
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+ if (sta) {
+ switch (mgmt->u.action.u.addba_req.action_code) {
+ case WLAN_ACTION_ADDBA_REQ:
+ ieee80211_process_addba_request(local, sta,
+ mgmt, len);
+ break;
+ case WLAN_ACTION_ADDBA_RESP:
+ ieee80211_process_addba_resp(local, sta,
+ mgmt, len);
+ break;
+ case WLAN_ACTION_DELBA:
+ ieee80211_process_delba(sdata, sta,
+ mgmt, len);
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ }
+ mutex_unlock(&local->sta_mtx);
+ } else if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_VHT) {
+ switch (mgmt->u.action.u.vht_group_notif.action_code) {
+ case WLAN_VHT_ACTION_OPMODE_NOTIF: {
+ struct ieee80211_rx_status *status;
+ enum nl80211_band band;
+ struct sta_info *sta;
+ u8 opmode;
+
+ status = IEEE80211_SKB_RXCB(skb);
+ band = status->band;
+ opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
+
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+
+ if (sta)
+ ieee80211_vht_handle_opmode(sdata, sta, opmode,
+ band);
+
+ mutex_unlock(&local->sta_mtx);
+ break;
+ }
+ case WLAN_VHT_ACTION_GROUPID_MGMT:
+ ieee80211_process_mu_groups(sdata, mgmt);
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ } else if (ieee80211_is_ext(mgmt->frame_control)) {
+ if (sdata->vif.type == NL80211_IFTYPE_STATION)
+ ieee80211_sta_rx_queued_ext(sdata, skb);
+ else
+ WARN_ON(1);
+ } else if (ieee80211_is_data_qos(mgmt->frame_control)) {
+ struct ieee80211_hdr *hdr = (void *)mgmt;
+ struct sta_info *sta;
+
+ /*
+ * So the frame isn't mgmt, but frame_control
+ * is at the right place anyway, of course, so
+ * the if statement is correct.
+ *
+ * Warn if we have other data frame types here,
+ * they must not get here.
+ */
+ WARN_ON(hdr->frame_control &
+ cpu_to_le16(IEEE80211_STYPE_NULLFUNC));
+ WARN_ON(!(hdr->seq_ctrl &
+ cpu_to_le16(IEEE80211_SCTL_FRAG)));
+ /*
+ * This was a fragment of a frame, received while
+ * a block-ack session was active. That cannot be
+ * right, so terminate the session.
+ */
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+ if (sta) {
+ u16 tid = ieee80211_get_tid(hdr);
+
+ __ieee80211_stop_rx_ba_session(
+ sta, tid, WLAN_BACK_RECIPIENT,
+ WLAN_REASON_QSTA_REQUIRE_SETUP,
+ true);
+ }
+ mutex_unlock(&local->sta_mtx);
+ } else switch (sdata->vif.type) {
+ case NL80211_IFTYPE_STATION:
+ ieee80211_sta_rx_queued_mgmt(sdata, skb);
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ ieee80211_ibss_rx_queued_mgmt(sdata, skb);
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ if (!ieee80211_vif_is_mesh(&sdata->vif))
+ break;
+ ieee80211_mesh_rx_queued_mgmt(sdata, skb);
+ break;
+ default:
+ WARN(1, "frame for unexpected interface type");
+ break;
+ }
+}
+
static void ieee80211_iface_work(struct work_struct *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data, work);
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
- struct sta_info *sta;
if (!ieee80211_sdata_running(sdata))
return;
@@ -1335,116 +1454,12 @@ static void ieee80211_iface_work(struct work_struct *work)
/* first process frames */
while ((skb = skb_dequeue(&sdata->skb_queue))) {
- struct ieee80211_mgmt *mgmt = (void *)skb->data;
-
kcov_remote_start_common(skb_get_kcov_handle(skb));
- if (ieee80211_is_action(mgmt->frame_control) &&
- mgmt->u.action.category == WLAN_CATEGORY_BACK) {
- int len = skb->len;
-
- mutex_lock(&local->sta_mtx);
- sta = sta_info_get_bss(sdata, mgmt->sa);
- if (sta) {
- switch (mgmt->u.action.u.addba_req.action_code) {
- case WLAN_ACTION_ADDBA_REQ:
- ieee80211_process_addba_request(
- local, sta, mgmt, len);
- break;
- case WLAN_ACTION_ADDBA_RESP:
- ieee80211_process_addba_resp(local, sta,
- mgmt, len);
- break;
- case WLAN_ACTION_DELBA:
- ieee80211_process_delba(sdata, sta,
- mgmt, len);
- break;
- default:
- WARN_ON(1);
- break;
- }
- }
- mutex_unlock(&local->sta_mtx);
- } else if (ieee80211_is_action(mgmt->frame_control) &&
- mgmt->u.action.category == WLAN_CATEGORY_VHT) {
- switch (mgmt->u.action.u.vht_group_notif.action_code) {
- case WLAN_VHT_ACTION_OPMODE_NOTIF: {
- struct ieee80211_rx_status *status;
- enum nl80211_band band;
- u8 opmode;
-
- status = IEEE80211_SKB_RXCB(skb);
- band = status->band;
- opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
-
- mutex_lock(&local->sta_mtx);
- sta = sta_info_get_bss(sdata, mgmt->sa);
-
- if (sta)
- ieee80211_vht_handle_opmode(sdata, sta,
- opmode,
- band);
-
- mutex_unlock(&local->sta_mtx);
- break;
- }
- case WLAN_VHT_ACTION_GROUPID_MGMT:
- ieee80211_process_mu_groups(sdata, mgmt);
- break;
- default:
- WARN_ON(1);
- break;
- }
- } else if (ieee80211_is_ext(mgmt->frame_control)) {
- if (sdata->vif.type == NL80211_IFTYPE_STATION)
- ieee80211_sta_rx_queued_ext(sdata, skb);
- else
- WARN_ON(1);
- } else if (ieee80211_is_data_qos(mgmt->frame_control)) {
- struct ieee80211_hdr *hdr = (void *)mgmt;
- /*
- * So the frame isn't mgmt, but frame_control
- * is at the right place anyway, of course, so
- * the if statement is correct.
- *
- * Warn if we have other data frame types here,
- * they must not get here.
- */
- WARN_ON(hdr->frame_control &
- cpu_to_le16(IEEE80211_STYPE_NULLFUNC));
- WARN_ON(!(hdr->seq_ctrl &
- cpu_to_le16(IEEE80211_SCTL_FRAG)));
- /*
- * This was a fragment of a frame, received while
- * a block-ack session was active. That cannot be
- * right, so terminate the session.
- */
- mutex_lock(&local->sta_mtx);
- sta = sta_info_get_bss(sdata, mgmt->sa);
- if (sta) {
- u16 tid = ieee80211_get_tid(hdr);
- __ieee80211_stop_rx_ba_session(
- sta, tid, WLAN_BACK_RECIPIENT,
- WLAN_REASON_QSTA_REQUIRE_SETUP,
- true);
- }
- mutex_unlock(&local->sta_mtx);
- } else switch (sdata->vif.type) {
- case NL80211_IFTYPE_STATION:
- ieee80211_sta_rx_queued_mgmt(sdata, skb);
- break;
- case NL80211_IFTYPE_ADHOC:
- ieee80211_ibss_rx_queued_mgmt(sdata, skb);
- break;
- case NL80211_IFTYPE_MESH_POINT:
- if (!ieee80211_vif_is_mesh(&sdata->vif))
- break;
- ieee80211_mesh_rx_queued_mgmt(sdata, skb);
- break;
- default:
- WARN(1, "frame for unexpected interface type");
- break;
- }
+ if (skb->protocol == cpu_to_be16(ETH_P_TDLS))
+ ieee80211_process_tdls_channel_switch(sdata, skb);
+ else
+ ieee80211_iface_process_skb(local, sdata, skb);
kfree_skb(skb);
kcov_remote_stop();
@@ -1591,6 +1606,9 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
+ if (!list_empty(&sdata->u.ap.vlans))
+ return -EBUSY;
+ break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_OCB:
@@ -1959,6 +1977,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
}
}
+ for (i = 0; i < IEEE80211_NUM_ACS; i++)
+ init_airtime_info(&sdata->airtime[i], &local->airtime[i]);
+
ieee80211_set_default_queues(sdata);
sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index b275c8853074..6de8d0ad5497 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -259,7 +259,6 @@ static void tpt_trig_timer(struct timer_list *t)
{
struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer);
struct ieee80211_local *local = tpt_trig->local;
- struct led_classdev *led_cdev;
unsigned long on, off, tpt;
int i;
@@ -283,10 +282,7 @@ static void tpt_trig_timer(struct timer_list *t)
}
}
- read_lock(&local->tpt_led.leddev_list_lock);
- list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
- led_blink_set(led_cdev, &on, &off);
- read_unlock(&local->tpt_led.leddev_list_lock);
+ led_trigger_blink(&local->tpt_led, &on, &off);
}
const char *
@@ -341,7 +337,6 @@ static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
{
struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
- struct led_classdev *led_cdev;
if (!tpt_trig->running)
return;
@@ -349,10 +344,7 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
tpt_trig->running = false;
del_timer_sync(&tpt_trig->timer);
- read_lock(&local->tpt_led.leddev_list_lock);
- list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
- led_set_brightness(led_cdev, LED_OFF);
- read_unlock(&local->tpt_led.leddev_list_lock);
+ led_trigger_event(&local->tpt_led, LED_OFF);
}
void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 62145e5f9628..05f4c3c72619 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -252,18 +252,18 @@ static void ieee80211_restart_work(struct work_struct *work)
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, restart_work);
struct ieee80211_sub_if_data *sdata;
+ int ret;
/* wait for scan work complete */
flush_workqueue(local->workqueue);
flush_work(&local->sched_scan_stopped_work);
+ flush_work(&local->radar_detected_work);
+
+ rtnl_lock();
WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
"%s called with hardware scan in progress\n", __func__);
- flush_work(&local->radar_detected_work);
- /* we might do interface manipulations, so need both */
- rtnl_lock();
- wiphy_lock(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
/*
* XXX: there may be more work for other vif types and even
@@ -301,8 +301,12 @@ static void ieee80211_restart_work(struct work_struct *work)
/* wait for all packet processing to be done */
synchronize_net();
- ieee80211_reconfig(local);
+ ret = ieee80211_reconfig(local);
wiphy_unlock(local->hw.wiphy);
+
+ if (ret)
+ cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
rtnl_unlock();
}
@@ -701,10 +705,13 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
spin_lock_init(&local->queue_stop_reason_lock);
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
- INIT_LIST_HEAD(&local->active_txqs[i]);
- spin_lock_init(&local->active_txq_lock[i]);
- local->aql_txq_limit_low[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
- local->aql_txq_limit_high[i] =
+ struct airtime_sched_info *air_sched = &local->airtime[i];
+
+ air_sched->active_txqs = RB_ROOT_CACHED;
+ INIT_LIST_HEAD(&air_sched->active_list);
+ spin_lock_init(&air_sched->lock);
+ air_sched->aql_txq_limit_low = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
+ air_sched->aql_txq_limit_high =
IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H;
}
@@ -734,8 +741,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
INIT_WORK(&local->sched_scan_stopped_work,
ieee80211_sched_scan_stopped_work);
- INIT_WORK(&local->tdls_chsw_work, ieee80211_tdls_chsw_work);
-
spin_lock_init(&local->ack_status_lock);
idr_init(&local->ack_status_frames);
@@ -752,7 +757,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
skb_queue_head_init(&local->skb_queue);
skb_queue_head_init(&local->skb_queue_unreliable);
- skb_queue_head_init(&local->skb_queue_tdls_chsw);
ieee80211_alloc_led_names(local);
@@ -1009,8 +1013,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
supp_ht = supp_ht || sband->ht_cap.ht_supported;
supp_vht = supp_vht || sband->vht_cap.vht_supported;
- if (!supp_he)
- supp_he = !!ieee80211_get_he_sta_cap(sband);
+ for (i = 0; i < sband->n_iftype_data; i++) {
+ const struct ieee80211_sband_iftype_data *iftd;
+
+ iftd = &sband->iftype_data[i];
+
+ supp_he = supp_he || (iftd && iftd->he_cap.has_he);
+ }
/* HT, VHT, HE require QoS, thus >= 4 queues */
if (WARN_ON(local->hw.queues < IEEE80211_NUM_ACS &&
@@ -1384,7 +1393,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
cancel_delayed_work_sync(&local->roc_work);
cancel_work_sync(&local->restart_work);
cancel_work_sync(&local->reconfig_filter);
- cancel_work_sync(&local->tdls_chsw_work);
flush_work(&local->sched_scan_stopped_work);
flush_work(&local->radar_detected_work);
@@ -1396,7 +1404,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
skb_queue_purge(&local->skb_queue);
skb_queue_purge(&local->skb_queue_unreliable);
- skb_queue_purge(&local->skb_queue_tdls_chsw);
wiphy_unregister(local->hw.wiphy);
destroy_workqueue(local->workqueue);
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 40492d1bd8fd..77080b4f87b8 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -134,7 +134,7 @@ struct mesh_path {
* gate's mpath may or may not be resolved and active.
* @gates_lock: protects updates to known_gates
* @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
- * @walk_head: linked list containging all mesh_path objects
+ * @walk_head: linked list containing all mesh_path objects
* @walk_lock: lock protecting walk_head
* @entries: number of entries in the table
*/
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 3db514c4c63a..a05b615deb51 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1124,7 +1124,7 @@ enddiscovery:
* forwarding information is found.
*
* Returns: 0 if the next hop was found and -ENOENT if the frame was queued.
- * skb is freeed here if no mpath could be allocated.
+ * skb is freed here if no mpath could be allocated.
*/
int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 620ecf922408..efbefcbac3ac 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -122,7 +122,7 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr,
hdr = (struct ieee80211_hdr *) skb->data;
/* we preserve the previous mesh header and only add
- * the new addreses */
+ * the new addresses */
mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
mshdr->flags = MESH_FLAGS_AE_A5_A6;
memcpy(mshdr->eaddr1, hdr->addr3, ETH_ALEN);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index aca26df7587d..a6915847d78a 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -150,7 +150,7 @@ out:
* mesh STA in a MBSS. Three HT protection modes are supported for now, non-HT
* mixed mode, 20MHz-protection and no-protection mode. non-HT mixed mode is
* selected if any non-HT peers are present in our MBSS. 20MHz-protection mode
- * is selected if all peers in our 20/40MHz MBSS support HT and atleast one
+ * is selected if all peers in our 20/40MHz MBSS support HT and at least one
* HT20 peer is present. Otherwise no-protection mode is selected.
*/
static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2480bd0577bb..a00f11a33699 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2020 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
*/
#include <linux/delay.h>
@@ -371,7 +371,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
struct cfg80211_chan_def chandef;
u16 ht_opmode;
u32 flags;
- enum ieee80211_sta_rx_bandwidth new_sta_bw;
u32 vht_cap_info = 0;
int ret;
@@ -385,7 +384,9 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
/* don't check HE if we associated as non-HE station */
if (ifmgd->flags & IEEE80211_STA_DISABLE_HE ||
- !ieee80211_get_he_sta_cap(sband))
+ !ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif)))
+
he_oper = NULL;
if (WARN_ON_ONCE(!sta))
@@ -445,40 +446,13 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
IEEE80211_STA_DISABLE_160MHZ)) ||
!cfg80211_chandef_valid(&chandef)) {
sdata_info(sdata,
- "AP %pM changed bandwidth in a way we can't support - disconnect\n",
- ifmgd->bssid);
+ "AP %pM changed caps/bw in a way we can't support (0x%x/0x%x) - disconnect\n",
+ ifmgd->bssid, flags, ifmgd->flags);
return -EINVAL;
}
- switch (chandef.width) {
- case NL80211_CHAN_WIDTH_20_NOHT:
- case NL80211_CHAN_WIDTH_20:
- new_sta_bw = IEEE80211_STA_RX_BW_20;
- break;
- case NL80211_CHAN_WIDTH_40:
- new_sta_bw = IEEE80211_STA_RX_BW_40;
- break;
- case NL80211_CHAN_WIDTH_80:
- new_sta_bw = IEEE80211_STA_RX_BW_80;
- break;
- case NL80211_CHAN_WIDTH_80P80:
- case NL80211_CHAN_WIDTH_160:
- new_sta_bw = IEEE80211_STA_RX_BW_160;
- break;
- default:
- return -EINVAL;
- }
-
- if (new_sta_bw > sta->cur_max_bandwidth)
- new_sta_bw = sta->cur_max_bandwidth;
-
- if (new_sta_bw < sta->sta.bandwidth) {
- sta->sta.bandwidth = new_sta_bw;
- rate_control_rate_update(local, sband, sta,
- IEEE80211_RC_BW_CHANGED);
- }
-
ret = ieee80211_vif_change_bandwidth(sdata, &chandef, changed);
+
if (ret) {
sdata_info(sdata,
"AP %pM changed bandwidth to incompatible one - disconnect\n",
@@ -486,12 +460,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
return ret;
}
- if (new_sta_bw > sta->sta.bandwidth) {
- sta->sta.bandwidth = new_sta_bw;
- rate_control_rate_update(local, sband, sta,
- IEEE80211_RC_BW_CHANGED);
- }
-
return 0;
}
@@ -617,7 +585,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
/*
- * If some other vif is using the MU-MIMO capablity we cannot associate
+ * If some other vif is using the MU-MIMO capability we cannot associate
* using MU-MIMO - this will lead to contradictions in the group-id
* mechanism.
* Ownership is defined since association request, in order to avoid
@@ -676,7 +644,8 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
- he_cap = ieee80211_get_he_sta_cap(sband);
+ he_cap = ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
if (!he_cap || !reg_cap)
return;
@@ -712,6 +681,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
u32 rates = 0;
__le16 listen_int;
struct element *ext_capa = NULL;
+ enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+ const struct ieee80211_sband_iftype_data *iftd;
+ struct ieee80211_prep_tx_info info = {};
/* we know it's writable, cast away the const */
if (assoc_data->ie_len)
@@ -756,6 +728,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
}
}
+ iftd = ieee80211_get_sband_iftype_data(sband, iftype);
+
skb = alloc_skb(local->hw.extra_tx_headroom +
sizeof(*mgmt) + /* bit too much but doesn't matter */
2 + assoc_data->ssid_len + /* SSID */
@@ -770,7 +744,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
assoc_data->ie_len + /* extra IEs */
(assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
- 9, /* WMM */
+ 9 + /* WMM */
+ (iftd ? iftd->vendor_elems.len : 0),
GFP_KERNEL);
if (!skb)
return;
@@ -810,12 +785,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
mgmt->u.reassoc_req.listen_interval = listen_int;
memcpy(mgmt->u.reassoc_req.current_ap, assoc_data->prev_bssid,
ETH_ALEN);
+ info.subtype = IEEE80211_STYPE_REASSOC_REQ;
} else {
skb_put(skb, 4);
mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_ASSOC_REQ);
mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
mgmt->u.assoc_req.listen_interval = listen_int;
+ info.subtype = IEEE80211_STYPE_ASSOC_REQ;
}
/* SSID */
@@ -1043,6 +1020,9 @@ skip_rates:
ieee80211_add_s1g_capab_ie(sdata, &sband->s1g_cap, skb);
}
+ if (iftd && iftd->vendor_elems.data && iftd->vendor_elems.len)
+ skb_put_data(skb, iftd->vendor_elems.data, iftd->vendor_elems.len);
+
/* add any remaining custom (i.e. vendor specific here) IEs */
if (assoc_data->ie_len) {
noffset = assoc_data->ie_len;
@@ -1060,7 +1040,7 @@ skip_rates:
ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
ifmgd->assoc_req_ies_len = pos - ie_start;
- drv_mgd_prepare_tx(local, sdata, 0);
+ drv_mgd_prepare_tx(local, sdata, &info);
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
@@ -1094,11 +1074,6 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- /* Don't send NDPs when STA is connected HE */
- if (sdata->vif.type == NL80211_IFTYPE_STATION &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
- return;
-
skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
!ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
if (!skb)
@@ -1130,10 +1105,6 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
return;
- /* Don't send NDPs when connected HE */
- if (!(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
- return;
-
skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30);
if (!skb)
return;
@@ -1183,10 +1154,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
*/
if (sdata->reserved_chanctx) {
- struct ieee80211_supported_band *sband = NULL;
- struct sta_info *mgd_sta = NULL;
- enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20;
-
/*
* with multi-vif csa driver may call ieee80211_csa_finish()
* many times while waiting for other interfaces to use their
@@ -1195,48 +1162,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
if (sdata->reserved_ready)
goto out;
- if (sdata->vif.bss_conf.chandef.width !=
- sdata->csa_chandef.width) {
- /*
- * For managed interface, we need to also update the AP
- * station bandwidth and align the rate scale algorithm
- * on the bandwidth change. Here we only consider the
- * bandwidth of the new channel definition (as channel
- * switch flow does not have the full HT/VHT/HE
- * information), assuming that if additional changes are
- * required they would be done as part of the processing
- * of the next beacon from the AP.
- */
- switch (sdata->csa_chandef.width) {
- case NL80211_CHAN_WIDTH_20_NOHT:
- case NL80211_CHAN_WIDTH_20:
- default:
- bw = IEEE80211_STA_RX_BW_20;
- break;
- case NL80211_CHAN_WIDTH_40:
- bw = IEEE80211_STA_RX_BW_40;
- break;
- case NL80211_CHAN_WIDTH_80:
- bw = IEEE80211_STA_RX_BW_80;
- break;
- case NL80211_CHAN_WIDTH_80P80:
- case NL80211_CHAN_WIDTH_160:
- bw = IEEE80211_STA_RX_BW_160;
- break;
- }
-
- mgd_sta = sta_info_get(sdata, ifmgd->bssid);
- sband =
- local->hw.wiphy->bands[sdata->csa_chandef.chan->band];
- }
-
- if (sdata->vif.bss_conf.chandef.width >
- sdata->csa_chandef.width) {
- mgd_sta->sta.bandwidth = bw;
- rate_control_rate_update(local, sband, mgd_sta,
- IEEE80211_RC_BW_CHANGED);
- }
-
ret = ieee80211_vif_use_reserved_context(sdata);
if (ret) {
sdata_info(sdata,
@@ -1247,13 +1172,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
goto out;
}
- if (sdata->vif.bss_conf.chandef.width <
- sdata->csa_chandef.width) {
- mgd_sta->sta.bandwidth = bw;
- rate_control_rate_update(local, sband, mgd_sta,
- IEEE80211_RC_BW_CHANGED);
- }
-
goto out;
}
@@ -2341,6 +2259,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
u32 changed = 0;
+ struct ieee80211_prep_tx_info info = {
+ .subtype = stype,
+ };
sdata_assert_lock(sdata);
@@ -2390,8 +2311,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
* driver requested so.
*/
if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
- !ifmgd->have_beacon)
- drv_mgd_prepare_tx(sdata->local, sdata, 0);
+ !ifmgd->have_beacon) {
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
+ }
ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid,
ifmgd->bssid, stype, reason,
@@ -2402,6 +2324,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
if (tx)
ieee80211_flush_queues(local, sdata, false);
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
+
/* clear bssid only after building the needed mgmt frames */
eth_zero_addr(ifmgd->bssid);
@@ -2617,10 +2541,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
ifmgd->nullfunc_failed = false;
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
- ifmgd->probe_send_count--;
- else
- ieee80211_send_nullfunc(sdata->local, sdata, false);
+ ieee80211_send_nullfunc(sdata->local, sdata, false);
} else {
int ssid_len;
@@ -2952,6 +2873,9 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
u8 *pos;
struct ieee802_11_elems elems;
u32 tx_flags = 0;
+ struct ieee80211_prep_tx_info info = {
+ .subtype = IEEE80211_STYPE_AUTH,
+ };
pos = mgmt->u.auth.variable;
ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
@@ -2959,7 +2883,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
if (!elems.challenge)
return;
auth_data->expected_transaction = 4;
- drv_mgd_prepare_tx(sdata->local, sdata, 0);
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
@@ -3012,6 +2936,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
.type = MLME_EVENT,
.u.mlme.data = AUTH_EVENT,
};
+ struct ieee80211_prep_tx_info info = {
+ .subtype = IEEE80211_STYPE_AUTH,
+ };
sdata_assert_lock(sdata);
@@ -3040,7 +2967,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
mgmt->sa, auth_alg, ifmgd->auth_data->algorithm,
auth_transaction,
ifmgd->auth_data->expected_transaction);
- return;
+ goto notify_driver;
}
if (status_code != WLAN_STATUS_SUCCESS) {
@@ -3051,7 +2978,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
(auth_transaction == 1 &&
(status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT ||
status_code == WLAN_STATUS_SAE_PK))))
- return;
+ goto notify_driver;
sdata_info(sdata, "%pM denied authentication (status %d)\n",
mgmt->sa, status_code);
@@ -3059,7 +2986,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
event.u.mlme.status = MLME_DENIED;
event.u.mlme.reason = status_code;
drv_event_callback(sdata->local, sdata, &event);
- return;
+ goto notify_driver;
}
switch (ifmgd->auth_data->algorithm) {
@@ -3081,10 +3008,11 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
default:
WARN_ONCE(1, "invalid auth alg %d",
ifmgd->auth_data->algorithm);
- return;
+ goto notify_driver;
}
event.u.mlme.status = MLME_SUCCESS;
+ info.success = 1;
drv_event_callback(sdata->local, sdata, &event);
if (ifmgd->auth_data->algorithm != WLAN_AUTH_SAE ||
(auth_transaction == 2 &&
@@ -3098,6 +3026,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
}
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+notify_driver:
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
}
#define case_WLAN(type) \
@@ -3314,6 +3244,23 @@ static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
return 0;
}
+static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *bss_conf,
+ struct ieee80211_supported_band *sband,
+ struct sta_info *sta)
+{
+ const struct ieee80211_sta_he_cap *own_he_cap =
+ ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+
+ return bss_conf->he_support &&
+ (sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+ IEEE80211_HE_MAC_CAP2_BCAST_TWT) &&
+ own_he_cap &&
+ (own_he_cap->he_cap_elem.mac_cap_info[2] &
+ IEEE80211_HE_MAC_CAP2_BCAST_TWT);
+}
+
static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
struct cfg80211_bss *cbss,
struct ieee80211_mgmt *mgmt, size_t len,
@@ -3529,6 +3476,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
bss_conf->twt_protected = false;
}
+ bss_conf->twt_broadcast =
+ ieee80211_twt_bcast_support(sdata, bss_conf, sband, sta);
+
if (bss_conf->he_support) {
bss_conf->he_bss_color.color =
le32_get_bits(elems->he_operation->he_oper_params,
@@ -3699,6 +3649,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
.type = MLME_EVENT,
.u.mlme.data = ASSOC_EVENT,
};
+ struct ieee80211_prep_tx_info info = {};
sdata_assert_lock(sdata);
@@ -3728,6 +3679,15 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
aid = 0; /* TODO */
}
+ /*
+ * Note: this may not be perfect, AP might misbehave - if
+ * anyone needs to rely on perfect complete notification
+ * with the exact right subtype, then we need to track what
+ * we actually transmitted.
+ */
+ info.subtype = reassoc ? IEEE80211_STYPE_REASSOC_REQ :
+ IEEE80211_STYPE_ASSOC_REQ;
+
sdata_info(sdata,
"RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n",
reassoc ? "Rea" : "A", mgmt->sa,
@@ -3753,7 +3713,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
assoc_data->timeout_started = true;
if (ms > IEEE80211_ASSOC_TIMEOUT)
run_again(sdata, assoc_data->timeout);
- return;
+ goto notify_driver;
}
if (status_code != WLAN_STATUS_SUCCESS) {
@@ -3768,7 +3728,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
/* oops -- internal error -- send timeout for now */
ieee80211_destroy_assoc_data(sdata, false, false);
cfg80211_assoc_timeout(sdata->dev, cbss);
- return;
+ goto notify_driver;
}
event.u.mlme.status = MLME_SUCCESS;
drv_event_callback(sdata->local, sdata, &event);
@@ -3786,10 +3746,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
if (sdata->tx_conf[ac].uapsd)
uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
+
+ info.success = 1;
}
cfg80211_rx_assoc_resp(sdata->dev, cbss, (u8 *)mgmt, len, uapsd_queues,
ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len);
+notify_driver:
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
}
static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
@@ -4062,10 +4026,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
if (elems.mbssid_config_ie)
bss_conf->profile_periodicity =
elems.mbssid_config_ie->profile_periodicity;
+ else
+ bss_conf->profile_periodicity = 0;
if (elems.ext_capab_len >= 11 &&
(elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
bss_conf->ema_ap = true;
+ else
+ bss_conf->ema_ap = false;
/* continue assoc process */
ifmgd->assoc_data->timeout = jiffies;
@@ -4404,7 +4372,9 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
u32 tx_flags = 0;
u16 trans = 1;
u16 status = 0;
- u16 prepare_tx_duration = 0;
+ struct ieee80211_prep_tx_info info = {
+ .subtype = IEEE80211_STYPE_AUTH,
+ };
sdata_assert_lock(sdata);
@@ -4427,10 +4397,9 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
}
if (auth_data->algorithm == WLAN_AUTH_SAE)
- prepare_tx_duration =
- jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE);
+ info.duration = jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE);
- drv_mgd_prepare_tx(local, sdata, prepare_tx_duration);
+ drv_mgd_prepare_tx(local, sdata, &info);
sdata_info(sdata, "send auth to %pM (try %d/%d)\n",
auth_data->bss->bssid, auth_data->tries,
@@ -4925,11 +4894,13 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
}
static bool
-ieee80211_verify_sta_he_mcs_support(struct ieee80211_supported_band *sband,
+ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
const struct ieee80211_he_operation *he_op)
{
const struct ieee80211_sta_he_cap *sta_he_cap =
- ieee80211_get_he_sta_cap(sband);
+ ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
u16 ap_min_req_set;
int i;
@@ -5023,7 +4994,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
}
- if (!ieee80211_get_he_sta_cap(sband))
+ if (!ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif)))
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
rcu_read_lock();
@@ -5081,7 +5053,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
else
he_oper = NULL;
- if (!ieee80211_verify_sta_he_mcs_support(sband, he_oper))
+ if (!ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper))
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
}
@@ -5651,15 +5623,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2 * FILS_NONCE_LEN);
assoc_data->bss = req->bss;
-
- if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
- if (ifmgd->powersave)
- sdata->smps_mode = IEEE80211_SMPS_DYNAMIC;
- else
- sdata->smps_mode = IEEE80211_SMPS_OFF;
- } else
- sdata->smps_mode = ifmgd->req_smps;
-
assoc_data->capability = req->bss->capability;
assoc_data->supp_rates = bss->supp_rates;
assoc_data->supp_rates_len = bss->supp_rates_len;
@@ -5766,6 +5729,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (err)
goto err_clear;
+ if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
+ if (ifmgd->powersave)
+ sdata->smps_mode = IEEE80211_SMPS_DYNAMIC;
+ else
+ sdata->smps_mode = IEEE80211_SMPS_OFF;
+ } else {
+ sdata->smps_mode = ifmgd->req_smps;
+ }
+
rcu_read_lock();
beacon_ies = rcu_dereference(req->bss->beacon_ies);
@@ -5802,12 +5774,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
beacon_ies->data, beacon_ies->len);
if (elem && elem->datalen >= 3)
sdata->vif.bss_conf.profile_periodicity = elem->data[2];
+ else
+ sdata->vif.bss_conf.profile_periodicity = 0;
elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
beacon_ies->data, beacon_ies->len);
if (elem && elem->datalen >= 11 &&
(elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
sdata->vif.bss_conf.ema_ap = true;
+ else
+ sdata->vif.bss_conf.ema_ap = false;
} else {
assoc_data->timeout = jiffies;
assoc_data->timeout_started = true;
@@ -5846,6 +5822,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
bool tx = !req->local_state_change;
+ struct ieee80211_prep_tx_info info = {
+ .subtype = IEEE80211_STYPE_DEAUTH,
+ };
if (ifmgd->auth_data &&
ether_addr_equal(ifmgd->auth_data->bss->bssid, req->bssid)) {
@@ -5854,7 +5833,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
req->bssid, req->reason_code,
ieee80211_get_reason_code_string(req->reason_code));
- drv_mgd_prepare_tx(sdata->local, sdata, 0);
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
IEEE80211_STYPE_DEAUTH,
req->reason_code, tx,
@@ -5863,7 +5842,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
-
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
@@ -5874,7 +5853,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
req->bssid, req->reason_code,
ieee80211_get_reason_code_string(req->reason_code));
- drv_mgd_prepare_tx(sdata->local, sdata, 0);
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
IEEE80211_STYPE_DEAUTH,
req->reason_code, tx,
@@ -5898,6 +5877,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 63652c39c8e0..e5935e3d7a07 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -297,15 +297,11 @@ void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata)
static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc)
{
struct sk_buff *skb = txrc->skb;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- __le16 fc;
-
- fc = hdr->frame_control;
return (info->flags & (IEEE80211_TX_CTL_NO_ACK |
IEEE80211_TX_CTL_USE_MINRATE)) ||
- !ieee80211_is_data(fc);
+ !ieee80211_is_tx_data(skb);
}
static void rc_send_low_basicrate(struct ieee80211_tx_rate *rate,
@@ -396,6 +392,10 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta,
int mcast_rate;
bool use_basicrate = false;
+ if (ieee80211_is_tx_data(txrc->skb) &&
+ info->flags & IEEE80211_TX_CTL_NO_ACK)
+ return false;
+
if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
__rate_control_send_low(txrc->hw, sband, pubsta, info,
txrc->rate_idx_mask);
@@ -870,7 +870,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
int max_rates)
{
struct ieee80211_sub_if_data *sdata;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_supported_band *sband;
@@ -882,7 +881,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
sdata = vif_to_sdata(vif);
sband = sdata->local->hw.wiphy->bands[info->band];
- if (ieee80211_is_data(hdr->frame_control))
+ if (ieee80211_is_tx_data(skb))
rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
if (dest[0].idx < 0)
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 6487b05da6fa..72b44d4c42d0 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -434,7 +434,7 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
unsigned int nsecs = 0, overhead = mi->overhead;
unsigned int ampdu_len = 1;
- /* do not account throughput if sucess prob is below 10% */
+ /* do not account throughput if success prob is below 10% */
if (prob_avg < MINSTREL_FRAC(10, 100))
return 0;
@@ -1176,29 +1176,6 @@ minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary)
}
static void
-minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
-{
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
- u16 tid;
-
- if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO)
- return;
-
- if (unlikely(!ieee80211_is_data_qos(hdr->frame_control)))
- return;
-
- if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
- return;
-
- tid = ieee80211_get_tid(hdr);
- if (likely(sta->ampdu_mlme.tid_tx[tid]))
- return;
-
- ieee80211_start_tx_ba_session(pubsta, tid, 0);
-}
-
-static void
minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
void *priv_sta, struct ieee80211_tx_status *st)
{
@@ -1211,6 +1188,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
bool last, update = false;
int i;
+ /* Ignore packet that was sent with noAck flag */
+ if (info->flags & IEEE80211_TX_CTL_NO_ACK)
+ return;
+
/* This packet was aggregated but doesn't carry status info */
if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
!(info->flags & IEEE80211_TX_STAT_AMPDU))
@@ -1498,10 +1479,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
struct minstrel_priv *mp = priv;
u16 sample_idx;
- if (!(info->flags & IEEE80211_TX_CTL_AMPDU) &&
- !minstrel_ht_is_legacy_group(MI_RATE_GROUP(mi->max_prob_rate)))
- minstrel_aggr_check(sta, txrc->skb);
-
info->flags |= mi->tx_flags;
#ifdef CONFIG_MAC80211_DEBUGFS
@@ -1514,7 +1491,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
(info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
return;
- if (time_is_before_jiffies(mi->sample_time))
+ if (time_is_after_jiffies(mi->sample_time))
return;
mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL;
@@ -1907,6 +1884,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
static const struct rate_control_ops mac80211_minstrel_ht = {
.name = "minstrel_ht",
+ .capa = RATE_CTRL_CAPA_AMPDU_TRIGGER,
.tx_status_ext = minstrel_ht_tx_status,
.get_rate = minstrel_ht_get_rate,
.rate_init = minstrel_ht_rate_init,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1bb43edd47b6..771921c057e8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -214,6 +214,24 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
return len;
}
+static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct sk_buff *skb)
+{
+ skb_queue_tail(&sdata->skb_queue, skb);
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ if (sta)
+ sta->rx_stats.packets++;
+}
+
+static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct sk_buff *skb)
+{
+ skb->protocol = 0;
+ __ieee80211_queue_skb_to_iface(sdata, sta, skb);
+}
+
static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
int rtap_space)
@@ -254,8 +272,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
if (!skb)
return;
- skb_queue_tail(&sdata->skb_queue, skb);
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ ieee80211_queue_skb_to_iface(sdata, NULL, skb);
}
/*
@@ -1339,7 +1356,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
struct sk_buff_head *frames)
{
struct sk_buff *skb = rx->skb;
- struct ieee80211_local *local = rx->local;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct sta_info *sta = rx->sta;
struct tid_ampdu_rx *tid_agg_rx;
@@ -1391,8 +1407,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
/* if this mpdu is fragmented - terminate rx aggregation session */
sc = le16_to_cpu(hdr->seq_ctrl);
if (sc & IEEE80211_SCTL_FRAG) {
- skb_queue_tail(&rx->sdata->skb_queue, skb);
- ieee80211_queue_work(&local->hw, &rx->sdata->work);
+ ieee80211_queue_skb_to_iface(rx->sdata, NULL, skb);
return;
}
@@ -1563,12 +1578,8 @@ static void sta_ps_start(struct sta_info *sta)
for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
struct ieee80211_txq *txq = sta->sta.txq[tid];
- struct txq_info *txqi = to_txq_info(txq);
- spin_lock(&local->active_txq_lock[txq->ac]);
- if (!list_empty(&txqi->schedule_order))
- list_del_init(&txqi->schedule_order);
- spin_unlock(&local->active_txq_lock[txq->ac]);
+ ieee80211_unschedule_txq(&local->hw, txq, false);
if (txq_has_queue(txq))
set_bit(tid, &sta->txq_buffered_tids);
@@ -2240,17 +2251,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sc = le16_to_cpu(hdr->seq_ctrl);
frag = sc & IEEE80211_SCTL_FRAG;
- if (is_multicast_ether_addr(hdr->addr1)) {
- I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
- goto out_no_led;
- }
-
if (rx->sta)
cache = &rx->sta->frags;
if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
goto out;
+ if (is_multicast_ether_addr(hdr->addr1))
+ return RX_DROP_MONITOR;
+
I802_DEBUG_INC(rx->local->rx_handlers_fragments);
if (skb_linearize(rx->skb))
@@ -2376,7 +2385,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
out:
ieee80211_led_rx(rx->local);
- out_no_led:
if (rx->sta)
rx->sta->rx_stats.packets++;
return RX_CONTINUE;
@@ -3012,11 +3020,8 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
tf->category == WLAN_CATEGORY_TDLS &&
(tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_REQUEST ||
tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_RESPONSE)) {
- skb_queue_tail(&local->skb_queue_tdls_chsw, rx->skb);
- schedule_work(&local->tdls_chsw_work);
- if (rx->sta)
- rx->sta->rx_stats.packets++;
-
+ rx->skb->protocol = cpu_to_be16(ETH_P_TDLS);
+ __ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
return RX_QUEUED;
}
}
@@ -3496,10 +3501,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
return RX_QUEUED;
queue:
- skb_queue_tail(&sdata->skb_queue, rx->skb);
- ieee80211_queue_work(&local->hw, &sdata->work);
- if (rx->sta)
- rx->sta->rx_stats.packets++;
+ ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
return RX_QUEUED;
}
@@ -3647,10 +3649,7 @@ ieee80211_rx_h_ext(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
/* for now only beacons are ext, so queue them */
- skb_queue_tail(&sdata->skb_queue, rx->skb);
- ieee80211_queue_work(&rx->local->hw, &sdata->work);
- if (rx->sta)
- rx->sta->rx_stats.packets++;
+ ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
return RX_QUEUED;
}
@@ -3707,11 +3706,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
}
- /* queue up frame and kick off work to process it */
- skb_queue_tail(&sdata->skb_queue, rx->skb);
- ieee80211_queue_work(&rx->local->hw, &sdata->work);
- if (rx->sta)
- rx->sta->rx_stats.packets++;
+ ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
return RX_QUEUED;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index d4cc9ac2d703..6b50cb5e0e3c 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -251,13 +251,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
struct ieee80211_mgmt *mgmt = (void *)skb->data;
struct ieee80211_bss *bss;
struct ieee80211_channel *channel;
+ size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
+ u.probe_resp.variable);
+
+ if (!ieee80211_is_probe_resp(mgmt->frame_control) &&
+ !ieee80211_is_beacon(mgmt->frame_control) &&
+ !ieee80211_is_s1g_beacon(mgmt->frame_control))
+ return;
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
- if (skb->len < 15)
- return;
- } else if (skb->len < 24 ||
- (!ieee80211_is_probe_resp(mgmt->frame_control) &&
- !ieee80211_is_beacon(mgmt->frame_control)))
+ if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+ min_hdr_len = offsetof(struct ieee80211_ext,
+ u.s1g_short_beacon.variable);
+ else
+ min_hdr_len = offsetof(struct ieee80211_ext,
+ u.s1g_beacon);
+ }
+
+ if (skb->len < min_hdr_len)
return;
sdata1 = rcu_dereference(local->scan_sdata);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f2fb69da9b6e..a5505ee51229 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -425,15 +425,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (sta_prepare_rate_control(local, sta, gfp))
goto free_txq;
- sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
skb_queue_head_init(&sta->ps_tx_buf[i]);
skb_queue_head_init(&sta->tx_filtered[i]);
- sta->airtime[i].deficit = sta->airtime_weight;
- atomic_set(&sta->airtime[i].aql_tx_pending, 0);
- sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i];
- sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i];
+ init_airtime_info(&sta->airtime[i], &local->airtime[i]);
}
for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1398,11 +1394,6 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
struct ieee80211_tx_info *info;
struct ieee80211_chanctx_conf *chanctx_conf;
- /* Don't send NDPs when STA is connected HE */
- if (sdata->vif.type == NL80211_IFTYPE_STATION &&
- !(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
- return;
-
if (qos) {
fc = cpu_to_le16(IEEE80211_FTYPE_DATA |
IEEE80211_STYPE_QOS_NULLFUNC |
@@ -1897,24 +1888,59 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
}
EXPORT_SYMBOL(ieee80211_sta_set_buffered);
-void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
- u32 tx_airtime, u32 rx_airtime)
+void ieee80211_register_airtime(struct ieee80211_txq *txq,
+ u32 tx_airtime, u32 rx_airtime)
{
- struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
- struct ieee80211_local *local = sta->sdata->local;
- u8 ac = ieee80211_ac_from_tid(tid);
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif);
+ struct ieee80211_local *local = sdata->local;
+ u64 weight_sum, weight_sum_reciprocal;
+ struct airtime_sched_info *air_sched;
+ struct airtime_info *air_info;
u32 airtime = 0;
- if (sta->local->airtime_flags & AIRTIME_USE_TX)
+ air_sched = &local->airtime[txq->ac];
+ air_info = to_airtime_info(txq);
+
+ if (local->airtime_flags & AIRTIME_USE_TX)
airtime += tx_airtime;
- if (sta->local->airtime_flags & AIRTIME_USE_RX)
+ if (local->airtime_flags & AIRTIME_USE_RX)
airtime += rx_airtime;
- spin_lock_bh(&local->active_txq_lock[ac]);
- sta->airtime[ac].tx_airtime += tx_airtime;
- sta->airtime[ac].rx_airtime += rx_airtime;
- sta->airtime[ac].deficit -= airtime;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ /* Weights scale so the unit weight is 256 */
+ airtime <<= 8;
+
+ spin_lock_bh(&air_sched->lock);
+
+ air_info->tx_airtime += tx_airtime;
+ air_info->rx_airtime += rx_airtime;
+
+ if (air_sched->weight_sum) {
+ weight_sum = air_sched->weight_sum;
+ weight_sum_reciprocal = air_sched->weight_sum_reciprocal;
+ } else {
+ weight_sum = air_info->weight;
+ weight_sum_reciprocal = air_info->weight_reciprocal;
+ }
+
+ /* Round the calculation of global vt */
+ air_sched->v_t += (u64)((airtime + (weight_sum >> 1)) *
+ weight_sum_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_64;
+ air_info->v_t += (u32)((airtime + (air_info->weight >> 1)) *
+ air_info->weight_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_32;
+ ieee80211_resort_txq(&local->hw, txq);
+
+ spin_unlock_bh(&air_sched->lock);
+}
+
+void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
+ u32 tx_airtime, u32 rx_airtime)
+{
+ struct ieee80211_txq *txq = pubsta->txq[tid];
+
+ if (!txq)
+ return;
+
+ ieee80211_register_airtime(txq, tx_airtime, rx_airtime);
}
EXPORT_SYMBOL(ieee80211_sta_register_airtime);
@@ -2093,10 +2119,9 @@ static struct ieee80211_sta_rx_stats *
sta_get_last_rx_stats(struct sta_info *sta)
{
struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
- struct ieee80211_local *local = sta->local;
int cpu;
- if (!ieee80211_hw_check(&local->hw, USES_RSS))
+ if (!sta->pcpu_rx_stats)
return stats;
for_each_possible_cpu(cpu) {
@@ -2196,9 +2221,7 @@ static void sta_set_tidstats(struct sta_info *sta,
int cpu;
if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) {
- if (!ieee80211_hw_check(&local->hw, USES_RSS))
- tidstats->rx_msdu +=
- sta_get_tidstats_msdu(&sta->rx_stats, tid);
+ tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->rx_stats, tid);
if (sta->pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
@@ -2277,7 +2300,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->rx_beacon = sdata->u.mgd.count_beacon_signal;
drv_sta_statistics(local, sdata, &sta->sta, sinfo);
-
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) |
BIT_ULL(NL80211_STA_INFO_STA_FLAGS) |
BIT_ULL(NL80211_STA_INFO_BSS_PARAM) |
@@ -2312,8 +2334,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) |
BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) {
- if (!ieee80211_hw_check(&local->hw, USES_RSS))
- sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
+ sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
if (sta->pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
@@ -2363,7 +2384,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
- sinfo->airtime_weight = sta->airtime_weight;
+ sinfo->airtime_weight = sta->airtime[0].weight;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 0333072ebd98..ba2796782008 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -135,18 +135,25 @@ enum ieee80211_agg_stop_reason {
#define AIRTIME_USE_TX BIT(0)
#define AIRTIME_USE_RX BIT(1)
+
struct airtime_info {
u64 rx_airtime;
u64 tx_airtime;
- s64 deficit;
+ u64 v_t;
+ u64 last_scheduled;
+ struct list_head list;
atomic_t aql_tx_pending; /* Estimated airtime for frames pending */
u32 aql_limit_low;
u32 aql_limit_high;
+ u32 weight_reciprocal;
+ u16 weight;
};
void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
struct sta_info *sta, u8 ac,
u16 tx_airtime, bool tx_completed);
+void ieee80211_register_airtime(struct ieee80211_txq *txq,
+ u32 tx_airtime, u32 rx_airtime);
struct sta_info;
@@ -515,7 +522,6 @@ struct ieee80211_fragment_cache {
* @tid_seq: per-TID sequence numbers for sending to this STA
* @airtime: per-AC struct airtime_info describing airtime statistics for this
* station
- * @airtime_weight: station weight for airtime fairness calculation purposes
* @ampdu_mlme: A-MPDU state machine state
* @mesh: mesh STA information
* @debugfs_dir: debug filesystem directory dentry
@@ -646,7 +652,6 @@ struct sta_info {
u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
struct airtime_info airtime[IEEE80211_NUM_ACS];
- u16 airtime_weight;
/*
* Aggregation information, locked with lock.
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 9baf185ee4c7..bae321ff77f6 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -970,6 +970,25 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked)
ieee80211_frame_acked(sta, skb);
+ } else if (wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_txq *txq;
+ u32 airtime;
+
+ /* Account airtime to multicast queue */
+ sdata = ieee80211_sdata_from_skb(local, skb);
+
+ if (sdata && (txq = sdata->vif.txq)) {
+ airtime = info->status.tx_time ?:
+ ieee80211_calc_expected_tx_airtime(hw,
+ &sdata->vif,
+ NULL,
+ skb->len,
+ false);
+
+ ieee80211_register_airtime(txq, airtime, 0);
+ }
}
/* SNMP counters
@@ -1006,12 +1025,11 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) &&
!(info->flags & IEEE80211_TX_CTL_INJECTED) &&
local->ps_sdata && !(local->scanning)) {
- if (info->flags & IEEE80211_TX_STAT_ACK) {
+ if (info->flags & IEEE80211_TX_STAT_ACK)
local->ps_sdata->u.mgd.flags |=
IEEE80211_STA_NULLFUNC_ACKED;
- } else
- mod_timer(&local->dynamic_ps_timer, jiffies +
- msecs_to_jiffies(10));
+ mod_timer(&local->dynamic_ps_timer,
+ jiffies + msecs_to_jiffies(10));
}
ieee80211_report_used_skb(local, skb, false);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index f91d02b81b92..45e532ad1215 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1920,7 +1920,7 @@ out:
return ret;
}
-static void
+void
ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
@@ -1971,32 +1971,6 @@ void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
rcu_read_unlock();
}
-void ieee80211_tdls_chsw_work(struct work_struct *wk)
-{
- struct ieee80211_local *local =
- container_of(wk, struct ieee80211_local, tdls_chsw_work);
- struct ieee80211_sub_if_data *sdata;
- struct sk_buff *skb;
- struct ieee80211_tdls_data *tf;
-
- wiphy_lock(local->hw.wiphy);
- while ((skb = skb_dequeue(&local->skb_queue_tdls_chsw))) {
- tf = (struct ieee80211_tdls_data *)skb->data;
- list_for_each_entry(sdata, &local->interfaces, list) {
- if (!ieee80211_sdata_running(sdata) ||
- sdata->vif.type != NL80211_IFTYPE_STATION ||
- !ether_addr_equal(tf->da, sdata->vif.addr))
- continue;
-
- ieee80211_process_tdls_channel_switch(sdata, skb);
- break;
- }
-
- kfree_skb(skb);
- }
- wiphy_unlock(local->hw.wiphy);
-}
-
void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
const u8 *peer, u16 reason)
{
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 8fcc39056402..f6ef15366938 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016-2017 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2020 Intel Corporation
+* Copyright (C) 2018 - 2021 Intel Corporation
*/
#if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -1461,31 +1461,52 @@ DEFINE_EVENT(release_evt, drv_allow_buffered_frames,
TP_ARGS(local, sta, tids, num_frames, reason, more_data)
);
-TRACE_EVENT(drv_mgd_prepare_tx,
+DECLARE_EVENT_CLASS(mgd_prepare_complete_tx_evt,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- u16 duration),
+ u16 duration, u16 subtype, bool success),
- TP_ARGS(local, sdata, duration),
+ TP_ARGS(local, sdata, duration, subtype, success),
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
__field(u32, duration)
+ __field(u16, subtype)
+ __field(u8, success)
),
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
__entry->duration = duration;
+ __entry->subtype = subtype;
+ __entry->success = success;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " duration: %u",
- LOCAL_PR_ARG, VIF_PR_ARG, __entry->duration
+ LOCAL_PR_FMT VIF_PR_FMT " duration: %u, subtype:0x%x, success:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->duration,
+ __entry->subtype, __entry->success
)
);
+DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_prepare_tx,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 duration, u16 subtype, bool success),
+
+ TP_ARGS(local, sdata, duration, subtype, success)
+);
+
+DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_complete_tx,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 duration, u16 subtype, bool success),
+
+ TP_ARGS(local, sdata, duration, subtype, success)
+);
+
DEFINE_EVENT(local_sdata_evt, drv_mgd_protect_tdls_discover,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata),
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0b719f3d2dec..e96981144358 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -18,6 +18,7 @@
#include <linux/bitmap.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
+#include <linux/timekeeping.h>
#include <net/net_namespace.h>
#include <net/ieee80211_radiotap.h>
#include <net/cfg80211.h>
@@ -666,6 +667,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
u32 len;
struct ieee80211_tx_rate_control txrc;
struct ieee80211_sta_rates *ratetbl = NULL;
+ bool encap = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP;
bool assoc = false;
memset(&txrc, 0, sizeof(txrc));
@@ -707,7 +709,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
* just wants a probe response.
*/
if (tx->sdata->vif.bss_conf.use_short_preamble &&
- (ieee80211_is_data(hdr->frame_control) ||
+ (ieee80211_is_tx_data(tx->skb) ||
(tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE))))
txrc.short_preamble = true;
@@ -729,7 +731,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
"%s: Dropped data frame as no usable bitrate found while "
"scanning and associated. Target station: "
"%pM on %d GHz band\n",
- tx->sdata->name, hdr->addr1,
+ tx->sdata->name,
+ encap ? ((struct ethhdr *)hdr)->h_dest : hdr->addr1,
info->band ? 5 : 2))
return TX_DROP;
@@ -763,7 +766,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
if (txrc.reported_rate.idx < 0) {
txrc.reported_rate = tx->rate;
- if (tx->sta && ieee80211_is_data(hdr->frame_control))
+ if (tx->sta && ieee80211_is_tx_data(tx->skb))
tx->sta->tx_stats.last_rate = txrc.reported_rate;
} else if (tx->sta)
tx->sta->tx_stats.last_rate = txrc.reported_rate;
@@ -1447,7 +1450,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
codel_vars_init(&txqi->def_cvars);
codel_stats_init(&txqi->cstats);
__skb_queue_head_init(&txqi->frags);
- INIT_LIST_HEAD(&txqi->schedule_order);
+ RB_CLEAR_NODE(&txqi->schedule_order);
txqi->txq.vif = &sdata->vif;
@@ -1491,9 +1494,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
spin_unlock_bh(&fq->lock);
- spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
- list_del_init(&txqi->schedule_order);
- spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
+ ieee80211_unschedule_txq(&local->hw, &txqi->txq, true);
}
void ieee80211_txq_set_params(struct ieee80211_local *local)
@@ -1768,8 +1769,6 @@ static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx)
CALL_TXH(ieee80211_tx_h_ps_buf);
CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
CALL_TXH(ieee80211_tx_h_select_key);
- if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
- CALL_TXH(ieee80211_tx_h_rate_ctrl);
txh_done:
if (unlikely(res == TX_DROP)) {
@@ -1802,6 +1801,9 @@ static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx)
goto txh_done;
}
+ if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
+ CALL_TXH(ieee80211_tx_h_rate_ctrl);
+
CALL_TXH(ieee80211_tx_h_michael_mic_add);
CALL_TXH(ieee80211_tx_h_sequence);
CALL_TXH(ieee80211_tx_h_fragment);
@@ -2014,6 +2016,26 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
ieee80211_tx(sdata, sta, skb, false);
}
+static bool ieee80211_validate_radiotap_len(struct sk_buff *skb)
+{
+ struct ieee80211_radiotap_header *rthdr =
+ (struct ieee80211_radiotap_header *)skb->data;
+
+ /* check for not even having the fixed radiotap header part */
+ if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
+ return false; /* too short to be possibly valid */
+
+ /* is it a header version we can trust to find length from? */
+ if (unlikely(rthdr->it_version))
+ return false; /* only version 0 is supported */
+
+ /* does the skb contain enough to deliver on the alleged length? */
+ if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
+ return false; /* skb too short for claimed rt header extent */
+
+ return true;
+}
+
bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
struct net_device *dev)
{
@@ -2022,8 +2044,6 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
struct ieee80211_radiotap_header *rthdr =
(struct ieee80211_radiotap_header *) skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_supported_band *sband =
- local->hw.wiphy->bands[info->band];
int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
NULL);
u16 txflags;
@@ -2036,17 +2056,8 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
u8 vht_mcs = 0, vht_nss = 0;
int i;
- /* check for not even having the fixed radiotap header part */
- if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
- return false; /* too short to be possibly valid */
-
- /* is it a header version we can trust to find length from? */
- if (unlikely(rthdr->it_version))
- return false; /* only version 0 is supported */
-
- /* does the skb contain enough to deliver on the alleged length? */
- if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
- return false; /* skb too short for claimed rt header extent */
+ if (!ieee80211_validate_radiotap_len(skb))
+ return false;
info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
IEEE80211_TX_CTL_DONTFRAG;
@@ -2186,6 +2197,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
return false;
if (rate_found) {
+ struct ieee80211_supported_band *sband =
+ local->hw.wiphy->bands[info->band];
+
info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT;
for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
@@ -2199,7 +2213,7 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
} else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) {
ieee80211_rate_set_vht(info->control.rates, vht_mcs,
vht_nss);
- } else {
+ } else if (sband) {
for (i = 0; i < sband->n_bitrates; i++) {
if (rate * 5 != sband->bitrates[i].bitrate)
continue;
@@ -2236,8 +2250,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_CTL_INJECTED;
- /* Sanity-check and process the injection radiotap header */
- if (!ieee80211_parse_tx_radiotap(skb, dev))
+ /* Sanity-check the length of the radiotap header */
+ if (!ieee80211_validate_radiotap_len(skb))
goto fail;
/* we now know there is a radiotap header with a length we can use */
@@ -2351,6 +2365,14 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
ieee80211_select_queue_80211(sdata, skb, hdr);
skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority));
+ /*
+ * Process the radiotap header. This will now take into account the
+ * selected chandef above to accurately set injection rates and
+ * retransmissions.
+ */
+ if (!ieee80211_parse_tx_radiotap(skb, dev))
+ goto fail_rcu;
+
/* remove the injection radiotap header */
skb_pull(skb, len_rthdr);
@@ -3264,6 +3286,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
return false;
+ if (sdata->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_ENABLED)
+ return false;
+
if (skb_is_gso(skb))
return false;
@@ -3369,15 +3394,21 @@ out:
* Can be called while the sta lock is held. Anything that can cause packets to
* be generated will cause deadlock!
*/
-static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, u8 pn_offs,
- struct ieee80211_key *key,
- struct sk_buff *skb)
+static ieee80211_tx_result
+ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, u8 pn_offs,
+ struct ieee80211_key *key,
+ struct ieee80211_tx_data *tx)
{
+ struct sk_buff *skb = tx->skb;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (void *)skb->data;
u8 tid = IEEE80211_NUM_TIDS;
+ if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL) &&
+ ieee80211_tx_h_rate_ctrl(tx) != TX_CONTINUE)
+ return TX_DROP;
+
if (key)
info->control.hw_key = &key->conf;
@@ -3426,6 +3457,8 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
break;
}
}
+
+ return TX_CONTINUE;
}
static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
@@ -3529,24 +3562,17 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
tx.sta = sta;
tx.key = fast_tx->key;
- if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
- tx.skb = skb;
- r = ieee80211_tx_h_rate_ctrl(&tx);
- skb = tx.skb;
- tx.skb = NULL;
-
- if (r != TX_CONTINUE) {
- if (r != TX_QUEUED)
- kfree_skb(skb);
- return true;
- }
- }
-
if (ieee80211_queue_skb(local, sdata, sta, skb))
return true;
- ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
- fast_tx->key, skb);
+ tx.skb = skb;
+ r = ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
+ fast_tx->key, &tx);
+ tx.skb = NULL;
+ if (r == TX_DROP) {
+ kfree_skb(skb);
+ return true;
+ }
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
@@ -3651,8 +3677,16 @@ begin:
else
info->flags &= ~IEEE80211_TX_CTL_AMPDU;
- if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)
+ if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
+ if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
+ r = ieee80211_tx_h_rate_ctrl(&tx);
+ if (r != TX_CONTINUE) {
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ }
+ }
goto encap_out;
+ }
if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
struct sta_info *sta = container_of(txq->sta, struct sta_info,
@@ -3663,8 +3697,12 @@ begin:
(tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
pn_offs = ieee80211_hdrlen(hdr->frame_control);
- ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
- tx.key, skb);
+ r = ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
+ tx.key, &tx);
+ if (r != TX_CONTINUE) {
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ }
} else {
if (invoke_tx_handlers_late(&tx))
goto begin;
@@ -3744,102 +3782,259 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue);
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct airtime_sched_info *air_sched;
+ u64 now = ktime_get_boottime_ns();
struct ieee80211_txq *ret = NULL;
- struct txq_info *txqi = NULL, *head = NULL;
- bool found_eligible_txq = false;
+ struct airtime_info *air_info;
+ struct txq_info *txqi = NULL;
+ struct rb_node *node;
+ bool first = false;
- spin_lock_bh(&local->active_txq_lock[ac]);
+ air_sched = &local->airtime[ac];
+ spin_lock_bh(&air_sched->lock);
- begin:
- txqi = list_first_entry_or_null(&local->active_txqs[ac],
- struct txq_info,
- schedule_order);
- if (!txqi)
+ node = air_sched->schedule_pos;
+
+begin:
+ if (!node) {
+ node = rb_first_cached(&air_sched->active_txqs);
+ first = true;
+ } else {
+ node = rb_next(node);
+ }
+
+ if (!node)
goto out;
- if (txqi == head) {
- if (!found_eligible_txq)
- goto out;
- else
- found_eligible_txq = false;
+ txqi = container_of(node, struct txq_info, schedule_order);
+ air_info = to_airtime_info(&txqi->txq);
+
+ if (air_info->v_t > air_sched->v_t &&
+ (!first || !airtime_catchup_v_t(air_sched, air_info->v_t, now)))
+ goto out;
+
+ if (!ieee80211_txq_airtime_check(hw, &txqi->txq)) {
+ first = false;
+ goto begin;
}
- if (!head)
- head = txqi;
+ air_sched->schedule_pos = node;
+ air_sched->last_schedule_activity = now;
+ ret = &txqi->txq;
+out:
+ spin_unlock_bh(&air_sched->lock);
+ return ret;
+}
+EXPORT_SYMBOL(ieee80211_next_txq);
- if (txqi->txq.sta) {
- struct sta_info *sta = container_of(txqi->txq.sta,
- struct sta_info, sta);
- bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
- s64 deficit = sta->airtime[txqi->txq.ac].deficit;
+static void __ieee80211_insert_txq(struct rb_root_cached *root,
+ struct txq_info *txqi)
+{
+ struct rb_node **new = &root->rb_root.rb_node;
+ struct airtime_info *old_air, *new_air;
+ struct rb_node *parent = NULL;
+ struct txq_info *__txqi;
+ bool leftmost = true;
+
+ while (*new) {
+ parent = *new;
+ __txqi = rb_entry(parent, struct txq_info, schedule_order);
+ old_air = to_airtime_info(&__txqi->txq);
+ new_air = to_airtime_info(&txqi->txq);
+
+ if (new_air->v_t <= old_air->v_t) {
+ new = &parent->rb_left;
+ } else {
+ new = &parent->rb_right;
+ leftmost = false;
+ }
+ }
- if (aql_check)
- found_eligible_txq = true;
+ rb_link_node(&txqi->schedule_order, parent, new);
+ rb_insert_color_cached(&txqi->schedule_order, root, leftmost);
+}
- if (deficit < 0)
- sta->airtime[txqi->txq.ac].deficit +=
- sta->airtime_weight;
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq)
+{
+ struct airtime_info *air_info = to_airtime_info(txq);
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct txq_info *txqi = to_txq_info(txq);
+ struct airtime_sched_info *air_sched;
- if (deficit < 0 || !aql_check) {
- list_move_tail(&txqi->schedule_order,
- &local->active_txqs[txqi->txq.ac]);
- goto begin;
+ air_sched = &local->airtime[txq->ac];
+
+ lockdep_assert_held(&air_sched->lock);
+
+ if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
+ struct airtime_info *a_prev = NULL, *a_next = NULL;
+ struct txq_info *t_prev, *t_next;
+ struct rb_node *n_prev, *n_next;
+
+ /* Erasing a node can cause an expensive rebalancing operation,
+ * so we check the previous and next nodes first and only remove
+ * and re-insert if the current node is not already in the
+ * correct position.
+ */
+ if ((n_prev = rb_prev(&txqi->schedule_order)) != NULL) {
+ t_prev = container_of(n_prev, struct txq_info,
+ schedule_order);
+ a_prev = to_airtime_info(&t_prev->txq);
+ }
+
+ if ((n_next = rb_next(&txqi->schedule_order)) != NULL) {
+ t_next = container_of(n_next, struct txq_info,
+ schedule_order);
+ a_next = to_airtime_info(&t_next->txq);
}
+
+ if ((!a_prev || a_prev->v_t <= air_info->v_t) &&
+ (!a_next || a_next->v_t > air_info->v_t))
+ return;
+
+ if (air_sched->schedule_pos == &txqi->schedule_order)
+ air_sched->schedule_pos = n_prev;
+
+ rb_erase_cached(&txqi->schedule_order,
+ &air_sched->active_txqs);
+ RB_CLEAR_NODE(&txqi->schedule_order);
+ __ieee80211_insert_txq(&air_sched->active_txqs, txqi);
}
+}
+
+void ieee80211_update_airtime_weight(struct ieee80211_local *local,
+ struct airtime_sched_info *air_sched,
+ u64 now, bool force)
+{
+ struct airtime_info *air_info, *tmp;
+ u64 weight_sum = 0;
+
+ if (unlikely(!now))
+ now = ktime_get_boottime_ns();
+ lockdep_assert_held(&air_sched->lock);
+
+ if (!force && (air_sched->last_weight_update <
+ now - AIRTIME_ACTIVE_DURATION))
+ return;
+
+ list_for_each_entry_safe(air_info, tmp,
+ &air_sched->active_list, list) {
+ if (airtime_is_active(air_info, now))
+ weight_sum += air_info->weight;
+ else
+ list_del_init(&air_info->list);
+ }
+ airtime_weight_sum_set(air_sched, weight_sum);
+ air_sched->last_weight_update = now;
+}
- if (txqi->schedule_round == local->schedule_round[ac])
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq)
+ __acquires(txq_lock) __releases(txq_lock)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct txq_info *txqi = to_txq_info(txq);
+ struct airtime_sched_info *air_sched;
+ u64 now = ktime_get_boottime_ns();
+ struct airtime_info *air_info;
+ u8 ac = txq->ac;
+ bool was_active;
+
+ air_sched = &local->airtime[ac];
+ air_info = to_airtime_info(txq);
+
+ spin_lock_bh(&air_sched->lock);
+ was_active = airtime_is_active(air_info, now);
+ airtime_set_active(air_sched, air_info, now);
+
+ if (!RB_EMPTY_NODE(&txqi->schedule_order))
goto out;
- list_del_init(&txqi->schedule_order);
- txqi->schedule_round = local->schedule_round[ac];
- ret = &txqi->txq;
+ /* If the station has been inactive for a while, catch up its v_t so it
+ * doesn't get indefinite priority; see comment above the definition of
+ * AIRTIME_MAX_BEHIND.
+ */
+ if ((!was_active && air_info->v_t < air_sched->v_t) ||
+ air_info->v_t < air_sched->v_t - AIRTIME_MAX_BEHIND)
+ air_info->v_t = air_sched->v_t;
+
+ ieee80211_update_airtime_weight(local, air_sched, now, !was_active);
+ __ieee80211_insert_txq(&air_sched->active_txqs, txqi);
out:
- spin_unlock_bh(&local->active_txq_lock[ac]);
- return ret;
+ spin_unlock_bh(&air_sched->lock);
}
-EXPORT_SYMBOL(ieee80211_next_txq);
+EXPORT_SYMBOL(ieee80211_schedule_txq);
-void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq,
- bool force)
+static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq,
+ bool purge)
{
struct ieee80211_local *local = hw_to_local(hw);
struct txq_info *txqi = to_txq_info(txq);
+ struct airtime_sched_info *air_sched;
+ struct airtime_info *air_info;
- spin_lock_bh(&local->active_txq_lock[txq->ac]);
-
- if (list_empty(&txqi->schedule_order) &&
- (force || !skb_queue_empty(&txqi->frags) ||
- txqi->tin.backlog_packets)) {
- /* If airtime accounting is active, always enqueue STAs at the
- * head of the list to ensure that they only get moved to the
- * back by the airtime DRR scheduler once they have a negative
- * deficit. A station that already has a negative deficit will
- * get immediately moved to the back of the list on the next
- * call to ieee80211_next_txq().
- */
- if (txqi->txq.sta && local->airtime_flags &&
- wiphy_ext_feature_isset(local->hw.wiphy,
- NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
- list_add(&txqi->schedule_order,
- &local->active_txqs[txq->ac]);
- else
- list_add_tail(&txqi->schedule_order,
- &local->active_txqs[txq->ac]);
+ air_sched = &local->airtime[txq->ac];
+ air_info = to_airtime_info(&txqi->txq);
+
+ lockdep_assert_held(&air_sched->lock);
+
+ if (purge) {
+ list_del_init(&air_info->list);
+ ieee80211_update_airtime_weight(local, air_sched, 0, true);
}
- spin_unlock_bh(&local->active_txq_lock[txq->ac]);
+ if (RB_EMPTY_NODE(&txqi->schedule_order))
+ return;
+
+ if (air_sched->schedule_pos == &txqi->schedule_order)
+ air_sched->schedule_pos = rb_prev(&txqi->schedule_order);
+
+ if (!purge)
+ airtime_set_active(air_sched, air_info,
+ ktime_get_boottime_ns());
+
+ rb_erase_cached(&txqi->schedule_order,
+ &air_sched->active_txqs);
+ RB_CLEAR_NODE(&txqi->schedule_order);
}
-EXPORT_SYMBOL(__ieee80211_schedule_txq);
+
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq,
+ bool purge)
+ __acquires(txq_lock) __releases(txq_lock)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ spin_lock_bh(&local->airtime[txq->ac].lock);
+ __ieee80211_unschedule_txq(hw, txq, purge);
+ spin_unlock_bh(&local->airtime[txq->ac].lock);
+}
+
+void ieee80211_return_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq, bool force)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct txq_info *txqi = to_txq_info(txq);
+
+ spin_lock_bh(&local->airtime[txq->ac].lock);
+
+ if (!RB_EMPTY_NODE(&txqi->schedule_order) && !force &&
+ !txq_has_queue(txq))
+ __ieee80211_unschedule_txq(hw, txq, false);
+
+ spin_unlock_bh(&local->airtime[txq->ac].lock);
+}
+EXPORT_SYMBOL(ieee80211_return_txq);
DEFINE_STATIC_KEY_FALSE(aql_disable);
bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
- struct sta_info *sta;
+ struct airtime_info *air_info = to_airtime_info(txq);
struct ieee80211_local *local = hw_to_local(hw);
if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
@@ -3854,15 +4049,12 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
return true;
- sta = container_of(txq->sta, struct sta_info, sta);
- if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
- sta->airtime[txq->ac].aql_limit_low)
+ if (atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_low)
return true;
if (atomic_read(&local->aql_total_pending_airtime) <
local->aql_threshold &&
- atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
- sta->airtime[txq->ac].aql_limit_high)
+ atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_high)
return true;
return false;
@@ -3872,63 +4064,85 @@ EXPORT_SYMBOL(ieee80211_txq_airtime_check);
bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
+ struct txq_info *first_txqi = NULL, *txqi = to_txq_info(txq);
struct ieee80211_local *local = hw_to_local(hw);
- struct txq_info *iter, *tmp, *txqi = to_txq_info(txq);
- struct sta_info *sta;
- u8 ac = txq->ac;
+ struct airtime_sched_info *air_sched;
+ struct airtime_info *air_info;
+ struct rb_node *node = NULL;
+ bool ret = false;
+ u64 now;
- spin_lock_bh(&local->active_txq_lock[ac]);
- if (!txqi->txq.sta)
- goto out;
+ if (!ieee80211_txq_airtime_check(hw, txq))
+ return false;
+
+ air_sched = &local->airtime[txq->ac];
+ spin_lock_bh(&air_sched->lock);
- if (list_empty(&txqi->schedule_order))
+ if (RB_EMPTY_NODE(&txqi->schedule_order))
goto out;
- list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
- schedule_order) {
- if (iter == txqi)
- break;
+ now = ktime_get_boottime_ns();
- if (!iter->txq.sta) {
- list_move_tail(&iter->schedule_order,
- &local->active_txqs[ac]);
- continue;
- }
- sta = container_of(iter->txq.sta, struct sta_info, sta);
- if (sta->airtime[ac].deficit < 0)
- sta->airtime[ac].deficit += sta->airtime_weight;
- list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
- }
+ /* Like in ieee80211_next_txq(), make sure the first station in the
+ * scheduling order is eligible for transmission to avoid starvation.
+ */
+ node = rb_first_cached(&air_sched->active_txqs);
+ if (node) {
+ first_txqi = container_of(node, struct txq_info,
+ schedule_order);
+ air_info = to_airtime_info(&first_txqi->txq);
- sta = container_of(txqi->txq.sta, struct sta_info, sta);
- if (sta->airtime[ac].deficit >= 0)
- goto out;
+ if (air_sched->v_t < air_info->v_t)
+ airtime_catchup_v_t(air_sched, air_info->v_t, now);
+ }
- sta->airtime[ac].deficit += sta->airtime_weight;
- list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ air_info = to_airtime_info(&txqi->txq);
+ if (air_info->v_t <= air_sched->v_t) {
+ air_sched->last_schedule_activity = now;
+ ret = true;
+ }
- return false;
out:
- if (!list_empty(&txqi->schedule_order))
- list_del_init(&txqi->schedule_order);
- spin_unlock_bh(&local->active_txq_lock[ac]);
-
- return true;
+ spin_unlock_bh(&air_sched->lock);
+ return ret;
}
EXPORT_SYMBOL(ieee80211_txq_may_transmit);
void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct airtime_sched_info *air_sched = &local->airtime[ac];
- spin_lock_bh(&local->active_txq_lock[ac]);
- local->schedule_round[ac]++;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ spin_lock_bh(&air_sched->lock);
+ air_sched->schedule_pos = NULL;
+ spin_unlock_bh(&air_sched->lock);
}
EXPORT_SYMBOL(ieee80211_txq_schedule_start);
+static void
+ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct sk_buff *skb)
+{
+ struct rate_control_ref *ref = sdata->local->rate_ctrl;
+ u16 tid;
+
+ if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
+ return;
+
+ if (!sta || !sta->sta.ht_cap.ht_supported ||
+ !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
+ skb->protocol == sdata->control_port_protocol)
+ return;
+
+ tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+ if (likely(sta->ampdu_mlme.tid_tx[tid]))
+ return;
+
+ ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
+}
+
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
u32 info_flags,
@@ -3959,6 +4173,8 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
skb_get_hash(skb);
}
+ ieee80211_aggr_check(sdata, sta, skb);
+
if (sta) {
struct ieee80211_fast_tx *fast_tx;
@@ -4222,6 +4438,8 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
memset(info, 0, sizeof(*info));
+ ieee80211_aggr_check(sdata, sta, skb);
+
tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
if (tid_tx) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0a0481f5af48..05e96212b104 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -6,7 +6,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*
* utilities for mac80211
*/
@@ -947,7 +947,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
switch (elem->data[0]) {
case WLAN_EID_EXT_HE_MU_EDCA:
- if (len == sizeof(*elems->mu_edca_param_set)) {
+ if (len >= sizeof(*elems->mu_edca_param_set)) {
elems->mu_edca_param_set = data;
if (crc)
*crc = crc32_be(*crc, (void *)elem,
@@ -968,7 +968,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
}
break;
case WLAN_EID_EXT_UORA:
- if (len == 1)
+ if (len >= 1)
elems->uora_element = data;
break;
case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME:
@@ -976,7 +976,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
elems->max_channel_switch_time = data;
break;
case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION:
- if (len == sizeof(*elems->mbssid_config_ie))
+ if (len >= sizeof(*elems->mbssid_config_ie))
elems->mbssid_config_ie = data;
break;
case WLAN_EID_EXT_HE_SPR:
@@ -985,7 +985,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
elems->he_spr = data;
break;
case WLAN_EID_EXT_HE_6GHZ_CAPA:
- if (len == sizeof(*elems->he_6ghz_capa))
+ if (len >= sizeof(*elems->he_6ghz_capa))
elems->he_6ghz_capa = data;
break;
}
@@ -1074,14 +1074,14 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
switch (id) {
case WLAN_EID_LINK_ID:
- if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) {
+ if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) {
elem_parse_failed = true;
break;
}
elems->lnk_id = (void *)(pos - 2);
break;
case WLAN_EID_CHAN_SWITCH_TIMING:
- if (elen != sizeof(struct ieee80211_ch_switch_timing)) {
+ if (elen < sizeof(struct ieee80211_ch_switch_timing)) {
elem_parse_failed = true;
break;
}
@@ -1244,7 +1244,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elems->sec_chan_offs = (void *)pos;
break;
case WLAN_EID_CHAN_SWITCH_PARAM:
- if (elen !=
+ if (elen <
sizeof(*elems->mesh_chansw_params_ie)) {
elem_parse_failed = true;
break;
@@ -1253,7 +1253,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
break;
case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
if (!action ||
- elen != sizeof(*elems->wide_bw_chansw_ie)) {
+ elen < sizeof(*elems->wide_bw_chansw_ie)) {
elem_parse_failed = true;
break;
}
@@ -1272,7 +1272,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
pos, elen);
if (ie) {
- if (ie[1] == sizeof(*elems->wide_bw_chansw_ie))
+ if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie))
elems->wide_bw_chansw_ie =
(void *)(ie + 2);
else
@@ -1316,7 +1316,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elems->cisco_dtpc_elem = pos;
break;
case WLAN_EID_ADDBA_EXT:
- if (elen != sizeof(struct ieee80211_addba_ext_ie)) {
+ if (elen < sizeof(struct ieee80211_addba_ext_ie)) {
elem_parse_failed = true;
break;
}
@@ -1342,7 +1342,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elem, elems);
break;
case WLAN_EID_S1G_CAPABILITIES:
- if (elen == sizeof(*elems->s1g_capab))
+ if (elen >= sizeof(*elems->s1g_capab))
elems->s1g_capab = (void *)pos;
else
elem_parse_failed = true;
@@ -1693,7 +1693,10 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) {
mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
err = ieee80211_wep_encrypt(local, skb, key, key_len, key_idx);
- WARN_ON(err);
+ if (WARN_ON(err)) {
+ kfree_skb(skb);
+ return;
+ }
}
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
@@ -1934,13 +1937,26 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
*offset = noffset;
}
- he_cap = ieee80211_get_he_sta_cap(sband);
- if (he_cap) {
+ he_cap = ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ if (he_cap &&
+ cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
+ IEEE80211_CHAN_NO_HE)) {
pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
if (!pos)
goto out_err;
+ }
+
+ if (cfg80211_any_usable_channels(local->hw.wiphy,
+ BIT(NL80211_BAND_6GHZ),
+ IEEE80211_CHAN_NO_HE)) {
+ struct ieee80211_supported_band *sband6;
+
+ sband6 = local->hw.wiphy->bands[NL80211_BAND_6GHZ];
+ he_cap = ieee80211_get_he_iftype_cap(sband6,
+ ieee80211_vif_type_p2p(&sdata->vif));
- if (sband->band == NL80211_BAND_6GHZ) {
+ if (he_cap) {
enum nl80211_iftype iftype =
ieee80211_vif_type_p2p(&sdata->vif);
__le16 cap = ieee80211_get_he_6ghz_capa(sband, iftype);
@@ -2178,8 +2194,6 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
list_for_each_entry(ctx, &local->chanctx_list, list)
ctx->driver_present = false;
mutex_unlock(&local->chanctx_mtx);
-
- cfg80211_shutdown_all_interfaces(local->hw.wiphy);
}
static void ieee80211_assign_chanctx(struct ieee80211_local *local,
@@ -2946,12 +2960,15 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
u8 *pos;
u16 cap;
- sband = ieee80211_get_sband(sdata);
- if (!sband)
+ if (!cfg80211_any_usable_channels(sdata->local->hw.wiphy,
+ BIT(NL80211_BAND_6GHZ),
+ IEEE80211_CHAN_NO_HE))
return;
+ sband = sdata->local->hw.wiphy->bands[NL80211_BAND_6GHZ];
+
iftd = ieee80211_get_sband_iftype_data(sband, iftype);
- if (WARN_ON(!iftd))
+ if (!iftd)
return;
/* Check for device HE 6 GHz capability before adding element */
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 1ec4d36a39f0..7d738bd06f2c 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -23,6 +23,8 @@ struct mptcp_pernet {
u8 mptcp_enabled;
unsigned int add_addr_timeout;
+ u8 checksum_enabled;
+ u8 allow_join_initial_addr_port;
};
static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
@@ -40,10 +42,22 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net)
return mptcp_get_pernet(net)->add_addr_timeout;
}
+int mptcp_is_checksum_enabled(struct net *net)
+{
+ return mptcp_get_pernet(net)->checksum_enabled;
+}
+
+int mptcp_allow_join_id0(struct net *net)
+{
+ return mptcp_get_pernet(net)->allow_join_initial_addr_port;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->checksum_enabled = 0;
+ pernet->allow_join_initial_addr_port = 1;
}
#ifdef CONFIG_SYSCTL
@@ -65,6 +79,22 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "checksum_enabled",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
+ {
+ .procname = "allow_join_initial_addr_port",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
{}
};
@@ -82,6 +112,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[0].data = &pernet->mptcp_enabled;
table[1].data = &pernet->add_addr_timeout;
+ table[2].data = &pernet->checksum_enabled;
+ table[3].data = &pernet->allow_join_initial_addr_port;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index eb2dc6dbe212..52ea2517e856 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -25,6 +25,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+ SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
+ SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index f0da4f060fe1..193466c9b549 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -18,6 +18,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
+ MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
+ MPTCP_MIB_DATACSUMERR, /* The data checksum fail */
MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */
MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */
MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f16d9b5ee978..8f88ddeab6a2 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -144,6 +144,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
+ info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
unlock_sock_fast(sk, slow);
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 6b825fb3fa83..a05270996613 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb,
else
expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
}
- if (opsize != expected_opsize)
+
+ /* Cfr RFC 8684 Section 3.3.0:
+ * If a checksum is present but its use had
+ * not been negotiated in the MP_CAPABLE handshake, the receiver MUST
+ * close the subflow with a RST, as it is not behaving as negotiated.
+ * If a checksum is not present when its use has been negotiated, the
+ * receiver MUST close the subflow with a RST, as it is considered
+ * broken
+ * We parse even option with mismatching csum presence, so that
+ * later in subflow_data_ready we can trigger the reset.
+ */
+ if (opsize != expected_opsize &&
+ (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA ||
+ opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM))
break;
/* try to be gentle vs future versions on the initial syn */
@@ -66,16 +79,12 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* host requires the use of checksums, checksums MUST be used.
* In other words, the only way for checksums not to be used
* is if both hosts in their SYNs set A=0."
- *
- * Section 3.3.0:
- * "If a checksum is not present when its use has been
- * negotiated, the receiver MUST close the subflow with a RST as
- * it is considered broken."
- *
- * We don't implement DSS checksum - fall back to TCP.
*/
if (flags & MPTCP_CAP_CHECKSUM_REQD)
- break;
+ mp_opt->csum_reqd = 1;
+
+ if (flags & MPTCP_CAP_DENY_JOIN_ID0)
+ mp_opt->deny_join_id0 = 1;
mp_opt->mp_capable = 1;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
@@ -86,7 +95,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
}
- if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
+ if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) {
/* Section 3.1.:
* "the data parameters in a MP_CAPABLE are semantically
* equivalent to those in a DSS option and can be used
@@ -98,9 +107,14 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
- pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
+ if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->csum_reqd = 1;
+ ptr += 2;
+ }
+ pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
version, flags, opsize, mp_opt->sndr_key,
- mp_opt->rcvr_key, mp_opt->data_len);
+ mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum);
break;
case MPTCPOPT_MP_JOIN:
@@ -171,10 +185,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
}
- /* RFC 6824, Section 3.3:
- * If a checksum is present, but its use had
- * not been negotiated in the MP_CAPABLE handshake,
- * the checksum field MUST be ignored.
+ /* Always parse any csum presence combination, we will enforce
+ * RFC 8684 Section 3.3.0 checks later in subflow_data_ready
*/
if (opsize != expected_opsize &&
opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
@@ -209,9 +221,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
+ if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
+ mp_opt->csum_reqd = 1;
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ ptr += 2;
+ }
+
+ pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
mp_opt->data_seq, mp_opt->subflow_seq,
- mp_opt->data_len);
+ mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum);
}
break;
@@ -323,9 +341,12 @@ static void mptcp_parse_option(const struct sk_buff *skb,
}
}
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+ const struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
const struct tcphdr *th = tcp_hdr(skb);
const unsigned char *ptr;
int length;
@@ -341,6 +362,8 @@ void mptcp_get_options(const struct sk_buff *skb,
mp_opt->dss = 0;
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
+ mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
+ mp_opt->deny_join_id0 = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -356,6 +379,8 @@ void mptcp_get_options(const struct sk_buff *skb,
length--;
continue;
default:
+ if (length < 2)
+ return;
opsize = *ptr++;
if (opsize < 2) /* "silly options" */
return;
@@ -380,6 +405,8 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
if (subflow->request_mptcp) {
opts->suboptions = OPTION_MPTCP_MPC_SYN;
+ opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
+ opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
@@ -435,8 +462,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_ext *mpext;
unsigned int data_len;
+ u8 len;
/* When skb is not available, we better over-estimate the emitted
* options len. A full DSS option (28 bytes) is longer than
@@ -465,16 +494,27 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
+ opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
/* Section 3.1.
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
* packets that start the first subflow of an MPTCP connection,
* as well as the first packet that carries data
*/
- if (data_len > 0)
- *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
- else
+ if (data_len > 0) {
+ len = TCPOLEN_MPTCP_MPC_ACK_DATA;
+ if (opts->csum_reqd) {
+ /* we need to propagate more info to csum the pseudo hdr */
+ opts->ext_copy.data_seq = mpext->data_seq;
+ opts->ext_copy.subflow_seq = mpext->subflow_seq;
+ opts->ext_copy.csum = mpext->csum;
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ }
+ *size = ALIGN(len, 4);
+ } else {
*size = TCPOLEN_MPTCP_MPC_ACK;
+ }
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
subflow, subflow->local_key, subflow->remote_key,
@@ -535,18 +575,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool ret = false;
u64 ack_seq;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL;
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
- unsigned int map_size;
+ unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
- map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
+ if (mpext) {
+ if (opts->csum_reqd)
+ map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
- remaining -= map_size;
- dss_size = map_size;
- if (mpext)
opts->ext_copy = *mpext;
+ }
+ remaining -= map_size;
+ dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
ret = true;
@@ -789,6 +832,8 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
if (subflow_req->mp_capable) {
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
opts->sndr_key = subflow_req->local_key;
+ opts->csum_reqd = subflow_req->csum_reqd;
+ opts->allow_join_id0 = subflow_req->allow_join_id0;
*size = TCPOLEN_MPTCP_MPC_SYNACK;
pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key);
@@ -867,6 +912,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return false;
}
+ if (mp_opt->deny_join_id0)
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);
@@ -1007,7 +1055,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return;
}
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
return;
@@ -1099,6 +1147,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
+ mpext->csum_reqd = mp_opt.csum_reqd;
+
+ if (mpext->csum_reqd)
+ mpext->csum = mp_opt.csum;
}
}
@@ -1118,25 +1170,53 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+{
+ struct csum_pseudo_header header;
+ __wsum csum;
+
+ /* cfr RFC 8684 3.3.1.:
+ * the data sequence number used in the pseudo-header is
+ * always the 64-bit value, irrespective of what length is used in the
+ * DSS option itself.
+ */
+ header.data_seq = cpu_to_be64(mpext->data_seq);
+ header.subflow_seq = htonl(mpext->subflow_seq);
+ header.data_len = htons(mpext->data_len);
+ header.csum = 0;
+
+ csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
+ return (__force u16)csum_fold(csum);
+}
+
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
- u8 len;
+ u8 len, flag = MPTCP_CAP_HMAC_SHA256;
- if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
+ if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYN;
- else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
+ } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYNACK;
- else if (opts->ext_copy.data_len)
+ } else if (opts->ext_copy.data_len) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
- else
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ } else {
len = TCPOLEN_MPTCP_MPC_ACK;
+ }
+
+ if (opts->csum_reqd)
+ flag |= MPTCP_CAP_CHECKSUM_REQD;
+
+ if (!opts->allow_join_id0)
+ flag |= MPTCP_CAP_DENY_JOIN_ID0;
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
MPTCP_SUPPORTED_VERSION,
- MPTCP_CAP_HMAC_SHA256);
+ flag);
if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
opts->suboptions))
@@ -1152,8 +1232,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (!opts->ext_copy.data_len)
goto mp_capable_done;
- put_unaligned_be32(opts->ext_copy.data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (opts->csum_reqd) {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ mptcp_make_csum(&opts->ext_copy), ptr);
+ } else {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
ptr += 1;
}
@@ -1305,6 +1390,9 @@ mp_capable_done:
flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
if (mpext->data_fin)
flags |= MPTCP_DSS_DATA_FIN;
+
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
@@ -1324,8 +1412,13 @@ mp_capable_done:
ptr += 2;
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
- put_unaligned_be32(mpext->data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (opts->csum_reqd) {
+ put_unaligned_be32(mpext->data_len << 16 |
+ mptcp_make_csum(mpext), ptr);
+ } else {
+ put_unaligned_be32(mpext->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
}
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 9d00fa6d22e9..639271e09604 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -320,6 +320,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.addr_signal, 0);
WRITE_ONCE(msk->pm.accept_addr, false);
WRITE_ONCE(msk->pm.accept_subflow, false);
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0;
spin_lock_init(&msk->pm.lock);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 09722598994d..d2591ebf01d9 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -451,7 +451,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check if should create a new subflow */
if (msk->pm.local_addr_used < local_addr_max &&
- msk->pm.subflows < subflows_max) {
+ msk->pm.subflows < subflows_max &&
+ !READ_ONCE(msk->pm.remote_deny_join_id0)) {
local = select_local_address(pernet, msk);
if (local) {
struct mptcp_addr_info remote = { 0 };
@@ -540,6 +541,7 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
if (subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for %s%s%s",
@@ -547,9 +549,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
- lock_sock(ssk);
+ slow = lock_sock_fast(ssk);
tcp_send_ack(ssk);
- release_sock(ssk);
+ unlock_sock_fast(ssk, slow);
spin_lock_bh(&msk->pm.lock);
}
}
@@ -566,6 +568,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct sock *sk = (struct sock *)msk;
struct mptcp_addr_info local;
+ bool slow;
local_address((struct sock_common *)ssk, &local);
if (!addresses_equal(&local, addr, addr->port))
@@ -578,9 +581,9 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for mp_prio");
- lock_sock(ssk);
+ slow = lock_sock_fast(ssk);
tcp_send_ack(ssk);
- release_sock(ssk);
+ unlock_sock_fast(ssk, slow);
spin_lock_bh(&msk->pm.lock);
return 0;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 993095089990..7bb82424e551 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -286,11 +286,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
/* try to fetch required memory from subflow */
if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
- if (ssk->sk_forward_alloc < skb->truesize)
- goto drop;
- __sk_mem_reclaim(ssk, skb->truesize);
- if (!sk_rmem_schedule(sk, skb, skb->truesize))
+ int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
+
+ if (ssk->sk_forward_alloc < amount)
goto drop;
+
+ ssk->sk_forward_alloc -= amount;
+ sk->sk_forward_alloc += amount;
}
has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
@@ -431,56 +433,55 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
- lock_sock(ssk);
+ slow = lock_sock_fast(ssk);
if (tcp_can_send_ack(ssk))
tcp_send_ack(ssk);
- release_sock(ssk);
+ unlock_sock_fast(ssk, slow);
}
}
-static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
{
- int ret;
+ bool slow;
- lock_sock(ssk);
- ret = tcp_can_send_ack(ssk);
- if (ret)
+ slow = lock_sock_fast(ssk);
+ if (tcp_can_send_ack(ssk))
tcp_cleanup_rbuf(ssk, 1);
- release_sock(ssk);
- return ret;
+ unlock_sock_fast(ssk, slow);
+}
+
+static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
+{
+ const struct inet_connection_sock *icsk = inet_csk(ssk);
+ u8 ack_pending = READ_ONCE(icsk->icsk_ack.pending);
+ const struct tcp_sock *tp = tcp_sk(ssk);
+
+ return (ack_pending & ICSK_ACK_SCHED) &&
+ ((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
+ READ_ONCE(icsk->icsk_ack.rcv_mss)) ||
+ (rx_empty && ack_pending &
+ (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
}
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
- struct sock *ack_hint = READ_ONCE(msk->ack_hint);
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
- bool cleanup;
+ int space = __mptcp_space(sk);
+ bool cleanup, rx_empty;
- /* this is a simple superset of what tcp_cleanup_rbuf() implements
- * so that we don't have to acquire the ssk socket lock most of the time
- * to do actually nothing
- */
- cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
- if (!cleanup)
- return;
+ cleanup = (space > 0) && (space >= (old_space << 1));
+ rx_empty = !atomic_read(&sk->sk_rmem_alloc);
- /* if the hinted ssk is still active, try to use it */
- if (likely(ack_hint)) {
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
- return;
- }
+ if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
+ mptcp_subflow_cleanup_rbuf(ssk);
}
-
- /* otherwise pick the first active subflow */
- mptcp_for_each_subflow(msk, subflow)
- if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
- return;
}
static bool mptcp_check_data_fin(struct sock *sk)
@@ -625,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
break;
}
} while (more_data_avail);
- WRITE_ONCE(msk->ack_hint, ssk);
*bytes += moved;
return done;
@@ -677,18 +677,19 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
-static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
unsigned int moved = 0;
- if (inet_sk_state_load(sk) == TCP_CLOSE)
- return;
-
- mptcp_data_lock(sk);
-
__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
__mptcp_ofo_queue(msk);
+ if (unlikely(ssk->sk_err)) {
+ if (!sock_owned_by_user(sk))
+ __mptcp_error_report(sk);
+ else
+ set_bit(MPTCP_ERROR_REPORT, &msk->flags);
+ }
/* If the moves have caught up with the DATA_FIN sequence number
* it's time to ack the DATA_FIN and change socket state, but
@@ -697,7 +698,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
*/
if (mptcp_pending_data_fin(sk, NULL))
mptcp_schedule_work(sk);
- mptcp_data_unlock(sk);
+ return moved > 0;
}
void mptcp_data_ready(struct sock *sk, struct sock *ssk)
@@ -705,7 +706,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(sk);
int sk_rbuf, ssk_rbuf;
- bool wake;
/* The peer can send data while we are shutting down this
* subflow at msk destruction time, but we must avoid enqueuing
@@ -714,28 +714,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
if (unlikely(subflow->disposable))
return;
- /* move_skbs_to_msk below can legitly clear the data_avail flag,
- * but we will need later to properly woke the reader, cache its
- * value
- */
- wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
- if (wake)
- set_bit(MPTCP_DATA_READY, &msk->flags);
-
ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
if (unlikely(ssk_rbuf > sk_rbuf))
sk_rbuf = ssk_rbuf;
- /* over limit? can't append more skbs to msk */
+ /* over limit? can't append more skbs to msk, Also, no need to wake-up*/
if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
- goto wake;
-
- move_skbs_to_msk(msk, ssk);
+ return;
-wake:
- if (wake)
+ /* Wake-up the reader only for in-sequence data */
+ mptcp_data_lock(sk);
+ if (move_skbs_to_msk(msk, ssk)) {
+ set_bit(MPTCP_DATA_READY, &msk->flags);
sk->sk_data_ready(sk);
+ }
+ mptcp_data_unlock(sk);
}
static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
@@ -867,7 +861,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
sock_owned_by_me(sk);
mptcp_for_each_subflow(msk, subflow) {
- if (subflow->data_avail)
+ if (READ_ONCE(subflow->data_avail))
return mptcp_subflow_tcp_sock(subflow);
}
@@ -903,22 +897,14 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
df->data_seq + df->data_len == msk->write_seq;
}
-static int mptcp_wmem_with_overhead(struct sock *sk, int size)
+static int mptcp_wmem_with_overhead(int size)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
- int ret, skbs;
-
- ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
- skbs = (msk->tx_pending_data + size) / msk->size_goal_cache;
- if (skbs < msk->skb_tx_cache.qlen)
- return ret;
-
- return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER);
+ return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
}
static void __mptcp_wmem_reserve(struct sock *sk, int size)
{
- int amount = mptcp_wmem_with_overhead(sk, size);
+ int amount = mptcp_wmem_with_overhead(size);
struct mptcp_sock *msk = mptcp_sk(sk);
WARN_ON_ONCE(msk->wmem_reserved);
@@ -1213,49 +1199,8 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
return NULL;
}
-static bool mptcp_tx_cache_refill(struct sock *sk, int size,
- struct sk_buff_head *skbs, int *total_ts)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
- struct sk_buff *skb;
- int space_needed;
-
- if (unlikely(tcp_under_memory_pressure(sk))) {
- mptcp_mem_reclaim_partial(sk);
-
- /* under pressure pre-allocate at most a single skb */
- if (msk->skb_tx_cache.qlen)
- return true;
- space_needed = msk->size_goal_cache;
- } else {
- space_needed = msk->tx_pending_data + size -
- msk->skb_tx_cache.qlen * msk->size_goal_cache;
- }
-
- while (space_needed > 0) {
- skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation);
- if (unlikely(!skb)) {
- /* under memory pressure, try to pass the caller a
- * single skb to allow forward progress
- */
- while (skbs->qlen > 1) {
- skb = __skb_dequeue_tail(skbs);
- *total_ts -= skb->truesize;
- __kfree_skb(skb);
- }
- return skbs->qlen > 0;
- }
-
- *total_ts += skb->truesize;
- __skb_queue_tail(skbs, skb);
- space_needed -= msk->size_goal_cache;
- }
- return true;
-}
-
static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
struct sk_buff *skb;
if (ssk->sk_tx_skb_cache) {
@@ -1266,22 +1211,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
return true;
}
- skb = skb_peek(&msk->skb_tx_cache);
- if (skb) {
- if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
- skb = __skb_dequeue(&msk->skb_tx_cache);
- if (WARN_ON_ONCE(!skb))
- return false;
-
- mptcp_wmem_uncharge(sk, skb->truesize);
- ssk->sk_tx_skb_cache = skb;
- return true;
- }
-
- /* over memory limit, no point to try to allocate a new skb */
- return false;
- }
-
skb = __mptcp_do_alloc_tx_skb(sk, gfp);
if (!skb)
return false;
@@ -1297,7 +1226,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
{
return !ssk->sk_tx_skb_cache &&
- !skb_peek(&mptcp_sk(sk)->skb_tx_cache) &&
tcp_under_memory_pressure(sk);
}
@@ -1308,6 +1236,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk)
return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation);
}
+/* note: this always recompute the csum on the whole skb, even
+ * if we just appended a single frag. More status info needed
+ */
+static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
+{
+ struct mptcp_ext *mpext = mptcp_get_ext(skb);
+ __wsum csum = ~csum_unfold(mpext->csum);
+ int offset = skb->len - added;
+
+ mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
+}
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@@ -1328,7 +1268,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
/* compute send limit */
info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
avail_size = info->size_goal;
- msk->size_goal_cache = info->size_goal;
skb = tcp_write_queue_tail(ssk);
if (skb) {
/* Limit the write to the size available in the
@@ -1402,10 +1341,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (zero_window_probe) {
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
mpext->frozen = 1;
- ret = 0;
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
tcp_push_pending_frames(ssk);
+ return 0;
}
out:
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
return ret;
}
@@ -1673,7 +1616,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
while (msg_data_left(msg)) {
int total_ts, frag_truesize = 0;
struct mptcp_data_frag *dfrag;
- struct sk_buff_head skbs;
bool dfrag_collapsed;
size_t psize, offset;
@@ -1706,16 +1648,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
psize = pfrag->size - offset;
psize = min_t(size_t, psize, msg_data_left(msg));
total_ts = psize + frag_truesize;
- __skb_queue_head_init(&skbs);
- if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts))
- goto wait_for_memory;
- if (!mptcp_wmem_alloc(sk, total_ts)) {
- __skb_queue_purge(&skbs);
+ if (!mptcp_wmem_alloc(sk, total_ts))
goto wait_for_memory;
- }
- skb_queue_splice_tail(&skbs, &msk->skb_tx_cache);
if (copy_page_from_iter(dfrag->page, offset, psize,
&msg->msg_iter) != psize) {
mptcp_wmem_uncharge(sk, psize + frag_truesize);
@@ -1772,7 +1708,7 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, timeo,
- test_and_clear_bit(MPTCP_DATA_READY, &msk->flags), &wait);
+ test_bit(MPTCP_DATA_READY, &msk->flags), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
@@ -1970,7 +1906,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
__mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
- tcp_cleanup_rbuf(ssk, moved);
+
+ if (unlikely(ssk->sk_err))
+ __mptcp_error_report(sk);
unlock_sock_fast(ssk, slowpath);
} while (!done);
@@ -1983,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
- mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
@@ -2093,10 +2030,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
*/
if (unlikely(__mptcp_move_skbs(msk)))
set_bit(MPTCP_DATA_READY, &msk->flags);
- } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
- /* data to read but mptcp_wait_data() cleared DATA_READY */
- set_bit(MPTCP_DATA_READY, &msk->flags);
}
+
out_err:
if (cmsg_flags && copied >= 0) {
if (cmsg_flags & MPTCP_CMSG_TS)
@@ -2234,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (ssk == msk->last_snd)
msk->last_snd = NULL;
- if (ssk == msk->ack_hint)
- msk->ack_hint = NULL;
-
if (ssk == msk->first)
msk->first = NULL;
@@ -2308,13 +2240,14 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
- lock_sock(tcp_sk);
+ slow = lock_sock_fast(tcp_sk);
if (tcp_sk->sk_state != TCP_CLOSE) {
tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
tcp_set_state(tcp_sk, TCP_CLOSE);
}
- release_sock(tcp_sk);
+ unlock_sock_fast(tcp_sk, slow);
}
inet_sk_state_store(sk, TCP_CLOSE);
@@ -2359,8 +2292,8 @@ static void __mptcp_retrans(struct sock *sk)
/* limit retransmission to the bytes already sent on some subflows */
info.sent = 0;
- info.limit = dfrag->already_sent;
- while (info.sent < dfrag->already_sent) {
+ info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
+ while (info.sent < info.limit) {
if (!mptcp_alloc_tx_skb(sk, ssk))
break;
@@ -2372,9 +2305,11 @@ static void __mptcp_retrans(struct sock *sk)
copied += ret;
info.sent += ret;
}
- if (copied)
+ if (copied) {
+ dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ }
mptcp_set_timeout(sk, ssk);
release_sock(ssk);
@@ -2442,17 +2377,15 @@ static int __mptcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&msk->rtx_queue);
INIT_WORK(&msk->work, mptcp_worker);
__skb_queue_head_init(&msk->receive_queue);
- __skb_queue_head_init(&msk->skb_tx_cache);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
msk->wmem_reserved = 0;
msk->rmem_released = 0;
msk->tx_pending_data = 0;
- msk->size_goal_cache = TCP_BASE_MSS;
- msk->ack_hint = NULL;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
+ WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_init(msk);
@@ -2504,15 +2437,10 @@ static void __mptcp_clear_xmit(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- struct sk_buff *skb;
WRITE_ONCE(msk->first_pending, NULL);
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
dfrag_clear(sk, dfrag);
- while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) {
- sk->sk_forward_alloc += skb->truesize;
- kfree_skb(skb);
- }
}
static void mptcp_cancel_work(struct sock *sk)
@@ -2793,6 +2721,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->token = subflow_req->token;
msk->subflow = NULL;
WRITE_ONCE(msk->fully_established, false);
+ if (mp_opt->csum_reqd)
+ WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 89f6b73783d5..d8ad3270dfab 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -68,6 +68,8 @@
#define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4
+#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
+
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
#define MPTCPOPT_HMAC_LEN 20
@@ -77,8 +79,9 @@
#define MPTCP_VERSION_MASK (0x0F)
#define MPTCP_CAP_CHECKSUM_REQD BIT(7)
#define MPTCP_CAP_EXTENSIBILITY BIT(6)
+#define MPTCP_CAP_DENY_JOIN_ID0 BIT(5)
#define MPTCP_CAP_HMAC_SHA256 BIT(0)
-#define MPTCP_CAP_FLAG_MASK (0x3F)
+#define MPTCP_CAP_FLAG_MASK (0x1F)
/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN BIT(4)
@@ -124,6 +127,7 @@ struct mptcp_options_received {
u64 data_seq;
u32 subflow_seq;
u16 data_len;
+ __sum16 csum;
u16 mp_capable : 1,
mp_join : 1,
fastclose : 1,
@@ -133,7 +137,9 @@ struct mptcp_options_received {
rm_addr : 1,
mp_prio : 1,
echo : 1,
- backup : 1;
+ csum_reqd : 1,
+ backup : 1,
+ deny_join_id0 : 1;
u32 token;
u32 nonce;
u64 thmac;
@@ -188,6 +194,7 @@ struct mptcp_pm_data {
bool work_pending;
bool accept_addr;
bool accept_subflow;
+ bool remote_deny_join_id0;
u8 add_addr_signaled;
u8 add_addr_accepted;
u8 local_addr_used;
@@ -234,15 +241,13 @@ struct mptcp_sock {
bool snd_data_fin_enable;
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
+ bool csum_enabled;
spinlock_t join_list_lock;
- struct sock *ack_hint;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
struct sk_buff_head receive_queue;
- struct sk_buff_head skb_tx_cache; /* this is wmem accounted */
int tx_pending_data;
- int size_goal_cache;
struct list_head conn_list;
struct list_head rtx_queue;
struct mptcp_data_frag *first_pending;
@@ -335,11 +340,20 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}
+struct csum_pseudo_header {
+ __be64 data_seq;
+ __be32 subflow_seq;
+ __be16 data_len;
+ __sum16 csum;
+};
+
struct mptcp_subflow_request_sock {
struct tcp_request_sock sk;
u16 mp_capable : 1,
mp_join : 1,
- backup : 1;
+ backup : 1,
+ csum_reqd : 1,
+ allow_join_id0 : 1;
u8 local_id;
u8 remote_id;
u64 local_key;
@@ -362,7 +376,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
enum mptcp_data_avail {
MPTCP_SUBFLOW_NODATA,
MPTCP_SUBFLOW_DATA_AVAIL,
- MPTCP_SUBFLOW_OOO_DATA
};
struct mptcp_delegated_action {
@@ -387,6 +400,8 @@ struct mptcp_subflow_context {
u32 map_subflow_seq;
u32 ssn_offset;
u32 map_data_len;
+ __wsum map_data_csum;
+ u32 map_csum_len;
u32 request_mptcp : 1, /* send MP_CAPABLE */
request_join : 1, /* send MP_JOIN */
request_bkup : 1,
@@ -396,6 +411,8 @@ struct mptcp_subflow_context {
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
+ map_csum_reqd : 1,
+ map_data_fin : 1,
mpc_map : 1,
backup : 1,
send_mp_prio : 1,
@@ -525,6 +542,8 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
int mptcp_is_enabled(struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net);
+int mptcp_is_checksum_enabled(struct net *net);
+int mptcp_allow_join_id0(struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);
@@ -576,7 +595,8 @@ int __init mptcp_proto_v6_init(void);
struct sock *mptcp_sk_clone(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct request_sock *req);
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+ const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 33956337c46b..d55f4ef736a5 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -108,6 +108,8 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
+ subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
+ subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener));
subflow_req->msk = NULL;
mptcp_token_init_request(req);
}
@@ -150,7 +152,7 @@ static int subflow_check_req(struct request_sock *req,
return -EINVAL;
#endif
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
@@ -247,7 +249,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
int err;
subflow_init_req(req, sk_listener);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join)
return -EINVAL;
@@ -394,7 +396,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (subflow->request_mptcp) {
if (!mp_opt.mp_capable) {
MPTCP_INC_STATS(sock_net(sk),
@@ -404,6 +406,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto fallback;
}
+ if (mp_opt.csum_reqd)
+ WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
+ if (mp_opt.deny_join_id0)
+ WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
@@ -638,7 +644,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* reordered MPC will cause fallback, but we don't have other
* options.
*/
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_capable) {
fallback = true;
goto create_child;
@@ -648,7 +654,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
@@ -784,10 +790,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}
-static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
+static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
- WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
- ssn, subflow->map_subflow_seq, subflow->map_data_len);
+ pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+ ssn, subflow->map_subflow_seq, subflow->map_data_len);
}
static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
@@ -812,22 +818,104 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
/* Mapping covers data later in the subflow stream,
* currently unsupported.
*/
- warn_bad_map(subflow, ssn);
+ dbg_bad_map(subflow, ssn);
return false;
}
if (unlikely(!before(ssn, subflow->map_subflow_seq +
subflow->map_data_len))) {
/* Mapping does covers past subflow data, invalid */
- warn_bad_map(subflow, ssn + skb->len);
+ dbg_bad_map(subflow, ssn);
return false;
}
return true;
}
+static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb,
+ bool csum_reqd)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct csum_pseudo_header header;
+ u32 offset, seq, delta;
+ __wsum csum;
+ int len;
+
+ if (!csum_reqd)
+ return MAPPING_OK;
+
+ /* mapping already validated on previous traversal */
+ if (subflow->map_csum_len == subflow->map_data_len)
+ return MAPPING_OK;
+
+ /* traverse the receive queue, ensuring it contains a full
+ * DSS mapping and accumulating the related csum.
+ * Preserve the accoumlate csum across multiple calls, to compute
+ * the csum only once
+ */
+ delta = subflow->map_data_len - subflow->map_csum_len;
+ for (;;) {
+ seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len;
+ offset = seq - TCP_SKB_CB(skb)->seq;
+
+ /* if the current skb has not been accounted yet, csum its contents
+ * up to the amount covered by the current DSS
+ */
+ if (offset < skb->len) {
+ __wsum csum;
+
+ len = min(skb->len - offset, delta);
+ csum = skb_checksum(skb, offset, len, 0);
+ subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum,
+ subflow->map_csum_len);
+
+ delta -= len;
+ subflow->map_csum_len += len;
+ }
+ if (delta == 0)
+ break;
+
+ if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) {
+ /* if this subflow is closed, the partial mapping
+ * will be never completed; flush the pending skbs, so
+ * that subflow_sched_work_if_closed() can kick in
+ */
+ if (unlikely(ssk->sk_state == TCP_CLOSE))
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+
+ /* not enough data to validate the csum */
+ return MAPPING_EMPTY;
+ }
+
+ /* the DSS mapping for next skbs will be validated later,
+ * when a get_mapping_status call will process such skb
+ */
+ skb = skb->next;
+ }
+
+ /* note that 'map_data_len' accounts only for the carried data, does
+ * not include the eventual seq increment due to the data fin,
+ * while the pseudo header requires the original DSS data len,
+ * including that
+ */
+ header.data_seq = cpu_to_be64(subflow->map_seq);
+ header.subflow_seq = htonl(subflow->map_subflow_seq);
+ header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
+ header.csum = 0;
+
+ csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
+ if (unlikely(csum_fold(csum))) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+ return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ }
+
+ return MAPPING_OK;
+}
+
static enum mapping_status get_mapping_status(struct sock *ssk,
struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ bool csum_reqd = READ_ONCE(msk->csum_enabled);
struct mptcp_ext *mpext;
struct sk_buff *skb;
u16 data_len;
@@ -920,9 +1008,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
/* Allow replacing only with an identical map */
if (subflow->map_seq == map_seq &&
subflow->map_subflow_seq == mpext->subflow_seq &&
- subflow->map_data_len == data_len) {
+ subflow->map_data_len == data_len &&
+ subflow->map_csum_reqd == mpext->csum_reqd) {
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+ goto validate_csum;
}
/* If this skb data are fully covered by the current mapping,
@@ -934,27 +1023,41 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
}
/* will validate the next map after consuming the current one */
- return MAPPING_OK;
+ goto validate_csum;
}
subflow->map_seq = map_seq;
subflow->map_subflow_seq = mpext->subflow_seq;
subflow->map_data_len = data_len;
subflow->map_valid = 1;
+ subflow->map_data_fin = mpext->data_fin;
subflow->mpc_map = mpext->mpc_map;
- pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
+ subflow->map_csum_reqd = mpext->csum_reqd;
+ subflow->map_csum_len = 0;
+ subflow->map_data_csum = csum_unfold(mpext->csum);
+
+ /* Cfr RFC 8684 Section 3.3.0 */
+ if (unlikely(subflow->map_csum_reqd != csum_reqd))
+ return MAPPING_INVALID;
+
+ pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
subflow->map_seq, subflow->map_subflow_seq,
- subflow->map_data_len);
+ subflow->map_data_len, subflow->map_csum_reqd,
+ subflow->map_data_csum);
validate_seq:
/* we revalidate valid mapping on new skb, because we must ensure
* the current skb is completely covered by the available mapping
*/
- if (!validate_mapping(ssk, skb))
+ if (!validate_mapping(ssk, skb)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH);
return MAPPING_INVALID;
+ }
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+
+validate_csum:
+ return validate_data_csum(ssk, skb, csum_reqd);
}
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
@@ -1000,7 +1103,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue))
- subflow->data_avail = 0;
+ WRITE_ONCE(subflow->data_avail, 0);
if (subflow->data_avail)
return true;
@@ -1039,18 +1142,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
ack_seq);
- if (ack_seq == old_ack) {
- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
- break;
- } else if (after64(ack_seq, old_ack)) {
- subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
- break;
+ if (unlikely(before64(ack_seq, old_ack))) {
+ mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+ continue;
}
- /* only accept in-sequence mapping. Old values are spurious
- * retransmission
- */
- mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ break;
}
return true;
@@ -1065,12 +1163,11 @@ fallback:
* subflow_error_report() will introduce the appropriate barriers
*/
ssk->sk_err = EBADMSG;
- ssk->sk_error_report(ssk);
tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMPTCP;
tcp_send_active_reset(ssk, GFP_ATOMIC);
- subflow->data_avail = 0;
+ WRITE_ONCE(subflow->data_avail, 0);
return false;
}
@@ -1080,7 +1177,7 @@ fallback:
subflow->map_seq = READ_ONCE(msk->ack_seq);
subflow->map_data_len = skb->len;
subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
return true;
}
@@ -1092,7 +1189,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
if (subflow->map_valid &&
mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
subflow->map_valid = 0;
- subflow->data_avail = 0;
+ WRITE_ONCE(subflow->data_avail, 0);
pr_debug("Done with mapping: seq=%u data_len=%u",
subflow->map_subflow_seq,
@@ -1120,41 +1217,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
*full_space = tcp_full_space(sk);
}
-static void subflow_data_ready(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- u16 state = 1 << inet_sk_state_load(sk);
- struct sock *parent = subflow->conn;
- struct mptcp_sock *msk;
-
- msk = mptcp_sk(parent);
- if (state & TCPF_LISTEN) {
- /* MPJ subflow are removed from accept queue before reaching here,
- * avoid stray wakeups
- */
- if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
- return;
-
- set_bit(MPTCP_DATA_READY, &msk->flags);
- parent->sk_data_ready(parent);
- return;
- }
-
- WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
- !subflow->mp_join && !(state & TCPF_CLOSE));
-
- if (mptcp_subflow_data_available(sk))
- mptcp_data_ready(parent, sk);
-}
-
-static void subflow_write_space(struct sock *ssk)
-{
- struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
-
- mptcp_propagate_sndbuf(sk, ssk);
- mptcp_write_space(sk);
-}
-
void __mptcp_error_report(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -1195,6 +1257,43 @@ static void subflow_error_report(struct sock *ssk)
mptcp_data_unlock(sk);
}
+static void subflow_data_ready(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ u16 state = 1 << inet_sk_state_load(sk);
+ struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
+
+ msk = mptcp_sk(parent);
+ if (state & TCPF_LISTEN) {
+ /* MPJ subflow are removed from accept queue before reaching here,
+ * avoid stray wakeups
+ */
+ if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+ return;
+
+ set_bit(MPTCP_DATA_READY, &msk->flags);
+ parent->sk_data_ready(parent);
+ return;
+ }
+
+ WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+ !subflow->mp_join && !(state & TCPF_CLOSE));
+
+ if (mptcp_subflow_data_available(sk))
+ mptcp_data_ready(parent, sk);
+ else if (unlikely(sk->sk_err))
+ subflow_error_report(sk);
+}
+
+static void subflow_write_space(struct sock *ssk)
+{
+ struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+ mptcp_propagate_sndbuf(sk, ssk);
+ mptcp_write_space(sk);
+}
+
static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
@@ -1505,6 +1604,8 @@ static void subflow_state_change(struct sock *sk)
*/
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
+ else if (unlikely(sk->sk_err))
+ subflow_error_report(sk);
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 87112dad1fd4..049890e00a3d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -74,7 +74,7 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
- nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \
+ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \
nft_chain_route.o nf_tables_offload.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
nft_set_pipapo.o
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index be14e0bea4c8..55647409a9be 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -45,12 +45,13 @@
static DEFINE_MUTEX(nf_ct_proto_mutex);
#ifdef CONFIG_SYSCTL
-__printf(5, 6)
+__printf(4, 5)
void nf_l4proto_log_invalid(const struct sk_buff *skb,
- struct net *net,
- u16 pf, u8 protonum,
+ const struct nf_hook_state *state,
+ u8 protonum,
const char *fmt, ...)
{
+ struct net *net = state->net;
struct va_format vaf;
va_list args;
@@ -62,15 +63,16 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb,
vaf.fmt = fmt;
vaf.va = &args;
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_proto_%d: %pV ", protonum, &vaf);
+ nf_log_packet(net, state->pf, 0, skb, state->in, state->out,
+ NULL, "nf_ct_proto_%d: %pV ", protonum, &vaf);
va_end(args);
}
EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
-__printf(3, 4)
+__printf(4, 5)
void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
const struct nf_conn *ct,
+ const struct nf_hook_state *state,
const char *fmt, ...)
{
struct va_format vaf;
@@ -85,7 +87,7 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
vaf.fmt = fmt;
vaf.va = &args;
- nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
+ nf_l4proto_log_invalid(skb, state,
nf_ct_protonum(ct), "%pV", &vaf);
va_end(args);
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4f33307fa3cf..c1557d47ccd1 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -382,7 +382,8 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
static noinline bool
dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
- const struct dccp_hdr *dh)
+ const struct dccp_hdr *dh,
+ const struct nf_hook_state *hook_state)
{
struct net *net = nf_ct_net(ct);
struct nf_dccp_net *dn;
@@ -414,7 +415,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
out_invalid:
- nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg);
return false;
}
@@ -464,8 +465,7 @@ static bool dccp_error(const struct dccp_hdr *dh,
}
return false;
out_invalid:
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_DCCP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg);
return true;
}
@@ -488,7 +488,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
return -NF_ACCEPT;
type = dh->dccph_type;
- if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh))
+ if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
return -NF_ACCEPT;
if (type == DCCP_PKT_RESET &&
@@ -543,11 +543,11 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
ct->proto.dccp.last_pkt = type;
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet");
return NF_ACCEPT;
case CT_DCCP_INVALID:
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 4efd8741c105..b38b7164acd5 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -170,12 +170,12 @@ int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
if (state->pf == AF_INET) {
- nf_l4proto_log_invalid(skb, state->net, state->pf,
+ nf_l4proto_log_invalid(skb, state,
l4proto,
"outer daddr %pI4 != inner %pI4",
&outer_daddr->ip, &ct_daddr->ip);
} else if (state->pf == AF_INET6) {
- nf_l4proto_log_invalid(skb, state->net, state->pf,
+ nf_l4proto_log_invalid(skb, state,
l4proto,
"outer daddr %pI6 != inner %pI6",
&outer_daddr->ip6, &ct_daddr->ip6);
@@ -197,8 +197,7 @@ static void icmp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_ICMP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_ICMP, "%s", msg);
}
/* Small and modified version of icmp_rcv */
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index facd8c64ec4e..61e3b05cf02c 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -126,8 +126,7 @@ static void icmpv6_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_ICMPV6, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg);
}
int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fb8dc02e502f..2394238d01c9 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -351,7 +351,7 @@ static bool sctp_error(struct sk_buff *skb,
}
return false;
out_invalid:
- nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_SCTP, "%s", logmsg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_SCTP, "%s", logmsg);
return true;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index de840fc41a2e..f7e8baf59b51 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -446,14 +446,15 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
}
}
-static bool tcp_in_window(const struct nf_conn *ct,
- struct ip_ct_tcp *state,
+static bool tcp_in_window(struct nf_conn *ct,
enum ip_conntrack_dir dir,
unsigned int index,
const struct sk_buff *skb,
unsigned int dataoff,
- const struct tcphdr *tcph)
+ const struct tcphdr *tcph,
+ const struct nf_hook_state *hook_state)
{
+ struct ip_ct_tcp *state = &ct->proto.tcp;
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct ip_ct_tcp_state *sender = &state->seen[dir];
@@ -670,7 +671,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
tn->tcp_be_liberal)
res = true;
if (!res) {
- nf_ct_l4proto_log_invalid(skb, ct,
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state,
"%s",
before(seq, sender->td_maxend + 1) ?
in_recv_win ?
@@ -710,7 +711,7 @@ static void tcp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_TCP, "%s", msg);
}
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
@@ -970,7 +971,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
IP_CT_EXP_CHALLENGE_ACK;
}
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct,
+ nf_ct_l4proto_log_invalid(skb, ct, state,
"packet (index %d) in dir %d ignored, state %s",
index, dir,
tcp_conntrack_names[old_state]);
@@ -995,7 +996,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "invalid state");
return -NF_ACCEPT;
case TCP_CONNTRACK_TIME_WAIT:
/* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -1010,7 +1011,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
/* Detected RFC5961 challenge ACK */
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "challenge-ack ignored");
return NF_ACCEPT; /* Don't change state */
}
break;
@@ -1035,7 +1036,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
/* Invalid RST */
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst");
return -NF_ACCEPT;
}
@@ -1079,8 +1080,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
break;
}
- if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
- skb, dataoff, th)) {
+ if (!tcp_in_window(ct, dir, index,
+ skb, dataoff, th, state)) {
spin_unlock_bh(&ct->lock);
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 68911fcaa0f1..698fee49e732 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -38,8 +38,7 @@ static void udp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_UDP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_UDP, "%s", msg);
}
static bool udp_error(struct sk_buff *skb,
@@ -130,8 +129,7 @@ static void udplite_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_UDPLITE, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg);
}
static bool udplite_error(struct sk_buff *skb,
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b100c04a0e43..3d6d49420db8 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -31,6 +31,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
int length = (th->doff * 4) - sizeof(*th);
u8 buf[40], *ptr;
+ if (unlikely(length < 0))
+ return false;
+
ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
if (ptr == NULL)
return false;
@@ -47,6 +50,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
length--;
continue;
default:
+ if (length < 2)
+ return true;
opsize = *ptr++;
if (opsize < 2)
return true;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f20f6ae0e215..d6214242fe7f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4338,13 +4338,45 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
err = nf_tables_set_alloc_name(&ctx, set, name);
kfree(name);
if (err < 0)
- goto err_set_alloc_name;
+ goto err_set_name;
+
+ udata = NULL;
+ if (udlen) {
+ udata = set->data + size;
+ nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
+ }
+
+ INIT_LIST_HEAD(&set->bindings);
+ INIT_LIST_HEAD(&set->catchall_list);
+ set->table = table;
+ write_pnet(&set->net, net);
+ set->ops = ops;
+ set->ktype = ktype;
+ set->klen = desc.klen;
+ set->dtype = dtype;
+ set->objtype = objtype;
+ set->dlen = desc.dlen;
+ set->flags = flags;
+ set->size = desc.size;
+ set->policy = policy;
+ set->udlen = udlen;
+ set->udata = udata;
+ set->timeout = timeout;
+ set->gc_int = gc_int;
+
+ set->field_count = desc.field_count;
+ for (i = 0; i < desc.field_count; i++)
+ set->field_len[i] = desc.field_len[i];
+
+ err = ops->init(set, &desc, nla);
+ if (err < 0)
+ goto err_set_init;
if (nla[NFTA_SET_EXPR]) {
expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
if (IS_ERR(expr)) {
err = PTR_ERR(expr);
- goto err_set_alloc_name;
+ goto err_set_expr_alloc;
}
set->exprs[0] = expr;
set->num_exprs++;
@@ -4355,75 +4387,44 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_EXPR)) {
err = -EINVAL;
- goto err_set_alloc_name;
+ goto err_set_expr_alloc;
}
i = 0;
nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
if (i == NFT_SET_EXPR_MAX) {
err = -E2BIG;
- goto err_set_init;
+ goto err_set_expr_alloc;
}
if (nla_type(tmp) != NFTA_LIST_ELEM) {
err = -EINVAL;
- goto err_set_init;
+ goto err_set_expr_alloc;
}
expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
if (IS_ERR(expr)) {
err = PTR_ERR(expr);
- goto err_set_init;
+ goto err_set_expr_alloc;
}
set->exprs[i++] = expr;
set->num_exprs++;
}
}
- udata = NULL;
- if (udlen) {
- udata = set->data + size;
- nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
- }
-
- INIT_LIST_HEAD(&set->bindings);
- INIT_LIST_HEAD(&set->catchall_list);
- set->table = table;
- write_pnet(&set->net, net);
- set->ops = ops;
- set->ktype = ktype;
- set->klen = desc.klen;
- set->dtype = dtype;
- set->objtype = objtype;
- set->dlen = desc.dlen;
- set->flags = flags;
- set->size = desc.size;
- set->policy = policy;
- set->udlen = udlen;
- set->udata = udata;
- set->timeout = timeout;
- set->gc_int = gc_int;
set->handle = nf_tables_alloc_handle(table);
- set->field_count = desc.field_count;
- for (i = 0; i < desc.field_count; i++)
- set->field_len[i] = desc.field_len[i];
-
- err = ops->init(set, &desc, nla);
- if (err < 0)
- goto err_set_init;
-
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
- goto err_set_trans;
+ goto err_set_expr_alloc;
list_add_tail_rcu(&set->list, &table->sets);
table->use++;
return 0;
-err_set_trans:
- ops->destroy(set);
-err_set_init:
+err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
-err_set_alloc_name:
+
+ ops->destroy(set);
+err_set_init:
kfree(set->name);
err_set_name:
kvfree(set);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 7780342e2f2d..866cfba04d6c 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -268,6 +268,7 @@ static struct nft_expr_type *nft_basic_types[] = {
&nft_meta_type,
&nft_rt_type,
&nft_exthdr_type,
+ &nft_last_type,
};
static struct nft_object_type *nft_basic_objects[] = {
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 58fda6ac663b..50b4e3c9347a 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -126,8 +126,10 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
#ifdef CONFIG_KALLSYMS
ret = snprintf(sym, sizeof(sym), "%ps", ops->hook);
- if (ret < 0 || ret > (int)sizeof(sym))
+ if (ret >= sizeof(sym)) {
+ ret = -EINVAL;
goto nla_put_failure;
+ }
module_name = strstr(sym, " [");
if (module_name) {
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 7f705b5c09de..4f583d2e220e 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -164,7 +164,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
{
struct tcphdr *tcph;
- if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+ if (pkt->tprot != IPPROTO_TCP)
return NULL;
tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
@@ -312,6 +312,9 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
const struct sctp_chunkhdr *sch;
struct sctp_chunkhdr _sch;
+ if (pkt->tprot != IPPROTO_SCTP)
+ goto err;
+
do {
sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch);
if (!sch || !sch->length)
@@ -334,7 +337,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
}
offset += SCTP_PAD4(ntohs(sch->length));
} while (offset < pkt->skb->len);
-
+err:
if (priv->flags & NFT_EXTHDR_F_PRESENT)
nft_reg_store8(dest, false);
else
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
new file mode 100644
index 000000000000..913ac45167f2
--- /dev/null
+++ b/net/netfilter/nft_last.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_last_priv {
+ unsigned long last_jiffies;
+ unsigned int last_set;
+};
+
+static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = {
+ [NFTA_LAST_SET] = { .type = NLA_U32 },
+ [NFTA_LAST_MSECS] = { .type = NLA_U64 },
+};
+
+static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+ u64 last_jiffies;
+ int err;
+
+ if (tb[NFTA_LAST_MSECS]) {
+ err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
+ if (err < 0)
+ return err;
+
+ priv->last_jiffies = jiffies + (unsigned long)last_jiffies;
+ priv->last_set = 1;
+ }
+
+ return 0;
+}
+
+static void nft_last_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+
+ priv->last_jiffies = jiffies;
+ priv->last_set = 1;
+}
+
+static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+ __be64 msecs;
+
+ if (time_before(jiffies, priv->last_jiffies))
+ priv->last_set = 0;
+
+ if (priv->last_set)
+ msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies);
+ else
+ msecs = 0;
+
+ if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) ||
+ nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_expr_ops nft_last_ops = {
+ .type = &nft_last_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)),
+ .eval = nft_last_eval,
+ .init = nft_last_init,
+ .dump = nft_last_dump,
+};
+
+struct nft_expr_type nft_last_type __read_mostly = {
+ .name = "last",
+ .ops = &nft_last_ops,
+ .policy = nft_last_policy,
+ .maxattr = NFTA_LAST_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 41109c326f3a..28982630bef3 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -13,6 +13,7 @@ openvswitch-y := \
flow_netlink.o \
flow_table.o \
meter.o \
+ openvswitch_trace.o \
vport.o \
vport-internal_dev.o \
vport-netdev.o
@@ -24,3 +25,5 @@ endif
obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
+
+CFLAGS_openvswitch_trace.o = -I$(src)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 77d924ab8cdb..ef15d9eb4774 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -30,6 +30,7 @@
#include "conntrack.h"
#include "vport.h"
#include "flow_netlink.h"
+#include "openvswitch_trace.h"
struct deferred_action {
struct sk_buff *skb;
@@ -1242,6 +1243,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
a = nla_next(a, &rem)) {
int err = 0;
+ if (trace_ovs_do_execute_action_enabled())
+ trace_ovs_do_execute_action(dp, skb, key, a, rem);
+
switch (nla_type(a)) {
case OVS_ACTION_ATTR_OUTPUT: {
int port = nla_get_u32(a);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9d6ef6cb9b26..bc164b35e67d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -43,6 +43,7 @@
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
+#include "openvswitch_trace.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
@@ -275,6 +276,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
struct dp_stats_percpu *stats;
int err;
+ if (trace_ovs_dp_upcall_enabled())
+ trace_ovs_dp_upcall(dp, skb, key, upcall_info);
+
if (upcall_info->portid == 0) {
err = -ENOTCONN;
goto err;
diff --git a/net/openvswitch/openvswitch_trace.c b/net/openvswitch/openvswitch_trace.c
new file mode 100644
index 000000000000..62c5f7d6f023
--- /dev/null
+++ b/net/openvswitch/openvswitch_trace.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/* bug in tracepoint.h, it should include this */
+#include <linux/module.h>
+
+/* sparse isn't too happy with all macros... */
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "openvswitch_trace.h"
+
+#endif
diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h
new file mode 100644
index 000000000000..3eb35d9eb700
--- /dev/null
+++ b/net/openvswitch/openvswitch_trace.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM openvswitch
+
+#if !defined(_TRACE_OPENVSWITCH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_OPENVSWITCH_H
+
+#include <linux/tracepoint.h>
+
+#include "datapath.h"
+
+TRACE_EVENT(ovs_do_execute_action,
+
+ TP_PROTO(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, const struct nlattr *a, int rem),
+
+ TP_ARGS(dp, skb, key, a, rem),
+
+ TP_STRUCT__entry(
+ __field( void *, dpaddr )
+ __string( dp_name, ovs_dp_name(dp) )
+ __string( dev_name, skb->dev->name )
+ __field( void *, skbaddr )
+ __field( unsigned int, len )
+ __field( unsigned int, data_len )
+ __field( unsigned int, truesize )
+ __field( u8, nr_frags )
+ __field( u16, gso_size )
+ __field( u16, gso_type )
+ __field( u32, ovs_flow_hash )
+ __field( u32, recirc_id )
+ __field( void *, keyaddr )
+ __field( u16, key_eth_type )
+ __field( u8, key_ct_state )
+ __field( u8, key_ct_orig_proto )
+ __field( u16, key_ct_zone )
+ __field( unsigned int, flow_key_valid )
+ __field( u8, action_type )
+ __field( unsigned int, action_len )
+ __field( void *, action_data )
+ __field( u8, is_last )
+ ),
+
+ TP_fast_assign(
+ __entry->dpaddr = dp;
+ __assign_str(dp_name, ovs_dp_name(dp));
+ __assign_str(dev_name, skb->dev->name);
+ __entry->skbaddr = skb;
+ __entry->len = skb->len;
+ __entry->data_len = skb->data_len;
+ __entry->truesize = skb->truesize;
+ __entry->nr_frags = skb_shinfo(skb)->nr_frags;
+ __entry->gso_size = skb_shinfo(skb)->gso_size;
+ __entry->gso_type = skb_shinfo(skb)->gso_type;
+ __entry->ovs_flow_hash = key->ovs_flow_hash;
+ __entry->recirc_id = key->recirc_id;
+ __entry->keyaddr = key;
+ __entry->key_eth_type = key->eth.type;
+ __entry->key_ct_state = key->ct_state;
+ __entry->key_ct_orig_proto = key->ct_orig_proto;
+ __entry->key_ct_zone = key->ct_zone;
+ __entry->flow_key_valid = !(key->mac_proto & SW_FLOW_KEY_INVALID);
+ __entry->action_type = nla_type(a);
+ __entry->action_len = nla_len(a);
+ __entry->action_data = nla_data(a);
+ __entry->is_last = nla_is_last(a, rem);
+ ),
+
+ TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_Zone=%04x flow_key_valid=%d action_type=%u action_len=%u action_data=%p is_last=%d",
+ __entry->dpaddr, __get_str(dp_name), __get_str(dev_name),
+ __entry->skbaddr, __entry->len, __entry->data_len,
+ __entry->truesize, __entry->nr_frags, __entry->gso_size,
+ __entry->gso_type, __entry->ovs_flow_hash,
+ __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type,
+ __entry->key_ct_state, __entry->key_ct_orig_proto,
+ __entry->key_ct_zone,
+ __entry->flow_key_valid,
+ __entry->action_type, __entry->action_len,
+ __entry->action_data, __entry->is_last)
+);
+
+TRACE_EVENT(ovs_dp_upcall,
+
+ TP_PROTO(struct datapath *dp, struct sk_buff *skb,
+ const struct sw_flow_key *key,
+ const struct dp_upcall_info *upcall_info),
+
+ TP_ARGS(dp, skb, key, upcall_info),
+
+ TP_STRUCT__entry(
+ __field( void *, dpaddr )
+ __string( dp_name, ovs_dp_name(dp) )
+ __string( dev_name, skb->dev->name )
+ __field( void *, skbaddr )
+ __field( unsigned int, len )
+ __field( unsigned int, data_len )
+ __field( unsigned int, truesize )
+ __field( u8, nr_frags )
+ __field( u16, gso_size )
+ __field( u16, gso_type )
+ __field( u32, ovs_flow_hash )
+ __field( u32, recirc_id )
+ __field( const void *, keyaddr )
+ __field( u16, key_eth_type )
+ __field( u8, key_ct_state )
+ __field( u8, key_ct_orig_proto )
+ __field( u16, key_ct_zone )
+ __field( unsigned int, flow_key_valid )
+ __field( u8, upcall_cmd )
+ __field( u32, upcall_port )
+ __field( u16, upcall_mru )
+ ),
+
+ TP_fast_assign(
+ __entry->dpaddr = dp;
+ __assign_str(dp_name, ovs_dp_name(dp));
+ __assign_str(dev_name, skb->dev->name);
+ __entry->skbaddr = skb;
+ __entry->len = skb->len;
+ __entry->data_len = skb->data_len;
+ __entry->truesize = skb->truesize;
+ __entry->nr_frags = skb_shinfo(skb)->nr_frags;
+ __entry->gso_size = skb_shinfo(skb)->gso_size;
+ __entry->gso_type = skb_shinfo(skb)->gso_type;
+ __entry->ovs_flow_hash = key->ovs_flow_hash;
+ __entry->recirc_id = key->recirc_id;
+ __entry->keyaddr = key;
+ __entry->key_eth_type = key->eth.type;
+ __entry->key_ct_state = key->ct_state;
+ __entry->key_ct_orig_proto = key->ct_orig_proto;
+ __entry->key_ct_zone = key->ct_zone;
+ __entry->flow_key_valid = !(key->mac_proto & SW_FLOW_KEY_INVALID);
+ __entry->upcall_cmd = upcall_info->cmd;
+ __entry->upcall_port = upcall_info->portid;
+ __entry->upcall_mru = upcall_info->mru;
+ ),
+
+ TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_zone=%04x flow_key_valid=%d upcall_cmd=%u upcall_port=%u upcall_mru=%u",
+ __entry->dpaddr, __get_str(dp_name), __get_str(dev_name),
+ __entry->skbaddr, __entry->len, __entry->data_len,
+ __entry->truesize, __entry->nr_frags, __entry->gso_size,
+ __entry->gso_type, __entry->ovs_flow_hash,
+ __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type,
+ __entry->key_ct_state, __entry->key_ct_orig_proto,
+ __entry->key_ct_zone,
+ __entry->flow_key_valid,
+ __entry->upcall_cmd, __entry->upcall_port,
+ __entry->upcall_mru)
+);
+
+#endif /* _TRACE_OPENVSWITCH_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE openvswitch_trace
+#include <trace/define_trace.h>
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 71dd6b910f7c..77b0cdab3810 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2683,7 +2683,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
- proto = po->num;
+ proto = READ_ONCE(po->num);
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2896,7 +2896,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
- proto = po->num;
+ proto = READ_ONCE(po->num);
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -3034,10 +3034,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
- if (po->tx_ring.pg_vec)
+ /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
+ * tpacket_snd() will redo the check safely.
+ */
+ if (data_race(po->tx_ring.pg_vec))
return tpacket_snd(po, msg);
- else
- return packet_snd(sock, msg, len);
+
+ return packet_snd(sock, msg, len);
}
/*
@@ -3168,7 +3171,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
/* prevents packet_notifier() from calling
* register_prot_hook()
*/
- po->num = 0;
+ WRITE_ONCE(po->num, 0);
__unregister_prot_hook(sk, true);
rcu_read_lock();
dev_curr = po->prot_hook.dev;
@@ -3178,17 +3181,17 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
}
BUG_ON(po->running);
- po->num = proto;
+ WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;
if (unlikely(unlisted)) {
dev_put(dev);
po->prot_hook.dev = NULL;
- po->ifindex = -1;
+ WRITE_ONCE(po->ifindex, -1);
packet_cached_dev_reset(po);
} else {
po->prot_hook.dev = dev;
- po->ifindex = dev ? dev->ifindex : 0;
+ WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
packet_cached_dev_assign(po, dev);
}
}
@@ -3502,7 +3505,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
uaddr->sa_family = AF_PACKET;
memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
+ dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
if (dev)
strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
@@ -3517,16 +3520,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
+ int ifindex;
if (peer)
return -EOPNOTSUPP;
+ ifindex = READ_ONCE(po->ifindex);
sll->sll_family = AF_PACKET;
- sll->sll_ifindex = po->ifindex;
- sll->sll_protocol = po->num;
+ sll->sll_ifindex = ifindex;
+ sll->sll_protocol = READ_ONCE(po->num);
sll->sll_pkttype = 0;
rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
+ dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
if (dev) {
sll->sll_hatype = dev->type;
sll->sll_halen = dev->addr_len;
@@ -4102,7 +4107,7 @@ static int packet_notifier(struct notifier_block *this,
}
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
- po->ifindex = -1;
+ WRITE_ONCE(po->ifindex, -1);
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);
po->prot_hook.dev = NULL;
@@ -4408,7 +4413,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
was_running = po->running;
num = po->num;
if (was_running) {
- po->num = 0;
+ WRITE_ONCE(po->num, 0);
__unregister_prot_hook(sk, false);
}
spin_unlock(&po->bind_lock);
@@ -4443,7 +4448,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
spin_lock(&po->bind_lock);
if (was_running) {
- po->num = num;
+ WRITE_ONCE(po->num, num);
register_prot_hook(sk);
}
spin_unlock(&po->bind_lock);
@@ -4613,8 +4618,8 @@ static int packet_seq_show(struct seq_file *seq, void *v)
s,
refcount_read(&s->sk_refcnt),
s->sk_type,
- ntohs(po->num),
- po->ifindex,
+ ntohs(READ_ONCE(po->num)),
+ READ_ONCE(po->ifindex),
po->running,
atomic_read(&s->sk_rmem_alloc),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c0477bec09bd..f2efaa4225f9 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -436,7 +436,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
struct qrtr_sock *ipc;
struct sk_buff *skb;
struct qrtr_cb *cb;
- unsigned int size;
+ size_t size;
unsigned int ver;
size_t hdrlen;
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 4db109fb6ec2..5b426dc3634d 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -714,7 +714,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (rds_cmsg_recv(inc, msg, rs)) {
ret = -EFAULT;
- goto out;
+ break;
}
rds_recvmsg_zcookie(rs, msg);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 18edd9ad1410..a656baa321fe 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -904,14 +904,19 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
}
err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
- if (err == NF_ACCEPT &&
- ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
- if (maniptype == NF_NAT_MANIP_SRC)
- maniptype = NF_NAT_MANIP_DST;
- else
- maniptype = NF_NAT_MANIP_SRC;
-
- err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+ if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
+ if (ct->status & IPS_SRC_NAT) {
+ if (maniptype == NF_NAT_MANIP_SRC)
+ maniptype = NF_NAT_MANIP_DST;
+ else
+ maniptype = NF_NAT_MANIP_SRC;
+
+ err = ct_nat_execute(skb, ct, ctinfo, range,
+ maniptype);
+ } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ err = ct_nat_execute(skb, ct, ctinfo, NULL,
+ NF_NAT_MANIP_SRC);
+ }
}
return err;
#else
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 2e704c7a105a..d7869a984881 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1531,13 +1531,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->basic.n_proto,
TCA_FLOWER_UNSPEC,
sizeof(key->basic.n_proto));
- mask->basic.n_proto = cpu_to_be16(0);
} else {
key->basic.n_proto = ethertype;
+ mask->basic.n_proto = cpu_to_be16(~0);
}
}
} else {
key->basic.n_proto = ethertype;
+ mask->basic.n_proto = cpu_to_be16(~0);
}
}
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 7d37638ee1c7..951542843cab 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -943,7 +943,7 @@ static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
}
tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
- if (!tcph)
+ if (!tcph || tcph->doff < 5)
return NULL;
return skb_header_pointer(skb, offset,
@@ -967,6 +967,8 @@ static const void *cake_get_tcpopt(const struct tcphdr *tcph,
length--;
continue;
}
+ if (length < 2)
+ break;
opsize = *ptr++;
if (opsize < 2 || opsize > length)
break;
@@ -1104,6 +1106,8 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph,
length--;
continue;
}
+ if (length < 2)
+ break;
opsize = *ptr++;
if (opsize < 2 || opsize > length)
break;
@@ -2338,7 +2342,7 @@ static int cake_config_precedence(struct Qdisc *sch)
/* List of known Diffserv codepoints:
*
- * Least Effort (CS1)
+ * Least Effort (CS1, LE)
* Best Effort (CS0)
* Max Reliability & LLT "Lo" (TOS1)
* Max Throughput (TOS2)
@@ -2360,7 +2364,7 @@ static int cake_config_precedence(struct Qdisc *sch)
* Total 25 codepoints.
*/
-/* List of traffic classes in RFC 4594:
+/* List of traffic classes in RFC 4594, updated by RFC 8622:
* (roughly descending order of contended priority)
* (roughly ascending order of uncontended throughput)
*
@@ -2375,7 +2379,7 @@ static int cake_config_precedence(struct Qdisc *sch)
* Ops, Admin, Management (CS2,TOS1) - eg. ssh
* Standard Service (CS0 & unrecognised codepoints)
* High Throughput Data (AF1x,TOS2) - eg. web traffic
- * Low Priority Data (CS1) - eg. BitTorrent
+ * Low Priority Data (CS1,LE) - eg. BitTorrent
* Total 12 traffic classes.
*/
@@ -2391,7 +2395,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
* Video Streaming (AF4x, AF3x, CS3)
* Bog Standard (CS0 etc.)
* High Throughput (AF1x, TOS2)
- * Background Traffic (CS1)
+ * Background Traffic (CS1, LE)
*
* Total 8 traffic classes.
*/
@@ -2435,7 +2439,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
* Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4)
* Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
* Best Effort (CS0, AF1x, TOS2, and those not specified)
- * Background Traffic (CS1)
+ * Background Traffic (CS1, LE)
*
* Total 4 traffic classes.
*/
@@ -2473,7 +2477,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
static int cake_config_diffserv3(struct Qdisc *sch)
{
/* Simplified Diffserv structure with 3 tins.
- * Low Priority (CS1)
+ * Low Priority (CS1, LE)
* Best Effort
* Latency Sensitive (TOS4, VA, EF, CS6, CS7)
*/
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e9c0afc8becc..d9ac60ffe927 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q,
*/
if (!netif_xmit_frozen_or_stopped(txq))
set_bit(__QDISC_STATE_MISSED, &q->state);
+ else
+ set_bit(__QDISC_STATE_DRAINING, &q->state);
}
/* Main transmission queue. */
@@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
skb = next;
}
- if (lock)
+
+ if (lock) {
spin_unlock(lock);
- __netif_schedule(q);
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+ } else {
+ __netif_schedule(q);
+ }
}
static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q)
while (qdisc_restart(q, &packets)) {
quota -= packets;
if (quota <= 0) {
- __netif_schedule(q);
+ if (q->flags & TCQ_F_NOLOCK)
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+ else
+ __netif_schedule(q);
+
break;
}
}
@@ -698,13 +708,14 @@ retry:
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
} else if (need_retry &&
- test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+ READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
/* Delay clearing the STATE_MISSED here to reduce
* the overhead of the second spin_trylock() in
* qdisc_run_begin() and __netif_schedule() calling
* in qdisc_run_end().
*/
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
/* Make sure dequeuing happens after clearing
* STATE_MISSED.
@@ -714,8 +725,6 @@ retry:
need_retry = false;
goto retry;
- } else {
- WRITE_ONCE(qdisc->empty, true);
}
return skb;
@@ -916,7 +925,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
- sch->empty = true;
dev_hold(dev);
refcount_set(&sch->refcnt, 1);
@@ -1222,6 +1230,7 @@ static void dev_reset_queue(struct net_device *dev,
spin_unlock_bh(qdisc_lock(qdisc));
if (nolock) {
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
spin_unlock_bh(&qdisc->seqlock);
}
}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 336df4b36655..be29da09cc7a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -98,6 +98,7 @@ static struct sctp_association *sctp_association_init(
* sock configured value.
*/
asoc->hbinterval = msecs_to_jiffies(sp->hbinterval);
+ asoc->probe_interval = msecs_to_jiffies(sp->probe_interval);
asoc->encap_port = sp->encap_port;
@@ -625,6 +626,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
* association configured value.
*/
peer->hbinterval = asoc->hbinterval;
+ peer->probe_interval = asoc->probe_interval;
peer->encap_port = asoc->encap_port;
@@ -714,6 +716,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
return NULL;
}
+ sctp_transport_pl_reset(peer);
+
/* Attach the remote transport to our asoc. */
list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
asoc->peer.transport_count++;
@@ -812,6 +816,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
spc_state = SCTP_ADDR_CONFIRMED;
transport->state = SCTP_ACTIVE;
+ sctp_transport_pl_reset(transport);
break;
case SCTP_TRANSPORT_DOWN:
@@ -821,6 +826,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
*/
if (transport->state != SCTP_UNCONFIRMED) {
transport->state = SCTP_INACTIVE;
+ sctp_transport_pl_reset(transport);
spc_state = SCTP_ADDR_UNREACHABLE;
} else {
sctp_transport_dst_release(transport);
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index c4d9c7feffb9..ccd773e4c371 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -154,6 +154,7 @@ static const char *const sctp_timer_tbl[] = {
"TIMEOUT_T5_SHUTDOWN_GUARD",
"TIMEOUT_HEARTBEAT",
"TIMEOUT_RECONF",
+ "TIMEOUT_PROBE",
"TIMEOUT_SACK",
"TIMEOUT_AUTOCLOSE",
};
diff --git a/net/sctp/input.c b/net/sctp/input.c
index d508f6f3dd08..fe6429cc012f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -385,7 +385,9 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
struct sctp_transport *t, __u32 pmtu)
{
- if (!t || (t->pathmtu <= pmtu))
+ if (!t ||
+ (t->pathmtu <= pmtu &&
+ t->pl.probe_size + sctp_transport_pl_hlen(t) <= pmtu))
return;
if (sock_owned_by_user(sk)) {
@@ -554,6 +556,49 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
sctp_transport_put(t);
}
+static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
+ __u8 type, __u8 code, __u32 info)
+{
+ struct sctp_association *asoc = t->asoc;
+ struct sock *sk = asoc->base.sk;
+ int err = 0;
+
+ switch (type) {
+ case ICMP_PARAMETERPROB:
+ err = EPROTO;
+ break;
+ case ICMP_DEST_UNREACH:
+ if (code > NR_ICMP_UNREACH)
+ return;
+ if (code == ICMP_FRAG_NEEDED) {
+ sctp_icmp_frag_needed(sk, asoc, t, SCTP_TRUNC4(info));
+ return;
+ }
+ if (code == ICMP_PROT_UNREACH) {
+ sctp_icmp_proto_unreachable(sk, asoc, t);
+ return;
+ }
+ err = icmp_err_convert[code].errno;
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code == ICMP_EXC_FRAGTIME)
+ return;
+
+ err = EHOSTUNREACH;
+ break;
+ case ICMP_REDIRECT:
+ sctp_icmp_redirect(sk, t, skb);
+ default:
+ return;
+ }
+ if (!sock_owned_by_user(sk) && inet_sk(sk)->recverr) {
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ } else { /* Only an error on timeout */
+ sk->sk_err_soft = err;
+ }
+}
+
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -572,22 +617,19 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
int sctp_v4_err(struct sk_buff *skb, __u32 info)
{
const struct iphdr *iph = (const struct iphdr *)skb->data;
- const int ihlen = iph->ihl * 4;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- struct sock *sk;
- struct sctp_association *asoc = NULL;
+ struct net *net = dev_net(skb->dev);
struct sctp_transport *transport;
- struct inet_sock *inet;
+ struct sctp_association *asoc;
__u16 saveip, savesctp;
- int err;
- struct net *net = dev_net(skb->dev);
+ struct sock *sk;
/* Fix up skb to look at the embedded net header. */
saveip = skb->network_header;
savesctp = skb->transport_header;
skb_reset_network_header(skb);
- skb_set_transport_header(skb, ihlen);
+ skb_set_transport_header(skb, iph->ihl * 4);
sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
/* Put back, the original values. */
skb->network_header = saveip;
@@ -596,59 +638,41 @@ int sctp_v4_err(struct sk_buff *skb, __u32 info)
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return -ENOENT;
}
- /* Warning: The sock lock is held. Remember to call
- * sctp_err_finish!
- */
- switch (type) {
- case ICMP_PARAMETERPROB:
- err = EPROTO;
- break;
- case ICMP_DEST_UNREACH:
- if (code > NR_ICMP_UNREACH)
- goto out_unlock;
+ sctp_v4_err_handle(transport, skb, type, code, info);
+ sctp_err_finish(sk, transport);
- /* PMTU discovery (RFC1191) */
- if (ICMP_FRAG_NEEDED == code) {
- sctp_icmp_frag_needed(sk, asoc, transport,
- SCTP_TRUNC4(info));
- goto out_unlock;
- } else {
- if (ICMP_PROT_UNREACH == code) {
- sctp_icmp_proto_unreachable(sk, asoc,
- transport);
- goto out_unlock;
- }
- }
- err = icmp_err_convert[code].errno;
- break;
- case ICMP_TIME_EXCEEDED:
- /* Ignore any time exceeded errors due to fragment reassembly
- * timeouts.
- */
- if (ICMP_EXC_FRAGTIME == code)
- goto out_unlock;
+ return 0;
+}
- err = EHOSTUNREACH;
- break;
- case ICMP_REDIRECT:
- sctp_icmp_redirect(sk, transport, skb);
- /* Fall through to out_unlock. */
- default:
- goto out_unlock;
+int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ struct icmphdr *hdr;
+ __u32 info = 0;
+
+ skb->transport_header += sizeof(struct udphdr);
+ sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &t);
+ if (!sk) {
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+ return -ENOENT;
}
- inet = inet_sk(sk);
- if (!sock_owned_by_user(sk) && inet->recverr) {
- sk->sk_err = err;
- sk->sk_error_report(sk);
- } else { /* Only an error on timeout */
- sk->sk_err_soft = err;
+ skb->transport_header -= sizeof(struct udphdr);
+ hdr = (struct icmphdr *)(skb_network_header(skb) - sizeof(struct icmphdr));
+ if (hdr->type == ICMP_REDIRECT) {
+ /* can't be handled without outer iphdr known, leave it to udp_err */
+ sctp_err_finish(sk, t);
+ return 0;
}
+ if (hdr->type == ICMP_DEST_UNREACH && hdr->code == ICMP_FRAG_NEEDED)
+ info = ntohs(hdr->un.frag.mtu);
+ sctp_v4_err_handle(t, skb, hdr->type, hdr->code, info);
-out_unlock:
- sctp_err_finish(sk, transport);
- return 0;
+ sctp_err_finish(sk, t);
+ return 1;
}
/*
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index bd08807c9e44..05f81a4d0ee7 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -122,54 +122,28 @@ static struct notifier_block sctp_inet6addr_notifier = {
.notifier_call = sctp_inet6addr_event,
};
-/* ICMP error handler. */
-static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
+ __u8 type, __u8 code, __u32 info)
{
- struct inet6_dev *idev;
- struct sock *sk;
- struct sctp_association *asoc;
- struct sctp_transport *transport;
+ struct sctp_association *asoc = t->asoc;
+ struct sock *sk = asoc->base.sk;
struct ipv6_pinfo *np;
- __u16 saveip, savesctp;
- int err, ret = 0;
- struct net *net = dev_net(skb->dev);
-
- idev = in6_dev_get(skb->dev);
-
- /* Fix up skb to look at the embedded net header. */
- saveip = skb->network_header;
- savesctp = skb->transport_header;
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, offset);
- sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
- /* Put back, the original pointers. */
- skb->network_header = saveip;
- skb->transport_header = savesctp;
- if (!sk) {
- __ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS);
- ret = -ENOENT;
- goto out;
- }
-
- /* Warning: The sock lock is held. Remember to call
- * sctp_err_finish!
- */
+ int err = 0;
switch (type) {
case ICMPV6_PKT_TOOBIG:
if (ip6_sk_accept_pmtu(sk))
- sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info));
- goto out_unlock;
+ sctp_icmp_frag_needed(sk, asoc, t, info);
+ return;
case ICMPV6_PARAMPROB:
if (ICMPV6_UNK_NEXTHDR == code) {
- sctp_icmp_proto_unreachable(sk, asoc, transport);
- goto out_unlock;
+ sctp_icmp_proto_unreachable(sk, asoc, t);
+ return;
}
break;
case NDISC_REDIRECT:
- sctp_icmp_redirect(sk, transport, skb);
- goto out_unlock;
+ sctp_icmp_redirect(sk, t, skb);
+ return;
default:
break;
}
@@ -179,17 +153,69 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sock_owned_by_user(sk) && np->recverr) {
sk->sk_err = err;
sk->sk_error_report(sk);
- } else { /* Only an error on timeout */
+ } else {
sk->sk_err_soft = err;
}
+}
+
+/* ICMP error handler. */
+static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sctp_transport *transport;
+ struct sctp_association *asoc;
+ __u16 saveip, savesctp;
+ struct sock *sk;
+
+ /* Fix up skb to look at the embedded net header. */
+ saveip = skb->network_header;
+ savesctp = skb->transport_header;
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, offset);
+ sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
+ /* Put back, the original pointers. */
+ skb->network_header = saveip;
+ skb->transport_header = savesctp;
+ if (!sk) {
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ return -ENOENT;
+ }
-out_unlock:
+ sctp_v6_err_handle(transport, skb, type, code, ntohl(info));
sctp_err_finish(sk, transport);
-out:
- if (likely(idev != NULL))
- in6_dev_put(idev);
- return ret;
+ return 0;
+}
+
+int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ struct icmp6hdr *hdr;
+ __u32 info = 0;
+
+ skb->transport_header += sizeof(struct udphdr);
+ sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &t);
+ if (!sk) {
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ return -ENOENT;
+ }
+
+ skb->transport_header -= sizeof(struct udphdr);
+ hdr = (struct icmp6hdr *)(skb_network_header(skb) - sizeof(struct icmp6hdr));
+ if (hdr->icmp6_type == NDISC_REDIRECT) {
+ /* can't be handled without outer ip6hdr known, leave it to udpv6_err */
+ sctp_err_finish(sk, t);
+ return 0;
+ }
+ if (hdr->icmp6_type == ICMPV6_PKT_TOOBIG)
+ info = ntohl(hdr->icmp6_mtu);
+ sctp_v6_err_handle(t, skb, hdr->icmp6_type, hdr->icmp6_code, info);
+
+ sctp_err_finish(sk, t);
+ return 1;
}
static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
diff --git a/net/sctp/output.c b/net/sctp/output.c
index a6aa17df09ef..9032ce60d50e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -103,7 +103,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
sctp_transport_route(tp, NULL, sp);
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
- } else if (!sctp_transport_pmtu_check(tp)) {
+ } else if (!sctp_transport_pl_enabled(tp) &&
+ !sctp_transport_pmtu_check(tp)) {
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
}
@@ -211,6 +212,30 @@ enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
return retval;
}
+/* Try to bundle a pad chunk into a packet with a heartbeat chunk for PLPMTUTD probe */
+static enum sctp_xmit sctp_packet_bundle_pad(struct sctp_packet *pkt, struct sctp_chunk *chunk)
+{
+ struct sctp_transport *t = pkt->transport;
+ struct sctp_chunk *pad;
+ int overhead = 0;
+
+ if (!chunk->pmtu_probe)
+ return SCTP_XMIT_OK;
+
+ /* calculate the Padding Data size for the pad chunk */
+ overhead += sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ overhead += sizeof(struct sctp_sender_hb_info) + sizeof(struct sctp_pad_chunk);
+ pad = sctp_make_pad(t->asoc, t->pl.probe_size - overhead);
+ if (!pad)
+ return SCTP_XMIT_DELAY;
+
+ list_add_tail(&pad->list, &pkt->chunk_list);
+ pkt->size += SCTP_PAD4(ntohs(pad->chunk_hdr->length));
+ chunk->transport = t;
+
+ return SCTP_XMIT_OK;
+}
+
/* Try to bundle an auth chunk into the packet. */
static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
struct sctp_chunk *chunk)
@@ -382,6 +407,10 @@ enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
goto finish;
retval = __sctp_packet_append_chunk(packet, chunk);
+ if (retval != SCTP_XMIT_OK)
+ goto finish;
+
+ retval = sctp_packet_bundle_pad(packet, chunk);
finish:
return retval;
@@ -553,7 +582,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
sk = chunk->skb->sk;
/* check gso */
- if (packet->size > tp->pathmtu && !packet->ipfragok) {
+ if (packet->size > tp->pathmtu && !packet->ipfragok && !chunk->pmtu_probe) {
if (!sk_can_gso(sk)) {
pr_err_once("Trying to GSO but underlying device doesn't support it.");
goto out;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 5cb1aa5f067b..ff47091c385e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -769,7 +769,11 @@ static int sctp_packet_singleton(struct sctp_transport *transport,
sctp_packet_init(&singleton, transport, sport, dport);
sctp_packet_config(&singleton, vtag, 0);
- sctp_packet_append_chunk(&singleton, chunk);
+ if (sctp_packet_append_chunk(&singleton, chunk) != SCTP_XMIT_OK) {
+ list_del_init(&chunk->list);
+ sctp_chunk_free(chunk);
+ return -ENOMEM;
+ }
return sctp_packet_transmit(&singleton, gfp);
}
@@ -929,8 +933,13 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
one_packet = 1;
fallthrough;
- case SCTP_CID_SACK:
case SCTP_CID_HEARTBEAT:
+ if (chunk->pmtu_probe) {
+ sctp_packet_singleton(ctx->transport, chunk, ctx->gfp);
+ break;
+ }
+ fallthrough;
+ case SCTP_CID_SACK:
case SCTP_CID_SHUTDOWN:
case SCTP_CID_ECN_ECNE:
case SCTP_CID_ASCONF:
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index baa4e770e4ba..bc5db0b404ce 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -850,23 +850,6 @@ static int sctp_udp_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static int sctp_udp_err_lookup(struct sock *sk, struct sk_buff *skb)
-{
- struct sctp_association *asoc;
- struct sctp_transport *t;
- int family;
-
- skb->transport_header += sizeof(struct udphdr);
- family = (ip_hdr(skb)->version == 4) ? AF_INET : AF_INET6;
- sk = sctp_err_lookup(dev_net(skb->dev), family, skb, sctp_hdr(skb),
- &asoc, &t);
- if (!sk)
- return -ENOENT;
-
- sctp_err_finish(sk, t);
- return 0;
-}
-
int sctp_udp_sock_start(struct net *net)
{
struct udp_tunnel_sock_cfg tuncfg = {NULL};
@@ -885,7 +868,7 @@ int sctp_udp_sock_start(struct net *net)
tuncfg.encap_type = 1;
tuncfg.encap_rcv = sctp_udp_rcv;
- tuncfg.encap_err_lookup = sctp_udp_err_lookup;
+ tuncfg.encap_err_lookup = sctp_udp_v4_err;
setup_udp_tunnel_sock(net, sock, &tuncfg);
net->sctp.udp4_sock = sock->sk;
@@ -907,7 +890,7 @@ int sctp_udp_sock_start(struct net *net)
tuncfg.encap_type = 1;
tuncfg.encap_rcv = sctp_udp_rcv;
- tuncfg.encap_err_lookup = sctp_udp_err_lookup;
+ tuncfg.encap_err_lookup = sctp_udp_v6_err;
setup_udp_tunnel_sock(net, sock, &tuncfg);
net->sctp.udp6_sock = sock->sk;
#endif
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5b44d228b6ca..b0eaa93a9cc6 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1160,7 +1160,8 @@ nodata:
/* Make a HEARTBEAT chunk. */
struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
- const struct sctp_transport *transport)
+ const struct sctp_transport *transport,
+ __u32 probe_size)
{
struct sctp_sender_hb_info hbinfo;
struct sctp_chunk *retval;
@@ -1176,6 +1177,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
hbinfo.daddr = transport->ipaddr;
hbinfo.sent_at = jiffies;
hbinfo.hb_nonce = transport->hb_nonce;
+ hbinfo.probe_size = probe_size;
/* Cast away the 'const', as this is just telling the chunk
* what transport it belongs to.
@@ -1183,6 +1185,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
retval->transport = (struct sctp_transport *) transport;
retval->subh.hbs_hdr = sctp_addto_chunk(retval, sizeof(hbinfo),
&hbinfo);
+ retval->pmtu_probe = !!probe_size;
nodata:
return retval;
@@ -1218,6 +1221,32 @@ nodata:
return retval;
}
+/* RFC4820 3. Padding Chunk (PAD)
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Type = 0x84 | Flags=0 | Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * \ Padding Data /
+ * / \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct sctp_chunk *sctp_make_pad(const struct sctp_association *asoc, int len)
+{
+ struct sctp_chunk *retval;
+
+ retval = sctp_make_control(asoc, SCTP_CID_PAD, 0, len, GFP_ATOMIC);
+ if (!retval)
+ return NULL;
+
+ skb_put_zero(retval->skb, len);
+ retval->chunk_hdr->length = htons(ntohs(retval->chunk_hdr->length) + len);
+ retval->chunk_end = skb_tail_pointer(retval->skb);
+
+ return retval;
+}
+
/* Create an Operation Error chunk with the specified space reserved.
* This routine can be used for containing multiple causes in the chunk.
*/
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index ce15d590a615..b3815b568e8e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -471,6 +471,38 @@ out_unlock:
sctp_transport_put(transport);
}
+/* Handle the timeout of the probe timer. */
+void sctp_generate_probe_event(struct timer_list *t)
+{
+ struct sctp_transport *transport = from_timer(transport, t, probe_timer);
+ struct sctp_association *asoc = transport->asoc;
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+ int error = 0;
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ pr_debug("%s: sock is busy\n", __func__);
+
+ /* Try again later. */
+ if (!mod_timer(&transport->probe_timer, jiffies + (HZ / 20)))
+ sctp_transport_hold(transport);
+ goto out_unlock;
+ }
+
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
+ SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_PROBE),
+ asoc->state, asoc->ep, asoc,
+ transport, GFP_ATOMIC);
+
+ if (error)
+ sk->sk_err = -error;
+
+out_unlock:
+ bh_unlock_sock(sk);
+ sctp_transport_put(transport);
+}
+
/* Inject a SACK Timeout event into the state machine. */
static void sctp_generate_sack_event(struct timer_list *t)
{
@@ -1641,6 +1673,11 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
sctp_cmd_hb_timers_stop(commands, asoc);
break;
+ case SCTP_CMD_PROBE_TIMER_UPDATE:
+ t = cmd->obj.transport;
+ sctp_transport_reset_probe_timer(t);
+ break;
+
case SCTP_CMD_REPORT_ERROR:
error = cmd->obj.error;
break;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 4f30388a0dd0..09a8f23ec709 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1004,7 +1004,7 @@ static enum sctp_disposition sctp_sf_heartbeat(
struct sctp_chunk *reply;
/* Send a heartbeat to our peer. */
- reply = sctp_make_heartbeat(asoc, transport);
+ reply = sctp_make_heartbeat(asoc, transport, 0);
if (!reply)
return SCTP_DISPOSITION_NOMEM;
@@ -1095,6 +1095,32 @@ enum sctp_disposition sctp_sf_send_reconf(struct net *net,
return SCTP_DISPOSITION_CONSUME;
}
+/* send hb chunk with padding for PLPMUTD. */
+enum sctp_disposition sctp_sf_send_probe(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
+{
+ struct sctp_transport *transport = (struct sctp_transport *)arg;
+ struct sctp_chunk *reply;
+
+ if (!sctp_transport_pl_enabled(transport))
+ return SCTP_DISPOSITION_CONSUME;
+
+ sctp_transport_pl_send(transport);
+
+ reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
+ if (!reply)
+ return SCTP_DISPOSITION_NOMEM;
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+ sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
+ SCTP_TRANSPORT(transport));
+
+ return SCTP_DISPOSITION_CONSUME;
+}
+
/*
* Process an heartbeat request.
*
@@ -1243,6 +1269,18 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
if (hbinfo->hb_nonce != link->hb_nonce)
return SCTP_DISPOSITION_DISCARD;
+ if (hbinfo->probe_size) {
+ if (hbinfo->probe_size != link->pl.probe_size ||
+ !sctp_transport_pl_enabled(link))
+ return SCTP_DISPOSITION_DISCARD;
+
+ sctp_transport_pl_recv(link);
+ if (link->pl.state == SCTP_PL_COMPLETE)
+ return SCTP_DISPOSITION_CONSUME;
+
+ return sctp_sf_send_probe(net, ep, asoc, type, link, commands);
+ }
+
max_interval = link->hbinterval + link->rto;
/* Check if the timestamp looks valid. */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 88ea87f4f0e7..1816a4410b2b 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -527,6 +527,26 @@ auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
}; /*state_fn_t auth_chunk_event_table[][] */
static const struct sctp_sm_table_entry
+pad_chunk_event_table[SCTP_STATE_NUM_STATES] = {
+ /* SCTP_STATE_CLOSED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_COOKIE_WAIT */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_COOKIE_ECHOED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_ESTABLISHED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_PENDING */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_SENT */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_RECEIVED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_ACK_SENT */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+}; /* chunk pad */
+
+static const struct sctp_sm_table_entry
chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
/* SCTP_STATE_CLOSED */
TYPE_SCTP_FUNC(sctp_sf_ootb),
@@ -947,6 +967,25 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
+#define TYPE_SCTP_EVENT_TIMEOUT_PROBE { \
+ /* SCTP_STATE_CLOSED */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_COOKIE_WAIT */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_COOKIE_ECHOED */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_ESTABLISHED */ \
+ TYPE_SCTP_FUNC(sctp_sf_send_probe), \
+ /* SCTP_STATE_SHUTDOWN_PENDING */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_SHUTDOWN_SENT */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+}
+
static const struct sctp_sm_table_entry
timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_EVENT_TIMEOUT_NONE,
@@ -958,6 +997,7 @@ timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD,
TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT,
TYPE_SCTP_EVENT_TIMEOUT_RECONF,
+ TYPE_SCTP_EVENT_TIMEOUT_PROBE,
TYPE_SCTP_EVENT_TIMEOUT_SACK,
TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
};
@@ -992,6 +1032,9 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
case SCTP_CID_AUTH:
return &auth_chunk_event_table[0][state];
+
+ case SCTP_CID_PAD:
+ return &pad_chunk_event_table[state];
}
return &chunk_event_table_unknown[state];
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a79d193ff872..e64e01f61b11 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2496,6 +2496,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
sctp_transport_pmtu(trans, sctp_opt2sk(sp));
sctp_assoc_sync_pmtu(asoc);
}
+ sctp_transport_pl_reset(trans);
} else if (asoc) {
asoc->param_flags =
(asoc->param_flags & ~SPP_PMTUD) | pmtud_change;
@@ -4481,6 +4482,61 @@ static int sctp_setsockopt_encap_port(struct sock *sk,
return 0;
}
+static int sctp_setsockopt_probe_interval(struct sock *sk,
+ struct sctp_probeinterval *params,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ __u32 probe_interval;
+
+ if (optlen != sizeof(*params))
+ return -EINVAL;
+
+ probe_interval = params->spi_interval;
+ if (probe_interval && probe_interval < SCTP_PROBE_TIMER_MIN)
+ return -EINVAL;
+
+ /* If an address other than INADDR_ANY is specified, and
+ * no transport is found, then the request is invalid.
+ */
+ if (!sctp_is_any(sk, (union sctp_addr *)&params->spi_address)) {
+ t = sctp_addr_id2transport(sk, &params->spi_address,
+ params->spi_assoc_id);
+ if (!t)
+ return -EINVAL;
+
+ t->probe_interval = msecs_to_jiffies(probe_interval);
+ sctp_transport_pl_reset(t);
+ return 0;
+ }
+
+ /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+ * socket is a one to many style socket, and an association
+ * was not found, then the id was invalid.
+ */
+ asoc = sctp_id2assoc(sk, params->spi_assoc_id);
+ if (!asoc && params->spi_assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP))
+ return -EINVAL;
+
+ /* If changes are for association, also apply probe_interval to
+ * each transport.
+ */
+ if (asoc) {
+ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) {
+ t->probe_interval = msecs_to_jiffies(probe_interval);
+ sctp_transport_pl_reset(t);
+ }
+
+ asoc->probe_interval = msecs_to_jiffies(probe_interval);
+ return 0;
+ }
+
+ sctp_sk(sk)->probe_interval = probe_interval;
+ return 0;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4703,6 +4759,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_REMOTE_UDP_ENCAPS_PORT:
retval = sctp_setsockopt_encap_port(sk, kopt, optlen);
break;
+ case SCTP_PLPMTUD_PROBE_INTERVAL:
+ retval = sctp_setsockopt_probe_interval(sk, kopt, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -4989,6 +5048,7 @@ static int sctp_init_sock(struct sock *sk)
atomic_set(&sp->pd_mode, 0);
skb_queue_head_init(&sp->pd_lobby);
sp->frag_interleave = 0;
+ sp->probe_interval = net->sctp.probe_interval;
/* Create a per socket endpoint structure. Even if we
* change the data structure relationships, this may still
@@ -7905,6 +7965,66 @@ out:
return 0;
}
+static int sctp_getsockopt_probe_interval(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_probeinterval params;
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ __u32 probe_interval;
+
+ if (len < sizeof(params))
+ return -EINVAL;
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ return -EFAULT;
+
+ /* If an address other than INADDR_ANY is specified, and
+ * no transport is found, then the request is invalid.
+ */
+ if (!sctp_is_any(sk, (union sctp_addr *)&params.spi_address)) {
+ t = sctp_addr_id2transport(sk, &params.spi_address,
+ params.spi_assoc_id);
+ if (!t) {
+ pr_debug("%s: failed no transport\n", __func__);
+ return -EINVAL;
+ }
+
+ probe_interval = jiffies_to_msecs(t->probe_interval);
+ goto out;
+ }
+
+ /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+ * socket is a one to many style socket, and an association
+ * was not found, then the id was invalid.
+ */
+ asoc = sctp_id2assoc(sk, params.spi_assoc_id);
+ if (!asoc && params.spi_assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP)) {
+ pr_debug("%s: failed no association\n", __func__);
+ return -EINVAL;
+ }
+
+ if (asoc) {
+ probe_interval = jiffies_to_msecs(asoc->probe_interval);
+ goto out;
+ }
+
+ probe_interval = sctp_sk(sk)->probe_interval;
+
+out:
+ params.spi_interval = probe_interval;
+ if (copy_to_user(optval, &params, len))
+ return -EFAULT;
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ return 0;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -8128,6 +8248,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_REMOTE_UDP_ENCAPS_PORT:
retval = sctp_getsockopt_encap_port(sk, len, optval, optlen);
break;
+ case SCTP_PLPMTUD_PROBE_INTERVAL:
+ retval = sctp_getsockopt_probe_interval(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 55871b277f47..b46a416787ec 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -55,6 +55,8 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
+static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table sctp_table[] = {
{
@@ -294,6 +296,13 @@ static struct ctl_table sctp_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "plpmtud_probe_interval",
+ .data = &init_net.sctp.probe_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_sctp_do_probe_interval,
+ },
+ {
.procname = "udp_port",
.data = &init_net.sctp.udp_port,
.maxlen = sizeof(int),
@@ -539,6 +548,32 @@ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
return ret;
}
+static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct ctl_table tbl;
+ int ret, new_value;
+
+ memset(&tbl, 0, sizeof(struct ctl_table));
+ tbl.maxlen = sizeof(unsigned int);
+
+ if (write)
+ tbl.data = &new_value;
+ else
+ tbl.data = &net->sctp.probe_interval;
+
+ ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0) {
+ if (new_value && new_value < SCTP_PROBE_TIMER_MIN)
+ return -EINVAL;
+
+ net->sctp.probe_interval = new_value;
+ }
+
+ return ret;
+}
+
int sctp_sysctl_net_register(struct net *net)
{
struct ctl_table *table;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index bf0ac467e757..5f23804f21c7 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -75,6 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
+ timer_setup(&peer->probe_timer, sctp_generate_probe_event, 0);
timer_setup(&peer->proto_unreach_timer,
sctp_generate_proto_unreach_event, 0);
@@ -131,6 +132,9 @@ void sctp_transport_free(struct sctp_transport *transport)
if (del_timer(&transport->reconf_timer))
sctp_transport_put(transport);
+ if (del_timer(&transport->probe_timer))
+ sctp_transport_put(transport);
+
/* Delete the ICMP proto unreachable timer if it's active. */
if (del_timer(&transport->proto_unreach_timer))
sctp_transport_put(transport);
@@ -207,6 +211,15 @@ void sctp_transport_reset_reconf_timer(struct sctp_transport *transport)
sctp_transport_hold(transport);
}
+void sctp_transport_reset_probe_timer(struct sctp_transport *transport)
+{
+ if (timer_pending(&transport->probe_timer))
+ return;
+ if (!mod_timer(&transport->probe_timer,
+ jiffies + transport->probe_interval))
+ sctp_transport_hold(transport);
+}
+
/* This transport has been assigned to an association.
* Initialize fields from the association or from the sock itself.
* Register the reference count in the association.
@@ -241,12 +254,143 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
transport->pathmtu = sctp_dst_mtu(transport->dst);
else
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
+
+ sctp_transport_pl_update(transport);
+}
+
+void sctp_transport_pl_send(struct sctp_transport *t)
+{
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+ if (t->pl.probe_count < SCTP_MAX_PROBES) {
+ t->pl.probe_count++;
+ return;
+ }
+
+ if (t->pl.state == SCTP_PL_BASE) {
+ if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
+ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+ t->pl.pmtu = SCTP_MIN_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
+ t->pl.state = SCTP_PL_BASE; /* Search -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ t->pl.probe_high = 0;
+
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ } else { /* Normal probe failure. */
+ t->pl.probe_high = t->pl.probe_size;
+ t->pl.probe_size = t->pl.pmtu;
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
+ t->pl.state = SCTP_PL_BASE; /* Search Complete -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+ }
+ t->pl.probe_count = 1;
+}
+
+void sctp_transport_pl_recv(struct sctp_transport *t)
+{
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+ t->pl.pmtu = t->pl.probe_size;
+ t->pl.probe_count = 0;
+ if (t->pl.state == SCTP_PL_BASE) {
+ t->pl.state = SCTP_PL_SEARCH; /* Base -> Search */
+ t->pl.probe_size += SCTP_PL_BIG_STEP;
+ } else if (t->pl.state == SCTP_PL_ERROR) {
+ t->pl.state = SCTP_PL_SEARCH; /* Error -> Search */
+
+ t->pl.pmtu = t->pl.probe_size;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ t->pl.probe_size += SCTP_PL_BIG_STEP;
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (!t->pl.probe_high) {
+ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+ SCTP_MAX_PLPMTU);
+ return;
+ }
+ t->pl.probe_size += SCTP_PL_MIN_STEP;
+ if (t->pl.probe_size >= t->pl.probe_high) {
+ t->pl.probe_high = 0;
+ t->pl.raise_count = 0;
+ t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */
+
+ t->pl.probe_size = t->pl.pmtu;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE && ++t->pl.raise_count == 30) {
+ /* Raise probe_size again after 30 * interval in Search Complete */
+ t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+ t->pl.probe_size += SCTP_PL_MIN_STEP;
+ }
+}
+
+static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
+{
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, pmtu);
+
+ if (pmtu < SCTP_MIN_PLPMTU || pmtu >= t->pl.probe_size)
+ return false;
+
+ if (t->pl.state == SCTP_PL_BASE) {
+ if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) {
+ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+ t->pl.pmtu = SCTP_MIN_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ }
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+ t->pl.state = SCTP_PL_BASE; /* Search -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ t->pl.probe_count = 0;
+
+ t->pl.probe_high = 0;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ } else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) {
+ t->pl.probe_size = pmtu;
+ t->pl.probe_count = 0;
+
+ return false;
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+ t->pl.state = SCTP_PL_BASE; /* Complete -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ t->pl.probe_count = 0;
+
+ t->pl.probe_high = 0;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ }
+ }
+
+ return true;
}
bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
- struct dst_entry *dst = sctp_transport_dst_check(t);
struct sock *sk = t->asoc->base.sk;
+ struct dst_entry *dst;
bool change = true;
if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
@@ -257,6 +401,10 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
}
pmtu = SCTP_TRUNC4(pmtu);
+ if (sctp_transport_pl_enabled(t))
+ return sctp_transport_pl_toobig(t, pmtu - sctp_transport_pl_hlen(t));
+
+ dst = sctp_transport_dst_check(t);
if (dst) {
struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family);
union sctp_addr addr;
diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
index 614013e3b574..e80e34f7ac15 100644
--- a/net/smc/smc_stats.c
+++ b/net/smc/smc_stats.c
@@ -393,17 +393,17 @@ int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (!skip_serv) {
rc_srv = smc_nl_get_fback_details(skb, cb, k, is_srv);
- if (rc_srv && rc_srv != ENODATA)
+ if (rc_srv && rc_srv != -ENODATA)
break;
} else {
skip_serv = 0;
}
rc_clnt = smc_nl_get_fback_details(skb, cb, k, !is_srv);
- if (rc_clnt && rc_clnt != ENODATA) {
+ if (rc_clnt && rc_clnt != -ENODATA) {
skip_serv = 1;
break;
}
- if (rc_clnt == ENODATA && rc_srv == ENODATA)
+ if (rc_clnt == -ENODATA && rc_srv == -ENODATA)
break;
}
mutex_unlock(&net->smc.mutex_fback_rsn);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 075c4f4b41cf..289025cd545a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -154,6 +154,9 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
goto out_err;
}
+ if (sk->sk_state == SMC_INIT)
+ return -ENOTCONN;
+
if (len > conn->sndbuf_desc->len)
SMC_STAT_RMB_TX_SIZE_SMALL(smc, !conn->lnk);
@@ -164,8 +167,6 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
SMC_STAT_INC(smc, urg_data_cnt);
while (msg_data_left(msg)) {
- if (sk->sk_state == SMC_INIT)
- return -ENOTCONN;
if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
(smc->sk.sk_err == ECONNABORTED) ||
conn->killed)
diff --git a/net/socket.c b/net/socket.c
index 27e3e7d53f8e..bd9233da2497 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -165,6 +165,54 @@ static const struct file_operations socket_file_ops = {
.show_fdinfo = sock_show_fdinfo,
};
+static const char * const pf_family_names[] = {
+ [PF_UNSPEC] = "PF_UNSPEC",
+ [PF_UNIX] = "PF_UNIX/PF_LOCAL",
+ [PF_INET] = "PF_INET",
+ [PF_AX25] = "PF_AX25",
+ [PF_IPX] = "PF_IPX",
+ [PF_APPLETALK] = "PF_APPLETALK",
+ [PF_NETROM] = "PF_NETROM",
+ [PF_BRIDGE] = "PF_BRIDGE",
+ [PF_ATMPVC] = "PF_ATMPVC",
+ [PF_X25] = "PF_X25",
+ [PF_INET6] = "PF_INET6",
+ [PF_ROSE] = "PF_ROSE",
+ [PF_DECnet] = "PF_DECnet",
+ [PF_NETBEUI] = "PF_NETBEUI",
+ [PF_SECURITY] = "PF_SECURITY",
+ [PF_KEY] = "PF_KEY",
+ [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
+ [PF_PACKET] = "PF_PACKET",
+ [PF_ASH] = "PF_ASH",
+ [PF_ECONET] = "PF_ECONET",
+ [PF_ATMSVC] = "PF_ATMSVC",
+ [PF_RDS] = "PF_RDS",
+ [PF_SNA] = "PF_SNA",
+ [PF_IRDA] = "PF_IRDA",
+ [PF_PPPOX] = "PF_PPPOX",
+ [PF_WANPIPE] = "PF_WANPIPE",
+ [PF_LLC] = "PF_LLC",
+ [PF_IB] = "PF_IB",
+ [PF_MPLS] = "PF_MPLS",
+ [PF_CAN] = "PF_CAN",
+ [PF_TIPC] = "PF_TIPC",
+ [PF_BLUETOOTH] = "PF_BLUETOOTH",
+ [PF_IUCV] = "PF_IUCV",
+ [PF_RXRPC] = "PF_RXRPC",
+ [PF_ISDN] = "PF_ISDN",
+ [PF_PHONET] = "PF_PHONET",
+ [PF_IEEE802154] = "PF_IEEE802154",
+ [PF_CAIF] = "PF_CAIF",
+ [PF_ALG] = "PF_ALG",
+ [PF_NFC] = "PF_NFC",
+ [PF_VSOCK] = "PF_VSOCK",
+ [PF_KCM] = "PF_KCM",
+ [PF_QIPCRTR] = "PF_QIPCRTR",
+ [PF_SMC] = "PF_SMC",
+ [PF_XDP] = "PF_XDP",
+};
+
/*
* The protocol list. Each protocol is registered in here.
*/
@@ -1072,19 +1120,6 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* what to do with it - that's up to the protocol still.
*/
-/**
- * get_net_ns - increment the refcount of the network namespace
- * @ns: common namespace (net)
- *
- * Returns the net's common namespace.
- */
-
-struct ns_common *get_net_ns(struct ns_common *ns)
-{
- return &get_net(container_of(ns, struct net, ns))->ns;
-}
-EXPORT_SYMBOL_GPL(get_net_ns);
-
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
struct socket *sock;
@@ -2988,7 +3023,7 @@ int sock_register(const struct net_proto_family *ops)
}
spin_unlock(&net_family_lock);
- pr_info("NET: Registered protocol family %d\n", ops->family);
+ pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
return err;
}
EXPORT_SYMBOL(sock_register);
@@ -3016,7 +3051,7 @@ void sock_unregister(int family)
synchronize_rcu();
- pr_info("NET: Unregistered protocol family %d\n", family);
+ pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
}
EXPORT_SYMBOL(sock_unregister);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f555d335e910..42623d6b8f0e 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1677,13 +1677,6 @@ call_reserveresult(struct rpc_task *task)
return;
}
- /*
- * Even though there was an error, we may have acquired
- * a request slot somehow. Make sure not to leak it.
- */
- if (task->tk_rqstp)
- xprt_release(task);
-
switch (status) {
case -ENOMEM:
rpc_delay(task, HZ >> 2);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e5b5a960a69b..3509a7f139b9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -70,6 +70,7 @@
static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
static void xprt_destroy(struct rpc_xprt *xprt);
+static void xprt_request_init(struct rpc_task *task);
static DEFINE_SPINLOCK(xprt_list_lock);
static LIST_HEAD(xprt_list);
@@ -1606,17 +1607,40 @@ xprt_transmit(struct rpc_task *task)
spin_unlock(&xprt->queue_lock);
}
-static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
+static void xprt_complete_request_init(struct rpc_task *task)
+{
+ if (task->tk_rqstp)
+ xprt_request_init(task);
+}
+
+void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
{
set_bit(XPRT_CONGESTED, &xprt->state);
- rpc_sleep_on(&xprt->backlog, task, NULL);
+ rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init);
+}
+EXPORT_SYMBOL_GPL(xprt_add_backlog);
+
+static bool __xprt_set_rq(struct rpc_task *task, void *data)
+{
+ struct rpc_rqst *req = data;
+
+ if (task->tk_rqstp == NULL) {
+ memset(req, 0, sizeof(*req)); /* mark unused */
+ task->tk_rqstp = req;
+ return true;
+ }
+ return false;
}
-static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
+bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
- if (rpc_wake_up_next(&xprt->backlog) == NULL)
+ if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) {
clear_bit(XPRT_CONGESTED, &xprt->state);
+ return false;
+ }
+ return true;
}
+EXPORT_SYMBOL_GPL(xprt_wake_up_backlog);
static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
{
@@ -1626,7 +1650,7 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task
goto out;
spin_lock(&xprt->reserve_lock);
if (test_bit(XPRT_CONGESTED, &xprt->state)) {
- rpc_sleep_on(&xprt->backlog, task, NULL);
+ xprt_add_backlog(xprt, task);
ret = true;
}
spin_unlock(&xprt->reserve_lock);
@@ -1703,11 +1727,11 @@ EXPORT_SYMBOL_GPL(xprt_alloc_slot);
void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
spin_lock(&xprt->reserve_lock);
- if (!xprt_dynamic_free_slot(xprt, req)) {
+ if (!xprt_wake_up_backlog(xprt, req) &&
+ !xprt_dynamic_free_slot(xprt, req)) {
memset(req, 0, sizeof(*req)); /* mark unused */
list_add(&req->rq_list, &xprt->free);
}
- xprt_wake_up_backlog(xprt);
spin_unlock(&xprt->reserve_lock);
}
EXPORT_SYMBOL_GPL(xprt_free_slot);
@@ -1894,10 +1918,10 @@ void xprt_release(struct rpc_task *task)
xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
- task->tk_rqstp = NULL;
if (req->rq_release_snd_buf)
req->rq_release_snd_buf(req);
+ task->tk_rqstp = NULL;
if (likely(!bc_prealloc(req)))
xprt->ops->free_slot(xprt, req);
else
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 649f7d8b9733..c335c1361564 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -628,8 +628,9 @@ out_mapping_err:
return false;
}
-/* The tail iovec might not reside in the same page as the
- * head iovec.
+/* The tail iovec may include an XDR pad for the page list,
+ * as well as additional content, and may not reside in the
+ * same page as the head iovec.
*/
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
struct xdr_buf *xdr,
@@ -747,19 +748,27 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
- struct kvec *tail = &xdr->tail[0];
-
if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
return false;
- /* If there is a Read chunk, the page list is handled
+ /* If there is a Read chunk, the page list is being handled
* via explicit RDMA, and thus is skipped here.
*/
- if (tail->iov_len) {
- if (!rpcrdma_prepare_tail_iov(req, xdr,
- offset_in_page(tail->iov_base),
- tail->iov_len))
+ /* Do not include the tail if it is only an XDR pad */
+ if (xdr->tail[0].iov_len > 3) {
+ unsigned int page_base, len;
+
+ /* If the content in the page list is an odd length,
+ * xdr_write_pages() adds a pad at the beginning of
+ * the tail iovec. Force the tail's non-pad content to
+ * land at the next XDR position in the Send message.
+ */
+ page_base = offset_in_page(xdr->tail[0].iov_base);
+ len = xdr->tail[0].iov_len;
+ page_base += len & 3;
+ len -= len & 3;
+ if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
return false;
kref_get(&req->rl_kref);
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 09953597d055..19a49d26b1e4 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -520,9 +520,8 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
return;
out_sleep:
- set_bit(XPRT_CONGESTED, &xprt->state);
- rpc_sleep_on(&xprt->backlog, task, NULL);
task->tk_status = -EAGAIN;
+ xprt_add_backlog(xprt, task);
}
/**
@@ -537,10 +536,11 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
struct rpcrdma_xprt *r_xprt =
container_of(xprt, struct rpcrdma_xprt, rx_xprt);
- memset(rqst, 0, sizeof(*rqst));
- rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
- if (unlikely(!rpc_wake_up_next(&xprt->backlog)))
- clear_bit(XPRT_CONGESTED, &xprt->state);
+ rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
+ if (!xprt_wake_up_backlog(xprt, rqst)) {
+ memset(rqst, 0, sizeof(*rqst));
+ rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
+ }
}
static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 1e965a380896..649c23518ec0 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1201,6 +1201,20 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
}
/**
+ * rpcrdma_reply_put - Put reply buffers back into pool
+ * @buffers: buffer pool
+ * @req: object to return
+ *
+ */
+void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
+{
+ if (req->rl_reply) {
+ rpcrdma_rep_put(buffers, req->rl_reply);
+ req->rl_reply = NULL;
+ }
+}
+
+/**
* rpcrdma_buffer_get - Get a request buffer
* @buffers: Buffer pool from which to obtain a buffer
*
@@ -1228,9 +1242,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
*/
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
{
- if (req->rl_reply)
- rpcrdma_rep_put(buffers, req->rl_reply);
- req->rl_reply = NULL;
+ rpcrdma_reply_put(buffers, req);
spin_lock(&buffers->rb_lock);
list_add(&req->rl_list, &buffers->rb_send_bufs);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 436ad7312614..5d231d94e944 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -479,6 +479,7 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
struct rpcrdma_req *req);
void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
+void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req);
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
gfp_t flags);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 47aa47a2b07c..316d04945587 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1010,6 +1010,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
kernel_sock_shutdown(transport->sock, SHUT_RDWR);
return -ENOTCONN;
}
+ if (!transport->inet)
+ return -ENOTCONN;
xs_pktdump("packet data:",
req->rq_svec->iov_base,
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 89a36db47ab4..070698dd19bc 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -381,19 +381,20 @@ EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
static int __switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*add_cb)(struct net_device *dev,
+ int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack))
{
+ struct switchdev_notifier_info *info = &port_obj_info->info;
struct netlink_ext_ack *extack;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
- extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+ extack = switchdev_notifier_info_to_extack(info);
if (check_cb(dev)) {
- err = add_cb(dev, port_obj_info->obj, extack);
+ err = add_cb(dev, info->ctx, port_obj_info->obj, extack);
if (err != -EOPNOTSUPP)
port_obj_info->handled = true;
return err;
@@ -422,7 +423,7 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev,
int switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*add_cb)(struct net_device *dev,
+ int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack))
{
@@ -439,15 +440,16 @@ EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
static int __switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*del_cb)(struct net_device *dev,
+ int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj))
{
+ struct switchdev_notifier_info *info = &port_obj_info->info;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
if (check_cb(dev)) {
- err = del_cb(dev, port_obj_info->obj);
+ err = del_cb(dev, info->ctx, port_obj_info->obj);
if (err != -EOPNOTSUPP)
port_obj_info->handled = true;
return err;
@@ -476,7 +478,7 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev,
int switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*del_cb)(struct net_device *dev,
+ int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj))
{
int err;
@@ -492,19 +494,20 @@ EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
static int __switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
- int (*set_cb)(struct net_device *dev,
+ int (*set_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack))
{
+ struct switchdev_notifier_info *info = &port_attr_info->info;
struct netlink_ext_ack *extack;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
- extack = switchdev_notifier_info_to_extack(&port_attr_info->info);
+ extack = switchdev_notifier_info_to_extack(info);
if (check_cb(dev)) {
- err = set_cb(dev, port_attr_info->attr, extack);
+ err = set_cb(dev, info->ctx, port_attr_info->attr, extack);
if (err != -EOPNOTSUPP)
port_attr_info->handled = true;
return err;
@@ -533,7 +536,7 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev,
int switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
- int (*set_cb)(struct net_device *dev,
+ int (*set_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack))
{
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index d4beca895992..593846d25214 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -699,7 +699,7 @@ int tipc_bcast_init(struct net *net)
spin_lock_init(&tipc_net(net)->bclock);
if (!tipc_link_bc_create(net, 0, 0, NULL,
- FB_MTU,
+ one_page_mtu,
BCLINK_WIN_DEFAULT,
BCLINK_WIN_DEFAULT,
0,
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ce6ab54822d8..5c9fd4791c4b 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -41,19 +41,18 @@
#include "name_table.h"
#include "crypto.h"
+#define BUF_ALIGN(x) ALIGN(x, 4)
#define MAX_FORWARD_SIZE 1024
#ifdef CONFIG_TIPC_CRYPTO
#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
-#define BUF_TAILROOM (TIPC_AES_GCM_TAG_SIZE)
+#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE)
#else
#define BUF_HEADROOM (LL_MAX_HEADER + 48)
-#define BUF_TAILROOM 16
+#define BUF_OVERHEAD BUF_HEADROOM
#endif
-static unsigned int align(unsigned int i)
-{
- return (i + 3) & ~3u;
-}
+const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
/**
* tipc_buf_acquire - creates a TIPC message buffer
@@ -69,13 +68,8 @@ static unsigned int align(unsigned int i)
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
{
struct sk_buff *skb;
-#ifdef CONFIG_TIPC_CRYPTO
- unsigned int buf_size = (BUF_HEADROOM + size + BUF_TAILROOM + 3) & ~3u;
-#else
- unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
-#endif
- skb = alloc_skb_fclone(buf_size, gfp);
+ skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp);
if (skb) {
skb_reserve(skb, BUF_HEADROOM);
skb_put(skb, size);
@@ -395,7 +389,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
if (unlikely(!skb)) {
if (pktmax != MAX_MSG_SIZE)
return -ENOMEM;
- rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+ rc = tipc_msg_build(mhdr, m, offset, dsz,
+ one_page_mtu, list);
if (rc != dsz)
return rc;
if (tipc_msg_assemble(list))
@@ -490,7 +485,7 @@ static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
msz = msg_size(msg);
bsz = msg_size(bmsg);
- offset = align(bsz);
+ offset = BUF_ALIGN(bsz);
pad = offset - bsz;
if (unlikely(skb_tailroom(bskb) < (pad + msz)))
@@ -547,7 +542,7 @@ bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
/* Make a new bundle of the two messages if possible */
tsz = msg_size(buf_msg(tskb));
- if (unlikely(mss < align(INT_H_SIZE + tsz) + msg_size(msg)))
+ if (unlikely(mss < BUF_ALIGN(INT_H_SIZE + tsz) + msg_size(msg)))
return true;
if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE,
GFP_ATOMIC)))
@@ -606,7 +601,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
if (unlikely(!tipc_msg_validate(iskb)))
goto none;
- *pos += align(imsz);
+ *pos += BUF_ALIGN(imsz);
return true;
none:
kfree_skb(skb);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 5d64596ba987..64ae4c4c44f8 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -99,9 +99,10 @@ struct plist;
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-#define FB_MTU 3744
#define TIPC_MEDIA_INFO_OFFSET 5
+extern const int one_page_mtu;
+
struct tipc_skb_cb {
union {
struct {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4d4f24cbd86b..58c2f318b0a8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -262,6 +262,14 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
sk_add_node(sk, list);
}
+static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
+ unsigned hash)
+{
+ __unix_remove_socket(sk);
+ smp_store_release(&unix_sk(sk)->addr, addr);
+ __unix_insert_socket(&unix_socket_table[hash], sk);
+}
+
static inline void unix_remove_socket(struct sock *sk)
{
spin_lock(&unix_table_lock);
@@ -278,11 +286,11 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
static struct sock *__unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
- int len, int type, unsigned int hash)
+ int len, unsigned int hash)
{
struct sock *s;
- sk_for_each(s, &unix_socket_table[hash ^ type]) {
+ sk_for_each(s, &unix_socket_table[hash]) {
struct unix_sock *u = unix_sk(s);
if (!net_eq(sock_net(s), net))
@@ -297,13 +305,12 @@ static struct sock *__unix_find_socket_byname(struct net *net,
static inline struct sock *unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
- int len, int type,
- unsigned int hash)
+ int len, unsigned int hash)
{
struct sock *s;
spin_lock(&unix_table_lock);
- s = __unix_find_socket_byname(net, sunname, len, type, hash);
+ s = __unix_find_socket_byname(net, sunname, len, hash);
if (s)
sock_hold(s);
spin_unlock(&unix_table_lock);
@@ -535,12 +542,14 @@ static void unix_release_sock(struct sock *sk, int embrion)
u->path.mnt = NULL;
state = sk->sk_state;
sk->sk_state = TCP_CLOSE;
+
+ skpair = unix_peer(sk);
+ unix_peer(sk) = NULL;
+
unix_state_unlock(sk);
wake_up_interruptible_all(&u->peer_wait);
- skpair = unix_peer(sk);
-
if (skpair != NULL) {
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
unix_state_lock(skpair);
@@ -555,7 +564,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_dgram_peer_wake_disconnect(sk, skpair);
sock_put(skpair); /* It may now die */
- unix_peer(sk) = NULL;
}
/* Try to flush out this socket. Throw out buffers at least */
@@ -890,12 +898,12 @@ static int unix_autobind(struct socket *sock)
retry:
addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
+ addr->hash ^= sk->sk_type;
spin_lock(&unix_table_lock);
ordernum = (ordernum+1)&0xFFFFF;
- if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
- addr->hash)) {
+ if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
spin_unlock(&unix_table_lock);
/*
* __unix_find_socket_byname() may take long time if many names
@@ -910,11 +918,8 @@ retry:
}
goto retry;
}
- addr->hash ^= sk->sk_type;
- __unix_remove_socket(sk);
- smp_store_release(&u->addr, addr);
- __unix_insert_socket(&unix_socket_table[addr->hash], sk);
+ __unix_set_addr(sk, addr, addr->hash);
spin_unlock(&unix_table_lock);
err = 0;
@@ -959,7 +964,7 @@ static struct sock *unix_find_other(struct net *net,
}
} else {
err = -ECONNREFUSED;
- u = unix_find_socket_byname(net, sunname, len, type, hash);
+ u = unix_find_socket_byname(net, sunname, len, type ^ hash);
if (u) {
struct dentry *dentry;
dentry = unix_sk(u)->path.dentry;
@@ -977,125 +982,125 @@ fail:
return NULL;
}
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
{
+ struct unix_sock *u = unix_sk(sk);
+ umode_t mode = S_IFSOCK |
+ (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
+ struct user_namespace *ns; // barf...
+ struct path parent;
struct dentry *dentry;
- struct path path;
- int err = 0;
+ unsigned int hash;
+ int err;
+
/*
* Get the parent directory, calculate the hash for last
* component.
*/
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
+ dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
if (IS_ERR(dentry))
- return err;
+ return PTR_ERR(dentry);
+ ns = mnt_user_ns(parent.mnt);
/*
* All right, let's create it.
*/
- err = security_path_mknod(&path, dentry, mode, 0);
- if (!err) {
- err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
- dentry, mode, 0);
- if (!err) {
- res->mnt = mntget(path.mnt);
- res->dentry = dget(dentry);
- }
- }
- done_path_create(&path, dentry);
+ err = security_path_mknod(&parent, dentry, mode, 0);
+ if (!err)
+ err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
+ if (err)
+ goto out;
+ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+ goto out_unlink;
+ if (u->addr)
+ goto out_unlock;
+
+ addr->hash = UNIX_HASH_SIZE;
+ hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+ spin_lock(&unix_table_lock);
+ u->path.mnt = mntget(parent.mnt);
+ u->path.dentry = dget(dentry);
+ __unix_set_addr(sk, addr, hash);
+ spin_unlock(&unix_table_lock);
+ mutex_unlock(&u->bindlock);
+ done_path_create(&parent, dentry);
+ return 0;
+
+out_unlock:
+ mutex_unlock(&u->bindlock);
+ err = -EINVAL;
+out_unlink:
+ /* failed after successful mknod? unlink what we'd created... */
+ vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
+out:
+ done_path_create(&parent, dentry);
return err;
}
+static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
+{
+ struct unix_sock *u = unix_sk(sk);
+ int err;
+
+ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+ return err;
+
+ if (u->addr) {
+ mutex_unlock(&u->bindlock);
+ return -EINVAL;
+ }
+
+ spin_lock(&unix_table_lock);
+ if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
+ addr->hash)) {
+ spin_unlock(&unix_table_lock);
+ mutex_unlock(&u->bindlock);
+ return -EADDRINUSE;
+ }
+ __unix_set_addr(sk, addr, addr->hash);
+ spin_unlock(&unix_table_lock);
+ mutex_unlock(&u->bindlock);
+ return 0;
+}
+
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
- struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
int err;
unsigned int hash;
struct unix_address *addr;
- struct hlist_head *list;
- struct path path = { };
- err = -EINVAL;
if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
sunaddr->sun_family != AF_UNIX)
- goto out;
+ return -EINVAL;
- if (addr_len == sizeof(short)) {
- err = unix_autobind(sock);
- goto out;
- }
+ if (addr_len == sizeof(short))
+ return unix_autobind(sock);
err = unix_mkname(sunaddr, addr_len, &hash);
if (err < 0)
- goto out;
+ return err;
addr_len = err;
-
- if (sun_path[0]) {
- umode_t mode = S_IFSOCK |
- (SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
- if (err) {
- if (err == -EEXIST)
- err = -EADDRINUSE;
- goto out;
- }
- }
-
- err = mutex_lock_interruptible(&u->bindlock);
- if (err)
- goto out_put;
-
- err = -EINVAL;
- if (u->addr)
- goto out_up;
-
- err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
- goto out_up;
+ return -ENOMEM;
memcpy(addr->name, sunaddr, addr_len);
addr->len = addr_len;
addr->hash = hash ^ sk->sk_type;
refcount_set(&addr->refcnt, 1);
- if (sun_path[0]) {
- addr->hash = UNIX_HASH_SIZE;
- hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
- spin_lock(&unix_table_lock);
- u->path = path;
- list = &unix_socket_table[hash];
- } else {
- spin_lock(&unix_table_lock);
- err = -EADDRINUSE;
- if (__unix_find_socket_byname(net, sunaddr, addr_len,
- sk->sk_type, hash)) {
- unix_release_addr(addr);
- goto out_unlock;
- }
-
- list = &unix_socket_table[addr->hash];
- }
-
- err = 0;
- __unix_remove_socket(sk);
- smp_store_release(&u->addr, addr);
- __unix_insert_socket(list, sk);
-
-out_unlock:
- spin_unlock(&unix_table_lock);
-out_up:
- mutex_unlock(&u->bindlock);
-out_put:
+ if (sun_path[0])
+ err = unix_bind_bsd(sk, addr);
+ else
+ err = unix_bind_abstract(sk, addr);
if (err)
- path_put(&path);
-out:
- return err;
+ unix_release_addr(addr);
+ return err == -EEXIST ? -EADDRINUSE : err;
}
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 67954afef4e1..21ccf450e249 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -860,7 +860,7 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);
-static s64 vsock_has_data(struct vsock_sock *vsk)
+static s64 vsock_connectible_has_data(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);
@@ -1866,10 +1866,11 @@ out:
return err;
}
-static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
- long timeout,
- struct vsock_transport_recv_notify_data *recv_data,
- size_t target)
+static int vsock_connectible_wait_data(struct sock *sk,
+ struct wait_queue_entry *wait,
+ long timeout,
+ struct vsock_transport_recv_notify_data *recv_data,
+ size_t target)
{
const struct vsock_transport *transport;
struct vsock_sock *vsk;
@@ -1880,7 +1881,7 @@ static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait,
err = 0;
transport = vsk->transport;
- while ((data = vsock_has_data(vsk)) == 0) {
+ while ((data = vsock_connectible_has_data(vsk)) == 0) {
prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
if (sk->sk_err != 0 ||
@@ -1967,7 +1968,8 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
while (1) {
ssize_t read;
- err = vsock_wait_data(sk, &wait, timeout, &recv_data, target);
+ err = vsock_connectible_wait_data(sk, &wait, timeout,
+ &recv_data, target);
if (err <= 0)
break;
@@ -2022,7 +2024,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
- err = vsock_wait_data(sk, &wait, timeout, NULL, 0);
+ err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0);
if (err <= 0)
goto out;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index e73ce652bf3c..ed1664e7bd88 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -498,9 +498,11 @@ static bool virtio_transport_seqpacket_allow(u32 remote_cid)
struct virtio_vsock *vsock;
bool seqpacket_allow;
+ seqpacket_allow = false;
rcu_read_lock();
vsock = rcu_dereference(the_virtio_vsock);
- seqpacket_allow = vsock->seqpacket_allow;
+ if (vsock)
+ seqpacket_allow = vsock->seqpacket_allow;
rcu_read_unlock();
return seqpacket_allow;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 23704a6bc437..f014ccfdd9c2 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -413,7 +413,6 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
struct virtio_vsock_pkt *pkt;
int dequeued_len = 0;
size_t user_buf_len = msg_data_left(msg);
- bool copy_failed = false;
bool msg_ready = false;
spin_lock_bh(&vvs->rx_lock);
@@ -426,7 +425,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
while (!msg_ready) {
pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);
- if (!copy_failed) {
+ if (dequeued_len >= 0) {
size_t pkt_len;
size_t bytes_to_copy;
@@ -443,11 +442,9 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
if (err) {
- /* Copy of message failed, set flag to skip
- * copy path for rest of fragments. Rest of
+ /* Copy of message failed. Rest of
* fragments will be freed without copy.
*/
- copy_failed = true;
dequeued_len = err;
} else {
user_buf_len -= bytes_to_copy;
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 2eee93985ab0..af590ae606b6 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -28,7 +28,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
@$(kecho) " GEN $@"
@(echo '#include "reg.h"'; \
echo 'const u8 shipped_regdb_certs[] = {'; \
- cat $^ ; \
+ echo | cat - $^ ; \
echo '};'; \
echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
) > $@
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 285b8076054b..869c43d4414c 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2018-2020 Intel Corporation
+ * Copyright 2018-2021 Intel Corporation
*/
#include <linux/export.h>
@@ -942,7 +942,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
struct ieee80211_sta_vht_cap *vht_cap;
struct ieee80211_edmg *edmg_cap;
u32 width, control_freq, cap;
- bool support_80_80 = false;
+ bool ext_nss_cap, support_80_80 = false;
if (WARN_ON(!cfg80211_chandef_valid(chandef)))
return false;
@@ -950,6 +950,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap;
vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap;
edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap;
+ ext_nss_cap = __le16_to_cpu(vht_cap->vht_mcs.tx_highest) &
+ IEEE80211_VHT_EXT_NSS_BW_CAPABLE;
if (edmg_cap->channels &&
!cfg80211_edmg_usable(wiphy,
@@ -1015,7 +1017,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
(cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) ||
(cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) ||
- u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1;
+ (ext_nss_cap &&
+ u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1);
if (chandef->chan->band != NL80211_BAND_6GHZ && !support_80_80)
return false;
fallthrough;
@@ -1037,7 +1040,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ &&
- !(vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK))
+ !(ext_nss_cap &&
+ (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)))
return false;
break;
default:
@@ -1335,3 +1339,34 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
WARN_ON(1);
}
}
+
+bool cfg80211_any_usable_channels(struct wiphy *wiphy,
+ unsigned long sband_mask,
+ u32 prohibited_flags)
+{
+ int idx;
+
+ prohibited_flags |= IEEE80211_CHAN_DISABLED;
+
+ for_each_set_bit(idx, &sband_mask, NUM_NL80211_BANDS) {
+ struct ieee80211_supported_band *sband = wiphy->bands[idx];
+ int chanidx;
+
+ if (!sband)
+ continue;
+
+ for (chanidx = 0; chanidx < sband->n_channels; chanidx++) {
+ struct ieee80211_channel *chan;
+
+ chan = &sband->channels[chanidx];
+
+ if (chan->flags & prohibited_flags)
+ continue;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(cfg80211_any_usable_channels);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 6fbf7537faf5..03323121ca50 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -532,11 +532,11 @@ use_default_name:
wiphy_net_set(&rdev->wiphy, &init_net);
rdev->rfkill_ops.set_block = cfg80211_rfkill_set_block;
- rdev->rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev),
- &rdev->wiphy.dev, RFKILL_TYPE_WLAN,
- &rdev->rfkill_ops, rdev);
+ rdev->wiphy.rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev),
+ &rdev->wiphy.dev, RFKILL_TYPE_WLAN,
+ &rdev->rfkill_ops, rdev);
- if (!rdev->rfkill) {
+ if (!rdev->wiphy.rfkill) {
wiphy_free(&rdev->wiphy);
return NULL;
}
@@ -589,14 +589,6 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
if (WARN_ON(!c->num_different_channels))
return -EINVAL;
- /*
- * Put a sane limit on maximum number of different
- * channels to simplify channel accounting code.
- */
- if (WARN_ON(c->num_different_channels >
- CFG80211_MAX_NUM_DIFFERENT_CHANNELS))
- return -EINVAL;
-
/* DFS only works on one channel. */
if (WARN_ON(c->radar_detect_widths &&
(c->num_different_channels > 1)))
@@ -936,9 +928,6 @@ int wiphy_register(struct wiphy *wiphy)
return res;
}
- /* set up regulatory info */
- wiphy_regulatory_register(wiphy);
-
list_add_rcu(&rdev->list, &cfg80211_rdev_list);
cfg80211_rdev_list_generation++;
@@ -949,6 +938,9 @@ int wiphy_register(struct wiphy *wiphy)
cfg80211_debugfs_rdev_add(rdev);
nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+ /* set up regulatory info */
+ wiphy_regulatory_register(wiphy);
+
if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) {
struct regulatory_request request;
@@ -993,10 +985,10 @@ int wiphy_register(struct wiphy *wiphy)
rdev->wiphy.registered = true;
rtnl_unlock();
- res = rfkill_register(rdev->rfkill);
+ res = rfkill_register(rdev->wiphy.rfkill);
if (res) {
- rfkill_destroy(rdev->rfkill);
- rdev->rfkill = NULL;
+ rfkill_destroy(rdev->wiphy.rfkill);
+ rdev->wiphy.rfkill = NULL;
wiphy_unregister(&rdev->wiphy);
return res;
}
@@ -1012,18 +1004,10 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy)
if (!rdev->ops->rfkill_poll)
return;
rdev->rfkill_ops.poll = cfg80211_rfkill_poll;
- rfkill_resume_polling(rdev->rfkill);
+ rfkill_resume_polling(wiphy->rfkill);
}
EXPORT_SYMBOL(wiphy_rfkill_start_polling);
-void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
-{
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
-
- rfkill_pause_polling(rdev->rfkill);
-}
-EXPORT_SYMBOL(wiphy_rfkill_stop_polling);
-
void wiphy_unregister(struct wiphy *wiphy)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -1035,8 +1019,8 @@ void wiphy_unregister(struct wiphy *wiphy)
wiphy_unlock(&rdev->wiphy);
__count == 0; }));
- if (rdev->rfkill)
- rfkill_unregister(rdev->rfkill);
+ if (rdev->wiphy.rfkill)
+ rfkill_unregister(rdev->wiphy.rfkill);
rtnl_lock();
wiphy_lock(&rdev->wiphy);
@@ -1088,7 +1072,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
{
struct cfg80211_internal_bss *scan, *tmp;
struct cfg80211_beacon_registration *reg, *treg;
- rfkill_destroy(rdev->rfkill);
+ rfkill_destroy(rdev->wiphy.rfkill);
list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) {
list_del(&reg->list);
kfree(reg);
@@ -1110,7 +1094,7 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- if (rfkill_set_hw_state_reason(rdev->rfkill, blocked, reason))
+ if (rfkill_set_hw_state_reason(wiphy->rfkill, blocked, reason))
schedule_work(&rdev->rfkill_block);
}
EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason);
@@ -1340,6 +1324,11 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev,
rdev->devlist_generation++;
wdev->registered = true;
+ if (wdev->netdev &&
+ sysfs_create_link(&wdev->netdev->dev.kobj, &rdev->wiphy.dev.kobj,
+ "phy80211"))
+ pr_err("failed to add phy80211 symlink to netdev!\n");
+
nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
}
@@ -1365,14 +1354,6 @@ int cfg80211_register_netdevice(struct net_device *dev)
if (ret)
goto out;
- if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
- "phy80211")) {
- pr_err("failed to add phy80211 symlink to netdev!\n");
- unregister_netdevice(dev);
- ret = -EINVAL;
- goto out;
- }
-
cfg80211_register_wdev(rdev, wdev);
ret = 0;
out:
@@ -1506,7 +1487,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
wdev->use_4addr, 0))
return notifier_from_errno(-EOPNOTSUPP);
- if (rfkill_blocked(rdev->rfkill))
+ if (rfkill_blocked(rdev->wiphy.rfkill))
return notifier_from_errno(-ERFKILL);
break;
default:
diff --git a/net/wireless/core.h b/net/wireless/core.h
index a7d19b4b40ac..b35d0db12f1d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
* Wireless configuration interface internals.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#ifndef __NET_WIRELESS_CORE_H
#define __NET_WIRELESS_CORE_H
@@ -27,7 +27,6 @@ struct cfg80211_registered_device {
/* rfkill support */
struct rfkill_ops rfkill_ops;
- struct rfkill *rfkill;
struct work_struct rfkill_block;
/* ISO / IEC 3166 alpha2 for which this device is receiving
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fc9286afe3c9..50eb405b0690 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -330,7 +330,7 @@ nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = {
};
static const struct nla_policy
-nl80211_psmr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
+nl80211_pmsr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
[NL80211_PMSR_PEER_ATTR_ADDR] = NLA_POLICY_ETH_ADDR,
[NL80211_PMSR_PEER_ATTR_CHAN] = NLA_POLICY_NESTED(nl80211_policy),
[NL80211_PMSR_PEER_ATTR_REQ] =
@@ -345,7 +345,7 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = {
[NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR] = { .type = NLA_REJECT },
[NL80211_PMSR_ATTR_TYPE_CAPA] = { .type = NLA_REJECT },
[NL80211_PMSR_ATTR_PEERS] =
- NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy),
+ NLA_POLICY_NESTED_ARRAY(nl80211_pmsr_peer_attr_policy),
};
static const struct nla_policy
@@ -1731,6 +1731,11 @@ nl80211_send_iftype_data(struct sk_buff *msg,
&iftdata->he_6ghz_capa))
return -ENOBUFS;
+ if (iftdata->vendor_elems.data && iftdata->vendor_elems.len &&
+ nla_put(msg, NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS,
+ iftdata->vendor_elems.len, iftdata->vendor_elems.data))
+ return -ENOBUFS;
+
return 0;
}
@@ -4781,11 +4786,10 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
sband->ht_cap.mcs.rx_mask,
sizeof(mask->control[i].ht_mcs));
- if (!sband->vht_cap.vht_supported)
- continue;
-
- vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
- vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+ if (sband->vht_cap.vht_supported) {
+ vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
+ vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+ }
he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype);
if (!he_cap)
@@ -13042,7 +13046,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
if (wdev_running(wdev))
return 0;
- if (rfkill_blocked(rdev->rfkill))
+ if (rfkill_blocked(rdev->wiphy.rfkill))
return -ERFKILL;
err = rdev_start_p2p_device(rdev, wdev);
@@ -13084,7 +13088,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
if (wdev_running(wdev))
return -EEXIST;
- if (rfkill_blocked(rdev->rfkill))
+ if (rfkill_blocked(rdev->wiphy.rfkill))
return -ERFKILL;
if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF])
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 6bdd96408022..328cf54bda82 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -168,6 +168,18 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
return -EINVAL;
}
+ if (tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR]) {
+ if (!out->ftm.non_trigger_based && !out->ftm.trigger_based) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR],
+ "FTM: BSS color set for EDCA based ranging");
+ return -EINVAL;
+ }
+
+ out->ftm.bss_color =
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR]);
+ }
+
return 0;
}
@@ -334,6 +346,7 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
gfp_t gfp)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_pmsr_request *tmp, *prev, *to_free = NULL;
struct sk_buff *msg;
void *hdr;
@@ -364,9 +377,20 @@ free_msg:
nlmsg_free(msg);
free_request:
spin_lock_bh(&wdev->pmsr_lock);
- list_del(&req->list);
+ /*
+ * cfg80211_pmsr_process_abort() may have already moved this request
+ * to the free list, and will free it later. In this case, don't free
+ * it here.
+ */
+ list_for_each_entry_safe(tmp, prev, &wdev->pmsr_list, list) {
+ if (tmp == req) {
+ list_del(&req->list);
+ to_free = req;
+ break;
+ }
+ }
spin_unlock_bh(&wdev->pmsr_lock);
- kfree(req);
+ kfree(to_free);
}
EXPORT_SYMBOL_GPL(cfg80211_pmsr_complete);
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 8b1358d04ca2..b1d37f582dc6 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -464,8 +464,18 @@ static inline int rdev_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct cfg80211_assoc_request *req)
{
+ const struct cfg80211_bss_ies *bss_ies;
int ret;
- trace_rdev_assoc(&rdev->wiphy, dev, req);
+
+ /*
+ * Note: we might trace not exactly the data that's processed,
+ * due to races and the driver/mac80211 getting a newer copy.
+ */
+ rcu_read_lock();
+ bss_ies = rcu_dereference(req->bss->ies);
+ trace_rdev_assoc(&rdev->wiphy, dev, req, bss_ies);
+ rcu_read_unlock();
+
ret = rdev->ops->assoc(&rdev->wiphy, dev, req);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0406ce7334fa..c2d0ff7f089f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3975,7 +3975,9 @@ static int __regulatory_set_wiphy_regd(struct wiphy *wiphy,
"wiphy should have REGULATORY_WIPHY_SELF_MANAGED\n"))
return -EPERM;
- if (WARN(!is_valid_rd(rd), "Invalid regulatory domain detected\n")) {
+ if (WARN(!is_valid_rd(rd),
+ "Invalid regulatory domain detected: %c%c\n",
+ rd->alpha2[0], rd->alpha2[1])) {
print_regdomain_info(rd);
return -EINVAL;
}
@@ -4049,6 +4051,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy)
wiphy_update_regulatory(wiphy, lr->initiator);
wiphy_all_share_dfs_chan_state(wiphy);
+ reg_process_self_managed_hints();
}
void wiphy_regulatory_deregister(struct wiphy *wiphy)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 4f06c1825029..f03c7ac8e184 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -5,7 +5,7 @@
* Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -618,7 +618,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
freq = ieee80211_channel_to_frequency(ap_info->channel, band);
- if (end - pos < count * ap_info->tbtt_info_len)
+ if (end - pos < count * length)
break;
/*
@@ -630,7 +630,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
if (band != NL80211_BAND_6GHZ ||
(length != IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM &&
length < IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM)) {
- pos += count * ap_info->tbtt_info_len;
+ pos += count * length;
continue;
}
@@ -653,7 +653,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
kfree(entry);
}
- pos += ap_info->tbtt_info_len;
+ pos += length;
}
}
@@ -757,7 +757,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
}
request = kzalloc(struct_size(request, channels, n_channels) +
- sizeof(*request->scan_6ghz_params) * count,
+ sizeof(*request->scan_6ghz_params) * count +
+ sizeof(*request->ssids) * rdev_req->n_ssids,
GFP_KERNEL);
if (!request) {
cfg80211_free_coloc_ap_list(&coloc_ap_list);
@@ -848,10 +849,19 @@ skip:
if (request->n_channels) {
struct cfg80211_scan_request *old = rdev->int_scan_req;
-
rdev->int_scan_req = request;
/*
+ * Add the ssids from the parent scan request to the new scan
+ * request, so the driver would be able to use them in its
+ * probe requests to discover hidden APs on PSC channels.
+ */
+ request->ssids = (void *)&request->channels[request->n_channels];
+ request->n_ssids = rdev_req->n_ssids;
+ memcpy(request->ssids, rdev_req->ssids, sizeof(*request->ssids) *
+ request->n_ssids);
+
+ /*
* If this scan follows a previous scan, save the scan start
* info from the first part of the scan
*/
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 9b959e3b09c6..0c3f05c9be27 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -133,6 +133,10 @@ static int wiphy_resume(struct device *dev)
if (rdev->wiphy.registered && rdev->ops->resume)
ret = rdev_resume(rdev);
wiphy_unlock(&rdev->wiphy);
+
+ if (ret)
+ cfg80211_shutdown_all_interfaces(&rdev->wiphy);
+
rtnl_unlock();
return ret;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 76b777d5903f..440bce5f0274 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1195,8 +1195,9 @@ TRACE_EVENT(rdev_auth,
TRACE_EVENT(rdev_assoc,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_assoc_request *req),
- TP_ARGS(wiphy, netdev, req),
+ struct cfg80211_assoc_request *req,
+ const struct cfg80211_bss_ies *bss_ies),
+ TP_ARGS(wiphy, netdev, req, bss_ies),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
@@ -1204,6 +1205,17 @@ TRACE_EVENT(rdev_assoc,
MAC_ENTRY(prev_bssid)
__field(bool, use_mfp)
__field(u32, flags)
+ __dynamic_array(u8, bss_elements, bss_ies->len)
+ __field(bool, bss_elements_bcon)
+ __field(u64, bss_elements_tsf)
+ __dynamic_array(u8, elements, req->ie_len)
+ __array(u8, ht_capa, sizeof(struct ieee80211_ht_cap))
+ __array(u8, ht_capa_mask, sizeof(struct ieee80211_ht_cap))
+ __array(u8, vht_capa, sizeof(struct ieee80211_vht_cap))
+ __array(u8, vht_capa_mask, sizeof(struct ieee80211_vht_cap))
+ __dynamic_array(u8, fils_kek, req->fils_kek_len)
+ __dynamic_array(u8, fils_nonces,
+ req->fils_nonces ? 2 * FILS_NONCE_LEN : 0)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -1215,6 +1227,26 @@ TRACE_EVENT(rdev_assoc,
MAC_ASSIGN(prev_bssid, req->prev_bssid);
__entry->use_mfp = req->use_mfp;
__entry->flags = req->flags;
+ if (bss_ies->len)
+ memcpy(__get_dynamic_array(bss_elements),
+ bss_ies->data, bss_ies->len);
+ __entry->bss_elements_bcon = bss_ies->from_beacon;
+ __entry->bss_elements_tsf = bss_ies->tsf;
+ if (req->ie)
+ memcpy(__get_dynamic_array(elements),
+ req->ie, req->ie_len);
+ memcpy(__entry->ht_capa, &req->ht_capa, sizeof(req->ht_capa));
+ memcpy(__entry->ht_capa_mask, &req->ht_capa_mask,
+ sizeof(req->ht_capa_mask));
+ memcpy(__entry->vht_capa, &req->vht_capa, sizeof(req->vht_capa));
+ memcpy(__entry->vht_capa_mask, &req->vht_capa_mask,
+ sizeof(req->vht_capa_mask));
+ if (req->fils_kek)
+ memcpy(__get_dynamic_array(fils_kek),
+ req->fils_kek, req->fils_kek_len);
+ if (req->fils_nonces)
+ memcpy(__get_dynamic_array(fils_nonces),
+ req->fils_nonces, 2 * FILS_NONCE_LEN);
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
", previous bssid: " MAC_PR_FMT ", use mfp: %s, flags: %u",
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7ec021a610ae..18dba3d7c638 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
case NL80211_IFTYPE_MESH_POINT:
/* mesh should be handled? */
break;
+ case NL80211_IFTYPE_OCB:
+ cfg80211_leave_ocb(rdev, dev);
+ break;
default:
break;
}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index a8320dc59af7..a32065d600a1 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -902,7 +902,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
/* only change when not disabling */
if (!data->txpower.disabled) {
- rfkill_set_sw_state(rdev->rfkill, false);
+ rfkill_set_sw_state(rdev->wiphy.rfkill, false);
if (data->txpower.fixed) {
/*
@@ -927,7 +927,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
}
}
} else {
- if (rfkill_set_sw_state(rdev->rfkill, true))
+ if (rfkill_set_sw_state(rdev->wiphy.rfkill, true))
schedule_work(&rdev->rfkill_block);
return 0;
}
@@ -963,7 +963,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev,
/* well... oh well */
data->txpower.fixed = 1;
- data->txpower.disabled = rfkill_blocked(rdev->rfkill);
+ data->txpower.disabled = rfkill_blocked(rdev->wiphy.rfkill);
data->txpower.value = val;
data->txpower.flags = IW_TXPOW_DBM;
@@ -1167,7 +1167,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- bool ps = wdev->ps;
+ bool ps;
int timeout = wdev->ps_timeout;
int err;
diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c
index 33bef22e44e9..b379a0371653 100644
--- a/net/wireless/wext-spy.c
+++ b/net/wireless/wext-spy.c
@@ -120,8 +120,8 @@ int iw_handler_set_thrspy(struct net_device * dev,
return -EOPNOTSUPP;
/* Just do it */
- memcpy(&(spydata->spy_thr_low), &(threshold->low),
- 2 * sizeof(struct iw_quality));
+ spydata->spy_thr_low = threshold->low;
+ spydata->spy_thr_high = threshold->high;
/* Clear flag */
memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
@@ -147,8 +147,8 @@ int iw_handler_get_thrspy(struct net_device * dev,
return -EOPNOTSUPP;
/* Just do it */
- memcpy(&(threshold->low), &(spydata->spy_thr_low),
- 2 * sizeof(struct iw_quality));
+ threshold->low = spydata->spy_thr_low;
+ threshold->high = spydata->spy_thr_high;
return 0;
}
@@ -173,10 +173,10 @@ static void iw_send_thrspy_event(struct net_device * dev,
memcpy(threshold.addr.sa_data, address, ETH_ALEN);
threshold.addr.sa_family = ARPHRD_ETHER;
/* Copy stats */
- memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
+ threshold.qual = *wstats;
/* Copy also thresholds */
- memcpy(&(threshold.low), &(spydata->spy_thr_low),
- 2 * sizeof(struct iw_quality));
+ threshold.low = spydata->spy_thr_low;
+ threshold.high = spydata->spy_thr_high;
/* Send event to user space */
wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index ce66323102f9..d12bb906c9c9 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -131,6 +131,13 @@ __xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto,
return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
}
+static inline unsigned int
+__xfrm_seq_hash(u32 seq, unsigned int hmask)
+{
+ unsigned int h = seq;
+ return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
+}
+
static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
{
return (index ^ (index >> 8)) & hmask;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1158cd0311d7..3df0861d4390 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -612,7 +612,7 @@ lock:
goto drop_unlock;
}
- if (x->repl->check(x, skb, seq)) {
+ if (xfrm_replay_check(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}
@@ -660,12 +660,12 @@ resume:
/* only the first xfrm gets the encap type */
encap_type = 0;
- if (x->repl->recheck(x, skb, seq)) {
+ if (xfrm_replay_recheck(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}
- x->repl->advance(x, seq);
+ xfrm_replay_advance(x, seq);
x->curlft.bytes += skb->len;
x->curlft.packets++;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e4cb0ff4dcf4..ab2fbe432261 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -77,6 +77,83 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
+{
+ const unsigned char *nh = skb_network_header(skb);
+ unsigned int offset = sizeof(struct ipv6hdr);
+ unsigned int packet_len;
+ int found_rhdr = 0;
+
+ packet_len = skb_tail_pointer(skb) - nh;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+ while (offset <= packet_len) {
+ struct ipv6_opt_hdr *exthdr;
+
+ switch (**nexthdr) {
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
+ struct ipv6_rt_hdr *rt;
+
+ rt = (struct ipv6_rt_hdr *)(nh + offset);
+ if (rt->type != 0)
+ return offset;
+ }
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+ /* HAO MUST NOT appear more than once.
+ * XXX: It is better to try to find by the end of
+ * XXX: packet if HAO exists.
+ */
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
+ net_dbg_ratelimited("mip6: hao exists already, override\n");
+ return offset;
+ }
+
+ if (found_rhdr)
+ return offset;
+
+ break;
+ default:
+ return offset;
+ }
+
+ if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
+ return -EINVAL;
+
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
+ offset += ipv6_optlen(exthdr);
+ if (offset > IPV6_MAXPLEN)
+ return -EINVAL;
+ *nexthdr = &exthdr->nexthdr;
+ }
+
+ return -EINVAL;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
+{
+ switch (x->type->proto) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ return mip6_rthdr_offset(skb, prevhdr, x->type->proto);
+#endif
+ default:
+ break;
+ }
+
+ return ip6_find_1stfragopt(skb, prevhdr);
+}
+#endif
+
/* Add encapsulation header.
*
* The IP header and mutable extension headers will be moved forward to make
@@ -92,7 +169,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
iph = ipv6_hdr(skb);
skb_set_inner_transport_header(skb, skb_transport_offset(skb));
- hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
if (hdr_len < 0)
return hdr_len;
skb_set_mac_header(skb,
@@ -122,7 +199,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
iph = ipv6_hdr(skb);
- hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
if (hdr_len < 0)
return hdr_len;
skb_set_mac_header(skb,
@@ -448,7 +525,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
goto error;
}
- err = x->repl->overflow(x, skb);
+ err = xfrm_replay_overflow(x, skb);
if (err) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
goto error;
@@ -565,6 +642,42 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
return 0;
}
+/* For partial checksum offload, the outer header checksum is calculated
+ * by software and the inner header checksum is calculated by hardware.
+ * This requires hardware to know the inner packet type to calculate
+ * the inner header checksum. Save inner ip protocol here to avoid
+ * traversing the packet in the vendor's xmit code.
+ * If the encap type is IPIP, just save skb->inner_ipproto. Otherwise,
+ * get the ip protocol from the IP header.
+ */
+static void xfrm_get_inner_ipproto(struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ const struct ethhdr *eth;
+
+ if (!xo)
+ return;
+
+ if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
+ xo->inner_ipproto = skb->inner_ipproto;
+ return;
+ }
+
+ if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
+ return;
+
+ eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+ switch (ntohs(eth->h_proto)) {
+ case ETH_P_IPV6:
+ xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
+ break;
+ case ETH_P_IP:
+ xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
+ break;
+ }
+}
+
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -594,12 +707,15 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
return -ENOMEM;
}
- skb->encapsulation = 1;
sp->olen++;
sp->xvec[sp->len++] = x;
xfrm_state_hold(x);
+ if (skb->encapsulation)
+ xfrm_get_inner_ipproto(skb);
+ skb->encapsulation = 1;
+
if (skb_is_gso(skb)) {
if (skb->inner_protocol)
return xfrm_output_gso(net, sk, skb);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ce500f847b99..1e24b21457f7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3247,7 +3247,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
/*
* 0 or more than 0 is returned when validation is succeeded (either bypass
- * because of optional transport mode, or next index of the mathced secpath
+ * because of optional transport mode, or next index of the matched secpath
* state with the template.
* -1 is returned when no matching template is found.
* Otherwise "-2 - errored_index" is returned.
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index c6a4338a0d08..9277d81b344c 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -34,8 +34,11 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq)
return seq_hi;
}
EXPORT_SYMBOL(xfrm_replay_seqhi);
-;
-static void xfrm_replay_notify(struct xfrm_state *x, int event)
+
+static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event);
+static void xfrm_replay_notify_esn(struct xfrm_state *x, int event);
+
+void xfrm_replay_notify(struct xfrm_state *x, int event)
{
struct km_event c;
/* we send notify messages in case
@@ -48,6 +51,17 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event)
* The state structure must be locked!
*/
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ xfrm_replay_notify_bmp(x, event);
+ return;
+ case XFRM_REPLAY_MODE_ESN:
+ xfrm_replay_notify_esn(x, event);
+ return;
+ }
+
switch (event) {
case XFRM_REPLAY_UPDATE:
if (!x->replay_maxdiff ||
@@ -81,7 +95,7 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event)
x->xflags &= ~XFRM_TIME_DEFER;
}
-static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
+static int __xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
{
int err = 0;
struct net *net = xs_net(x);
@@ -98,14 +112,14 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
return err;
}
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
}
-static int xfrm_replay_check(struct xfrm_state *x,
- struct sk_buff *skb, __be32 net_seq)
+static int xfrm_replay_check_legacy(struct xfrm_state *x,
+ struct sk_buff *skb, __be32 net_seq)
{
u32 diff;
u32 seq = ntohl(net_seq);
@@ -136,14 +150,26 @@ err:
return -EINVAL;
}
-static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
+static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq);
+static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq);
+
+void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
- u32 diff;
- u32 seq = ntohl(net_seq);
+ u32 diff, seq;
+
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ return xfrm_replay_advance_bmp(x, net_seq);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_advance_esn(x, net_seq);
+ }
if (!x->props.replay_window)
return;
+ seq = ntohl(net_seq);
if (seq > x->replay.seq) {
diff = seq - x->replay.seq;
if (diff < x->props.replay_window)
@@ -157,7 +183,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
}
if (xfrm_aevent_is_on(xs_net(x)))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
@@ -178,7 +204,7 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
return err;
}
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
@@ -273,7 +299,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
replay_esn->bmp[nr] |= (1U << bitnr);
if (xfrm_aevent_is_on(xs_net(x)))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
@@ -416,7 +442,7 @@ static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb)
}
}
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
@@ -481,6 +507,21 @@ err:
return -EINVAL;
}
+int xfrm_replay_check(struct xfrm_state *x,
+ struct sk_buff *skb, __be32 net_seq)
+{
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ return xfrm_replay_check_bmp(x, skb, net_seq);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_check_esn(x, skb, net_seq);
+ }
+
+ return xfrm_replay_check_legacy(x, skb, net_seq);
+}
+
static int xfrm_replay_recheck_esn(struct xfrm_state *x,
struct sk_buff *skb, __be32 net_seq)
{
@@ -493,6 +534,22 @@ static int xfrm_replay_recheck_esn(struct xfrm_state *x,
return xfrm_replay_check_esn(x, skb, net_seq);
}
+int xfrm_replay_recheck(struct xfrm_state *x,
+ struct sk_buff *skb, __be32 net_seq)
+{
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ /* no special recheck treatment */
+ return xfrm_replay_check_bmp(x, skb, net_seq);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_recheck_esn(x, skb, net_seq);
+ }
+
+ return xfrm_replay_check_legacy(x, skb, net_seq);
+}
+
static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
{
unsigned int bitnr, nr, i;
@@ -548,7 +605,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
replay_esn->bmp[nr] |= (1U << bitnr);
if (xfrm_aevent_is_on(xs_net(x)))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
#ifdef CONFIG_XFRM_OFFLOAD
@@ -560,7 +617,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk
__u32 oseq = x->replay.oseq;
if (!xo)
- return xfrm_replay_overflow(x, skb);
+ return __xfrm_replay_overflow(x, skb);
if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
if (!skb_is_gso(skb)) {
@@ -585,7 +642,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk
x->replay.oseq = oseq;
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
@@ -625,7 +682,7 @@ static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff
}
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
@@ -674,59 +731,39 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
replay_esn->oseq = oseq;
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
}
-static const struct xfrm_replay xfrm_replay_legacy = {
- .advance = xfrm_replay_advance,
- .check = xfrm_replay_check,
- .recheck = xfrm_replay_check,
- .notify = xfrm_replay_notify,
- .overflow = xfrm_replay_overflow_offload,
-};
-
-static const struct xfrm_replay xfrm_replay_bmp = {
- .advance = xfrm_replay_advance_bmp,
- .check = xfrm_replay_check_bmp,
- .recheck = xfrm_replay_check_bmp,
- .notify = xfrm_replay_notify_bmp,
- .overflow = xfrm_replay_overflow_offload_bmp,
-};
-
-static const struct xfrm_replay xfrm_replay_esn = {
- .advance = xfrm_replay_advance_esn,
- .check = xfrm_replay_check_esn,
- .recheck = xfrm_replay_recheck_esn,
- .notify = xfrm_replay_notify_esn,
- .overflow = xfrm_replay_overflow_offload_esn,
-};
+int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
+{
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ return xfrm_replay_overflow_offload_bmp(x, skb);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_overflow_offload_esn(x, skb);
+ }
+
+ return xfrm_replay_overflow_offload(x, skb);
+}
#else
-static const struct xfrm_replay xfrm_replay_legacy = {
- .advance = xfrm_replay_advance,
- .check = xfrm_replay_check,
- .recheck = xfrm_replay_check,
- .notify = xfrm_replay_notify,
- .overflow = xfrm_replay_overflow,
-};
-
-static const struct xfrm_replay xfrm_replay_bmp = {
- .advance = xfrm_replay_advance_bmp,
- .check = xfrm_replay_check_bmp,
- .recheck = xfrm_replay_check_bmp,
- .notify = xfrm_replay_notify_bmp,
- .overflow = xfrm_replay_overflow_bmp,
-};
-
-static const struct xfrm_replay xfrm_replay_esn = {
- .advance = xfrm_replay_advance_esn,
- .check = xfrm_replay_check_esn,
- .recheck = xfrm_replay_recheck_esn,
- .notify = xfrm_replay_notify_esn,
- .overflow = xfrm_replay_overflow_esn,
-};
+int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
+{
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ return xfrm_replay_overflow_bmp(x, skb);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_overflow_esn(x, skb);
+ }
+
+ return __xfrm_replay_overflow(x, skb);
+}
#endif
int xfrm_init_replay(struct xfrm_state *x)
@@ -741,12 +778,12 @@ int xfrm_init_replay(struct xfrm_state *x)
if (x->props.flags & XFRM_STATE_ESN) {
if (replay_esn->replay_window == 0)
return -EINVAL;
- x->repl = &xfrm_replay_esn;
+ x->repl_mode = XFRM_REPLAY_MODE_ESN;
} else {
- x->repl = &xfrm_replay_bmp;
+ x->repl_mode = XFRM_REPLAY_MODE_BMP;
}
} else {
- x->repl = &xfrm_replay_legacy;
+ x->repl_mode = XFRM_REPLAY_MODE_LEGACY;
}
return 0;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 4496f7efa220..c2ce1e6f4760 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -78,10 +78,16 @@ xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
}
+static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
+{
+ return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
+}
+
static void xfrm_hash_transfer(struct hlist_head *list,
struct hlist_head *ndsttable,
struct hlist_head *nsrctable,
struct hlist_head *nspitable,
+ struct hlist_head *nseqtable,
unsigned int nhashmask)
{
struct hlist_node *tmp;
@@ -106,6 +112,11 @@ static void xfrm_hash_transfer(struct hlist_head *list,
nhashmask);
hlist_add_head_rcu(&x->byspi, nspitable + h);
}
+
+ if (x->km.seq) {
+ h = __xfrm_seq_hash(x->km.seq, nhashmask);
+ hlist_add_head_rcu(&x->byseq, nseqtable + h);
+ }
}
}
@@ -117,7 +128,7 @@ static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
static void xfrm_hash_resize(struct work_struct *work)
{
struct net *net = container_of(work, struct net, xfrm.state_hash_work);
- struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
+ struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq;
unsigned long nsize, osize;
unsigned int nhashmask, ohashmask;
int i;
@@ -137,6 +148,13 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_hash_free(nsrc, nsize);
return;
}
+ nseq = xfrm_hash_alloc(nsize);
+ if (!nseq) {
+ xfrm_hash_free(ndst, nsize);
+ xfrm_hash_free(nsrc, nsize);
+ xfrm_hash_free(nspi, nsize);
+ return;
+ }
spin_lock_bh(&net->xfrm.xfrm_state_lock);
write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
@@ -144,15 +162,17 @@ static void xfrm_hash_resize(struct work_struct *work)
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
for (i = net->xfrm.state_hmask; i >= 0; i--)
- xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
+ xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);
osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
+ oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
ohashmask = net->xfrm.state_hmask;
rcu_assign_pointer(net->xfrm.state_bydst, ndst);
rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
rcu_assign_pointer(net->xfrm.state_byspi, nspi);
+ rcu_assign_pointer(net->xfrm.state_byseq, nseq);
net->xfrm.state_hmask = nhashmask;
write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
@@ -165,6 +185,7 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_hash_free(odst, osize);
xfrm_hash_free(osrc, osize);
xfrm_hash_free(ospi, osize);
+ xfrm_hash_free(oseq, osize);
}
static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
@@ -621,6 +642,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
+ INIT_HLIST_NODE(&x->byseq);
hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT);
x->mtimer.function = xfrm_timer_handler;
timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
@@ -664,6 +686,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
list_del(&x->km.all);
hlist_del_rcu(&x->bydst);
hlist_del_rcu(&x->bysrc);
+ if (x->km.seq)
+ hlist_del_rcu(&x->byseq);
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
@@ -1148,6 +1172,10 @@ found:
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
+ if (x->km.seq) {
+ h = xfrm_seq_hash(net, x->km.seq);
+ hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+ }
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
hrtimer_start(&x->mtimer,
ktime_set(net->xfrm.sysctl_acq_expires, 0),
@@ -1263,6 +1291,12 @@ static void __xfrm_state_insert(struct xfrm_state *x)
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
+ if (x->km.seq) {
+ h = xfrm_seq_hash(net, x->km.seq);
+
+ hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+ }
+
hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
if (x->replay_maxage)
mod_timer(&x->rtimer, jiffies + x->replay_maxage);
@@ -1932,20 +1966,18 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
{
- int i;
-
- for (i = 0; i <= net->xfrm.state_hmask; i++) {
- struct xfrm_state *x;
+ unsigned int h = xfrm_seq_hash(net, seq);
+ struct xfrm_state *x;
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
- if (x->km.seq == seq &&
- (mark & x->mark.m) == x->mark.v &&
- x->km.state == XFRM_STATE_ACQ) {
- xfrm_state_hold(x);
- return x;
- }
+ hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
+ if (x->km.seq == seq &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->km.state == XFRM_STATE_ACQ) {
+ xfrm_state_hold(x);
+ return x;
}
}
+
return NULL;
}
@@ -2145,7 +2177,7 @@ static void xfrm_replay_timer_handler(struct timer_list *t)
if (x->km.state == XFRM_STATE_VALID) {
if (xfrm_aevent_is_on(xs_net(x)))
- x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
+ xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
else
x->xflags |= XFRM_TIME_DEFER;
}
@@ -2660,6 +2692,9 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_byspi = xfrm_hash_alloc(sz);
if (!net->xfrm.state_byspi)
goto out_byspi;
+ net->xfrm.state_byseq = xfrm_hash_alloc(sz);
+ if (!net->xfrm.state_byseq)
+ goto out_byseq;
net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
net->xfrm.state_num = 0;
@@ -2669,6 +2704,8 @@ int __net_init xfrm_state_init(struct net *net)
&net->xfrm.xfrm_state_lock);
return 0;
+out_byseq:
+ xfrm_hash_free(net->xfrm.state_byspi, sz);
out_byspi:
xfrm_hash_free(net->xfrm.state_bysrc, sz);
out_bysrc:
@@ -2688,6 +2725,8 @@ void xfrm_state_fini(struct net *net)
WARN_ON(!list_empty(&net->xfrm.state_all));
sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
+ WARN_ON(!hlist_empty(net->xfrm.state_byseq));
+ xfrm_hash_free(net->xfrm.state_byseq, sz);
WARN_ON(!hlist_empty(net->xfrm.state_byspi));
xfrm_hash_free(net->xfrm.state_byspi, sz);
WARN_ON(!hlist_empty(net->xfrm.state_bysrc));