Diffstat (limited to 'net')
185 files changed, 4379 insertions, 2098 deletions
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index e3f6ff05a528..1a705a4ef7fa 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -108,7 +108,8 @@ static inline netdev_features_t vlan_tnl_features(struct net_device *real_dev) netdev_features_t ret; ret = real_dev->hw_enc_features & - (NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO | NETIF_F_GSO_ENCAP_ALL); + (NETIF_F_CSUM_MASK | NETIF_F_GSO_SOFTWARE | + NETIF_F_GSO_ENCAP_ALL); if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK)) return (ret & ~NETIF_F_CSUM_MASK) | NETIF_F_HW_CSUM; diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index be18af481d7d..c7236daa2415 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -768,7 +768,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, if (a && a->status & ATIF_PROBE) { a->status |= ATIF_PROBE_FAIL; /* - * we do not respond to probe or request packets for + * we do not respond to probe or request packets of * this address while we are probing this address */ goto unlock; diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 680def809838..12022378f892 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -409,8 +409,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) if (WARN_ON(!forw_packet->if_outgoing)) return; - if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) + if (forw_packet->if_outgoing->soft_iface != soft_iface) { + pr_warn("%s: soft interface switch for queued OGM\n", __func__); return; + } if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) return; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 93144e0c7efa..4d93c6c32a71 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3229,7 +3229,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan) { struct l2cap_chan *chan; - bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan); + BT_DBG("pchan %p", pchan); chan = l2cap_chan_create(); if (!chan) @@ -3250,7 +3250,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan) */ atomic_set(&chan->nesting, L2CAP_NESTING_SMP); - bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan); + BT_DBG("created chan %p", chan); return chan; } @@ -3354,7 +3354,7 @@ static void smp_del_chan(struct l2cap_chan *chan) { struct smp_dev *smp; - bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan); + BT_DBG("chan %p", chan); smp = chan->data; if (smp) { diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c index 001064f7583d..a3c755d0a09d 100644 --- a/net/bridge/br_cfm.c +++ b/net/bridge/br_cfm.c @@ -142,7 +142,7 @@ static void br_cfm_notify(int event, const struct net_bridge_port *port) { u32 filter = RTEXT_FILTER_CFM_STATUS; - return br_info_notify(event, port->br, NULL, filter); + br_info_notify(event, port->br, NULL, filter); } static void cc_peer_enable(struct br_cfm_peer_mep *peer_mep) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 698b79747d32..16f9434fdb5d 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -727,8 +727,9 @@ static inline size_t fdb_nlmsg_size(void) } static int br_fdb_replay_one(struct notifier_block *nb, - struct net_bridge_fdb_entry *fdb, - struct net_device *dev) + const struct net_bridge_fdb_entry *fdb, + struct net_device *dev, unsigned long action, + const void *ctx) { struct switchdev_notifier_fdb_info item; int err; @@ -737,17 +738,20 @@ static int br_fdb_replay_one(struct notifier_block *nb, item.vid = fdb->key.vlan_id; item.added_by_user = 
test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags); + item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags); item.info.dev = dev; + item.info.ctx = ctx; - err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item); + err = nb->notifier_call(nb, action, &item); return notifier_to_errno(err); } -int br_fdb_replay(struct net_device *br_dev, struct net_device *dev, - struct notifier_block *nb) +int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, + const void *ctx, bool adding, struct notifier_block *nb) { struct net_bridge_fdb_entry *fdb; struct net_bridge *br; + unsigned long action; int err = 0; if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev)) @@ -755,17 +759,22 @@ int br_fdb_replay(struct net_device *br_dev, struct net_device *dev, br = netdev_priv(br_dev); + if (adding) + action = SWITCHDEV_FDB_ADD_TO_DEVICE; + else + action = SWITCHDEV_FDB_DEL_TO_DEVICE; + rcu_read_lock(); hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) { - struct net_bridge_port *dst = READ_ONCE(fdb->dst); + const struct net_bridge_port *dst = READ_ONCE(fdb->dst); struct net_device *dst_dev; dst_dev = dst ? dst->dev : br->dev; if (dst_dev != br_dev && dst_dev != dev) continue; - err = br_fdb_replay_one(nb, fdb, dst_dev); + err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx); if (err) break; } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 3f839a8cc9fb..17a720b4473f 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -567,19 +567,21 @@ static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb, } static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, - struct switchdev_obj_port_mdb *mdb, + const struct switchdev_obj_port_mdb *mdb, + unsigned long action, const void *ctx, struct netlink_ext_ack *extack) { struct switchdev_notifier_port_obj_info obj_info = { .info = { .dev = dev, .extack = extack, + .ctx = ctx, }, .obj = &mdb->obj, }; int err; - err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info); + err = nb->notifier_call(nb, action, &obj_info); return notifier_to_errno(err); } @@ -603,11 +605,13 @@ static int br_mdb_queue_one(struct list_head *mdb_list, } int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, - struct notifier_block *nb, struct netlink_ext_ack *extack) + const void *ctx, bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack) { - struct net_bridge_mdb_entry *mp; + const struct net_bridge_mdb_entry *mp; struct switchdev_obj *obj, *tmp; struct net_bridge *br; + unsigned long action; LIST_HEAD(mdb_list); int err = 0; @@ -632,8 +636,8 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, rcu_read_lock(); hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { - struct net_bridge_port_group __rcu **pp; - struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu * const *pp; + const struct net_bridge_port_group *p; if (mp->host_joined) { err = br_mdb_queue_one(&mdb_list, @@ -662,9 +666,14 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, rcu_read_unlock(); + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; + list_for_each_entry(obj, &mdb_list, list) { err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj), - extack); + action, ctx, extack); if (err) goto out_free_mdb; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index ec661130c2d0..a684d0cfc58c 100644 --- a/net/bridge/br_private.h 
+++ b/net/bridge/br_private.h @@ -90,8 +90,8 @@ struct bridge_mcast_stats { #endif struct br_tunnel_info { - __be64 tunnel_id; - struct metadata_dst *tunnel_dst; + __be64 tunnel_id; + struct metadata_dst __rcu *tunnel_dst; }; /* private vlan flags */ diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 3dafb6143cff..1d80f34a139c 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -639,9 +639,9 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time) return 0; } -clock_t br_get_ageing_time(struct net_device *br_dev) +clock_t br_get_ageing_time(const struct net_device *br_dev) { - struct net_bridge *br; + const struct net_bridge *br; if (!netif_is_bridge_master(br_dev)) return 0; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index da3256a3eed0..a08e9f193009 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -113,9 +113,7 @@ static void __vlan_add_list(struct net_bridge_vlan *v) headp = &vg->vlan_list; list_for_each_prev(hpos, headp) { vent = list_entry(hpos, struct net_bridge_vlan, vlist); - if (v->vid < vent->vid) - continue; - else + if (v->vid >= vent->vid) break; } list_add_rcu(&v->vlist, hpos); @@ -1809,28 +1807,32 @@ out_kfree: static int br_vlan_replay_one(struct notifier_block *nb, struct net_device *dev, struct switchdev_obj_port_vlan *vlan, + const void *ctx, unsigned long action, struct netlink_ext_ack *extack) { struct switchdev_notifier_port_obj_info obj_info = { .info = { .dev = dev, .extack = extack, + .ctx = ctx, }, .obj = &vlan->obj, }; int err; - err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info); + err = nb->notifier_call(nb, action, &obj_info); return notifier_to_errno(err); } int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, - struct notifier_block *nb, struct netlink_ext_ack *extack) + const void *ctx, bool adding, struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct net_bridge_port *p; struct net_bridge *br; + unsigned long action; int err = 0; u16 pvid; @@ -1857,6 +1859,11 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, if (!vg) return 0; + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; + pvid = br_get_pvid(vg); list_for_each_entry(v, &vg->vlan_list, vlist) { @@ -1870,7 +1877,7 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev, if (!br_vlan_should_use(v)) continue; - err = br_vlan_replay_one(nb, dev, &vlan, extack); + err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack); if (err) return err; } diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index 0d3a8c01552e..01017448ebde 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -41,26 +41,33 @@ static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl, br_vlan_tunnel_rht_params); } +static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan) +{ + struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst); + + WRITE_ONCE(vlan->tinfo.tunnel_id, 0); + RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL); + dst_release(&tdst->dst); +} + void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, struct net_bridge_vlan *vlan) { - if (!vlan->tinfo.tunnel_dst) + if (!rcu_access_pointer(vlan->tinfo.tunnel_dst)) return; rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode, br_vlan_tunnel_rht_params); - vlan->tinfo.tunnel_id = 0; - dst_release(&vlan->tinfo.tunnel_dst->dst); - 
vlan->tinfo.tunnel_dst = NULL; + vlan_tunnel_info_release(vlan); } static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, struct net_bridge_vlan *vlan, u32 tun_id) { - struct metadata_dst *metadata = NULL; + struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst); __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id)); int err; - if (vlan->tinfo.tunnel_dst) + if (metadata) return -EEXIST; metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, @@ -69,8 +76,8 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, return -EINVAL; metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE; - vlan->tinfo.tunnel_dst = metadata; - vlan->tinfo.tunnel_id = key; + rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata); + WRITE_ONCE(vlan->tinfo.tunnel_id, key); err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode, br_vlan_tunnel_rht_params); @@ -79,9 +86,7 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, return 0; out: - dst_release(&vlan->tinfo.tunnel_dst->dst); - vlan->tinfo.tunnel_dst = NULL; - vlan->tinfo.tunnel_id = 0; + vlan_tunnel_info_release(vlan); return err; } @@ -182,12 +187,15 @@ int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, int br_handle_egress_vlan_tunnel(struct sk_buff *skb, struct net_bridge_vlan *vlan) { + struct metadata_dst *tunnel_dst; + __be64 tunnel_id; int err; - if (!vlan || !vlan->tinfo.tunnel_id) + if (!vlan) return 0; - if (unlikely(!skb_vlan_tag_present(skb))) + tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id); + if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb))) return 0; skb_dst_drop(skb); @@ -195,7 +203,9 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb, if (err) return err; - skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst)); + tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst); + if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst)) + skb_dst_set(skb, &tunnel_dst->dst); return 0; } diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index cac30e676ac9..23267c8db7c4 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -480,7 +480,7 @@ got_phyid: phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC); if (!phyinfo) { res = -ENOMEM; - goto out_err; + goto out; } phy_layer->id = phyid; diff --git a/net/can/bcm.c b/net/can/bcm.c index 909b9e684e04..f3e4d9528fa3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -125,7 +125,7 @@ struct bcm_sock { struct sock sk; int bound; int ifindex; - struct notifier_block notifier; + struct list_head notifier; struct list_head rx_ops; struct list_head tx_ops; unsigned long dropped_usr_msgs; @@ -133,6 +133,10 @@ struct bcm_sock { char procname [32]; /* inode number in decimal with \0 */ }; +static LIST_HEAD(bcm_notifier_list); +static DEFINE_SPINLOCK(bcm_notifier_lock); +static struct bcm_sock *bcm_busy_notifier; + static inline struct bcm_sock *bcm_sk(const struct sock *sk) { return (struct bcm_sock *)sk; @@ -402,6 +406,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) if (!op->count && (op->flags & TX_COUNTEVT)) { /* create notification to user */ + memset(&msg_head, 0, sizeof(msg_head)); msg_head.opcode = TX_EXPIRED; msg_head.flags = op->flags; msg_head.count = op->count; @@ -439,6 +444,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data) /* this element is not throttled anymore */ data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV); + memset(&head, 0, sizeof(head)); head.opcode = RX_CHANGED; head.flags = op->flags; head.count = op->count; @@ -560,6 +566,7 @@ static 
enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) } /* create notification to user */ + memset(&msg_head, 0, sizeof(msg_head)); msg_head.opcode = RX_TIMEOUT; msg_head.flags = op->flags; msg_head.count = op->count; @@ -1378,20 +1385,15 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) /* * notification handler for netdevice status changes */ -static int bcm_notifier(struct notifier_block *nb, unsigned long msg, - void *ptr) +static void bcm_notify(struct bcm_sock *bo, unsigned long msg, + struct net_device *dev) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier); struct sock *sk = &bo->sk; struct bcm_op *op; int notify_enodev = 0; if (!net_eq(dev_net(dev), sock_net(sk))) - return NOTIFY_DONE; - - if (dev->type != ARPHRD_CAN) - return NOTIFY_DONE; + return; switch (msg) { @@ -1426,7 +1428,28 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, sk->sk_error_report(sk); } } +} +static int bcm_notifier(struct notifier_block *nb, unsigned long msg, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (dev->type != ARPHRD_CAN) + return NOTIFY_DONE; + if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) + return NOTIFY_DONE; + if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */ + return NOTIFY_DONE; + + spin_lock(&bcm_notifier_lock); + list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) { + spin_unlock(&bcm_notifier_lock); + bcm_notify(bcm_busy_notifier, msg, dev); + spin_lock(&bcm_notifier_lock); + } + bcm_busy_notifier = NULL; + spin_unlock(&bcm_notifier_lock); return NOTIFY_DONE; } @@ -1446,9 +1469,9 @@ static int bcm_init(struct sock *sk) INIT_LIST_HEAD(&bo->rx_ops); /* set notifier */ - bo->notifier.notifier_call = bcm_notifier; - - register_netdevice_notifier(&bo->notifier); + spin_lock(&bcm_notifier_lock); + list_add_tail(&bo->notifier, &bcm_notifier_list); + spin_unlock(&bcm_notifier_lock); return 0; } @@ -1471,7 +1494,14 @@ static int bcm_release(struct socket *sock) /* remove bcm_ops, timer, rx_unregister(), etc. 
*/ - unregister_netdevice_notifier(&bo->notifier); + spin_lock(&bcm_notifier_lock); + while (bcm_busy_notifier == bo) { + spin_unlock(&bcm_notifier_lock); + schedule_timeout_uninterruptible(1); + spin_lock(&bcm_notifier_lock); + } + list_del(&bo->notifier); + spin_unlock(&bcm_notifier_lock); lock_sock(sk); @@ -1692,6 +1722,10 @@ static struct pernet_operations canbcm_pernet_ops __read_mostly = { .exit = canbcm_pernet_exit, }; +static struct notifier_block canbcm_notifier = { + .notifier_call = bcm_notifier +}; + static int __init bcm_module_init(void) { int err; @@ -1705,12 +1739,14 @@ static int __init bcm_module_init(void) } register_pernet_subsys(&canbcm_pernet_ops); + register_netdevice_notifier(&canbcm_notifier); return 0; } static void __exit bcm_module_exit(void) { can_proto_unregister(&bcm_can_proto); + unregister_netdevice_notifier(&canbcm_notifier); unregister_pernet_subsys(&canbcm_pernet_ops); } diff --git a/net/can/isotp.c b/net/can/isotp.c index f995eaef5d7b..bd49299319a1 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -143,10 +143,14 @@ struct isotp_sock { u32 force_tx_stmin; u32 force_rx_stmin; struct tpcon rx, tx; - struct notifier_block notifier; + struct list_head notifier; wait_queue_head_t wait; }; +static LIST_HEAD(isotp_notifier_list); +static DEFINE_SPINLOCK(isotp_notifier_lock); +static struct isotp_sock *isotp_busy_notifier; + static inline struct isotp_sock *isotp_sk(const struct sock *sk) { return (struct isotp_sock *)sk; @@ -1015,7 +1019,14 @@ static int isotp_release(struct socket *sock) /* wait for complete transmission of current pdu */ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); - unregister_netdevice_notifier(&so->notifier); + spin_lock(&isotp_notifier_lock); + while (isotp_busy_notifier == so) { + spin_unlock(&isotp_notifier_lock); + schedule_timeout_uninterruptible(1); + spin_lock(&isotp_notifier_lock); + } + list_del(&so->notifier); + spin_unlock(&isotp_notifier_lock); lock_sock(sk); @@ -1319,21 +1330,16 @@ static int isotp_getsockopt(struct socket *sock, int level, int optname, return 0; } -static int isotp_notifier(struct notifier_block *nb, unsigned long msg, - void *ptr) +static void isotp_notify(struct isotp_sock *so, unsigned long msg, + struct net_device *dev) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier); struct sock *sk = &so->sk; if (!net_eq(dev_net(dev), sock_net(sk))) - return NOTIFY_DONE; - - if (dev->type != ARPHRD_CAN) - return NOTIFY_DONE; + return; if (so->ifindex != dev->ifindex) - return NOTIFY_DONE; + return; switch (msg) { case NETDEV_UNREGISTER: @@ -1359,7 +1365,28 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg, sk->sk_error_report(sk); break; } +} +static int isotp_notifier(struct notifier_block *nb, unsigned long msg, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (dev->type != ARPHRD_CAN) + return NOTIFY_DONE; + if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) + return NOTIFY_DONE; + if (unlikely(isotp_busy_notifier)) /* Check for reentrant bug. 
*/ + return NOTIFY_DONE; + + spin_lock(&isotp_notifier_lock); + list_for_each_entry(isotp_busy_notifier, &isotp_notifier_list, notifier) { + spin_unlock(&isotp_notifier_lock); + isotp_notify(isotp_busy_notifier, msg, dev); + spin_lock(&isotp_notifier_lock); + } + isotp_busy_notifier = NULL; + spin_unlock(&isotp_notifier_lock); return NOTIFY_DONE; } @@ -1396,8 +1423,9 @@ static int isotp_init(struct sock *sk) init_waitqueue_head(&so->wait); - so->notifier.notifier_call = isotp_notifier; - register_netdevice_notifier(&so->notifier); + spin_lock(&isotp_notifier_lock); + list_add_tail(&so->notifier, &isotp_notifier_list); + spin_unlock(&isotp_notifier_lock); return 0; } @@ -1444,6 +1472,10 @@ static const struct can_proto isotp_can_proto = { .prot = &isotp_proto, }; +static struct notifier_block canisotp_notifier = { + .notifier_call = isotp_notifier +}; + static __init int isotp_module_init(void) { int err; @@ -1453,6 +1485,8 @@ static __init int isotp_module_init(void) err = can_proto_register(&isotp_can_proto); if (err < 0) pr_err("can: registration of isotp protocol failed %pe\n", ERR_PTR(err)); + else + register_netdevice_notifier(&canisotp_notifier); return err; } @@ -1460,6 +1494,7 @@ static __init int isotp_module_init(void) static __exit void isotp_module_exit(void) { can_proto_unregister(&isotp_can_proto); + unregister_netdevice_notifier(&canisotp_notifier); } module_init(isotp_module_init); diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index e09d087ba240..c3946c355882 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -330,6 +330,9 @@ static void j1939_session_skb_drop_old(struct j1939_session *session) if ((do_skcb->offset + do_skb->len) < offset_start) { __skb_unlink(do_skb, &session->skb_queue); + /* drop ref taken in j1939_session_skb_queue() */ + skb_unref(do_skb); + kfree_skb(do_skb); } spin_unlock_irqrestore(&session->skb_queue.lock, flags); @@ -349,12 +352,13 @@ void j1939_session_skb_queue(struct j1939_session *session, skcb->flags |= J1939_ECU_LOCAL_SRC; + skb_get(skb); skb_queue_tail(&session->skb_queue, skb); } static struct -sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session, - unsigned int offset_start) +sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session, + unsigned int offset_start) { struct j1939_priv *priv = session->priv; struct j1939_sk_buff_cb *do_skcb; @@ -371,6 +375,10 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session, skb = do_skb; } } + + if (skb) + skb_get(skb); + spin_unlock_irqrestore(&session->skb_queue.lock, flags); if (!skb) @@ -381,12 +389,12 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session, return skb; } -static struct sk_buff *j1939_session_skb_find(struct j1939_session *session) +static struct sk_buff *j1939_session_skb_get(struct j1939_session *session) { unsigned int offset_start; offset_start = session->pkt.dpo * 7; - return j1939_session_skb_find_by_offset(session, offset_start); + return j1939_session_skb_get_by_offset(session, offset_start); } /* see if we are receiver @@ -776,7 +784,7 @@ static int j1939_session_tx_dat(struct j1939_session *session) int ret = 0; u8 dat[8]; - se_skb = j1939_session_skb_find_by_offset(session, session->pkt.tx * 7); + se_skb = j1939_session_skb_get_by_offset(session, session->pkt.tx * 7); if (!se_skb) return -ENOBUFS; @@ -801,7 +809,8 @@ static int j1939_session_tx_dat(struct j1939_session *session) netdev_err_once(priv->ndev, "%s: 0x%p: requested data outside of queued buffer: 
offset %i, len %i, pkt.tx: %i\n", __func__, session, skcb->offset, se_skb->len , session->pkt.tx); - return -EOVERFLOW; + ret = -EOVERFLOW; + goto out_free; } if (!len) { @@ -835,6 +844,12 @@ static int j1939_session_tx_dat(struct j1939_session *session) if (pkt_done) j1939_tp_set_rxtimeout(session, 250); + out_free: + if (ret) + kfree_skb(se_skb); + else + consume_skb(se_skb); + return ret; } @@ -1007,7 +1022,7 @@ static int j1939_xtp_txnext_receiver(struct j1939_session *session) static int j1939_simple_txnext(struct j1939_session *session) { struct j1939_priv *priv = session->priv; - struct sk_buff *se_skb = j1939_session_skb_find(session); + struct sk_buff *se_skb = j1939_session_skb_get(session); struct sk_buff *skb; int ret; @@ -1015,8 +1030,10 @@ static int j1939_simple_txnext(struct j1939_session *session) return 0; skb = skb_clone(se_skb, GFP_ATOMIC); - if (!skb) - return -ENOMEM; + if (!skb) { + ret = -ENOMEM; + goto out_free; + } can_skb_set_owner(skb, se_skb->sk); @@ -1024,12 +1041,18 @@ static int j1939_simple_txnext(struct j1939_session *session) ret = j1939_send_one(priv, skb); if (ret) - return ret; + goto out_free; j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED); j1939_sk_queue_activate_next(session); - return 0; + out_free: + if (ret) + kfree_skb(se_skb); + else + consume_skb(se_skb); + + return ret; } static bool j1939_session_deactivate_locked(struct j1939_session *session) @@ -1170,9 +1193,10 @@ static void j1939_session_completed(struct j1939_session *session) struct sk_buff *skb; if (!session->transmission) { - skb = j1939_session_skb_find(session); + skb = j1939_session_skb_get(session); /* distribute among j1939 receivers */ j1939_sk_recv(session->priv, skb); + consume_skb(skb); } j1939_session_deactivate_activate_next(session); @@ -1744,7 +1768,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, { struct j1939_priv *priv = session->priv; struct j1939_sk_buff_cb *skcb; - struct sk_buff *se_skb; + struct sk_buff *se_skb = NULL; const u8 *dat; u8 *tpdat; int offset; @@ -1786,7 +1810,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, goto out_session_cancel; } - se_skb = j1939_session_skb_find_by_offset(session, packet * 7); + se_skb = j1939_session_skb_get_by_offset(session, packet * 7); if (!se_skb) { netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__, session); @@ -1848,11 +1872,13 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, j1939_tp_set_rxtimeout(session, 250); } session->last_cmd = 0xff; + consume_skb(se_skb); j1939_session_put(session); return; out_session_cancel: + kfree_skb(se_skb); j1939_session_timers_cancel(session); j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); j1939_session_put(session); diff --git a/net/can/raw.c b/net/can/raw.c index 139d9471ddcf..ac96fc210025 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -83,7 +83,7 @@ struct raw_sock { struct sock sk; int bound; int ifindex; - struct notifier_block notifier; + struct list_head notifier; int loopback; int recv_own_msgs; int fd_frames; @@ -95,6 +95,10 @@ struct raw_sock { struct uniqframe __percpu *uniq; }; +static LIST_HEAD(raw_notifier_list); +static DEFINE_SPINLOCK(raw_notifier_lock); +static struct raw_sock *raw_busy_notifier; + /* Return pointer to store the extra msg flags for raw_recvmsg(). * We use the space of one unsigned int beyond the 'struct sockaddr_can' * in skb->cb. 
@@ -263,21 +267,16 @@ static int raw_enable_allfilters(struct net *net, struct net_device *dev, return err; } -static int raw_notifier(struct notifier_block *nb, - unsigned long msg, void *ptr) +static void raw_notify(struct raw_sock *ro, unsigned long msg, + struct net_device *dev) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; if (!net_eq(dev_net(dev), sock_net(sk))) - return NOTIFY_DONE; - - if (dev->type != ARPHRD_CAN) - return NOTIFY_DONE; + return; if (ro->ifindex != dev->ifindex) - return NOTIFY_DONE; + return; switch (msg) { case NETDEV_UNREGISTER: @@ -305,7 +304,28 @@ static int raw_notifier(struct notifier_block *nb, sk->sk_error_report(sk); break; } +} + +static int raw_notifier(struct notifier_block *nb, unsigned long msg, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (dev->type != ARPHRD_CAN) + return NOTIFY_DONE; + if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) + return NOTIFY_DONE; + if (unlikely(raw_busy_notifier)) /* Check for reentrant bug. */ + return NOTIFY_DONE; + spin_lock(&raw_notifier_lock); + list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) { + spin_unlock(&raw_notifier_lock); + raw_notify(raw_busy_notifier, msg, dev); + spin_lock(&raw_notifier_lock); + } + raw_busy_notifier = NULL; + spin_unlock(&raw_notifier_lock); return NOTIFY_DONE; } @@ -334,9 +354,9 @@ static int raw_init(struct sock *sk) return -ENOMEM; /* set notifier */ - ro->notifier.notifier_call = raw_notifier; - - register_netdevice_notifier(&ro->notifier); + spin_lock(&raw_notifier_lock); + list_add_tail(&ro->notifier, &raw_notifier_list); + spin_unlock(&raw_notifier_lock); return 0; } @@ -351,7 +371,14 @@ static int raw_release(struct socket *sock) ro = raw_sk(sk); - unregister_netdevice_notifier(&ro->notifier); + spin_lock(&raw_notifier_lock); + while (raw_busy_notifier == ro) { + spin_unlock(&raw_notifier_lock); + schedule_timeout_uninterruptible(1); + spin_lock(&raw_notifier_lock); + } + list_del(&ro->notifier); + spin_unlock(&raw_notifier_lock); lock_sock(sk); @@ -889,6 +916,10 @@ static const struct can_proto raw_can_proto = { .prot = &raw_proto, }; +static struct notifier_block canraw_notifier = { + .notifier_call = raw_notifier +}; + static __init int raw_module_init(void) { int err; @@ -898,6 +929,8 @@ static __init int raw_module_init(void) err = can_proto_register(&raw_can_proto); if (err < 0) pr_err("can: registration of raw protocol failed\n"); + else + register_netdevice_notifier(&canraw_notifier); return err; } @@ -905,6 +938,7 @@ static __init int raw_module_init(void) static __exit void raw_module_exit(void) { can_proto_unregister(&raw_can_proto); + unregister_netdevice_notifier(&canraw_notifier); } module_init(raw_module_init); diff --git a/net/core/dev.c b/net/core/dev.c index 50531a2d0b20..991d09b67bd9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { + if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) && + qdisc_run_begin(q)) { + /* Retest nolock_qdisc_is_empty() within the protection + * of q->seqlock to protect from racing with requeuing. 
+ */ + if (unlikely(!nolock_qdisc_is_empty(q))) { + rc = q->enqueue(skb, q, &to_free) & + NET_XMIT_MASK; + __qdisc_run(q); + qdisc_run_end(q); + + goto no_lock_out; + } + + qdisc_bstats_cpu_update(q, skb); + if (sch_direct_xmit(skb, q, dev, txq, NULL, true) && + !nolock_qdisc_is_empty(q)) + __qdisc_run(q); + + qdisc_run_end(q); + return NET_XMIT_SUCCESS; + } + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - if (likely(!netif_xmit_frozen_or_stopped(txq))) - qdisc_run(q); + qdisc_run(q); +no_lock_out: if (unlikely(to_free)) kfree_skb_list(to_free); return rc; diff --git a/net/core/devlink.c b/net/core/devlink.c index 566ddd147633..8fdd04f00fd7 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2709,23 +2709,16 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct devlink_rate *devlink_rate; - u16 old_mode; - int err; - - if (!devlink->ops->eswitch_mode_get) - return -EOPNOTSUPP; - err = devlink->ops->eswitch_mode_get(devlink, &old_mode); - if (err) - return err; - - if (old_mode == mode) - return 0; + /* Take the lock to sync with devlink_rate_nodes_destroy() */ + mutex_lock(&devlink->lock); list_for_each_entry(devlink_rate, &devlink->rate_list, list) if (devlink_rate_is_node(devlink_rate)) { + mutex_unlock(&devlink->lock); NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists."); return -EBUSY; } + mutex_unlock(&devlink->lock); return 0; } @@ -9275,6 +9268,8 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port) mutex_lock(&devlink->lock); devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL); + if (devlink_rate->parent) + refcount_dec(&devlink_rate->parent->refcnt); list_del(&devlink_rate->list); devlink_port->devlink_rate = NULL; mutex_unlock(&devlink->lock); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2b2f333bcdfe..53e85c70c6e5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -238,6 +238,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_lock(&n->lock); if ((n->nud_state == NUD_FAILED) || + (n->nud_state == NUD_NOARP) || (tbl->is_multicast && tbl->is_multicast(n->primary_key)) || time_after(tref, n->updated)) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 43b6ac4c4439..9b5a767eddd5 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -641,6 +641,18 @@ void __put_net(struct net *net) } EXPORT_SYMBOL_GPL(__put_net); +/** + * get_net_ns - increment the refcount of the network namespace + * @ns: common namespace (net) + * + * Returns the net's common namespace. + */ +struct ns_common *get_net_ns(struct ns_common *ns) +{ + return &get_net(container_of(ns, struct net, ns))->ns; +} +EXPORT_SYMBOL_GPL(get_net_ns); + struct net *get_net_ns_by_fd(int fd) { struct file *file; @@ -660,14 +672,8 @@ struct net *get_net_ns_by_fd(int fd) fput(file); return net; } - -#else -struct net *get_net_ns_by_fd(int fd) -{ - return ERR_PTR(-EINVAL); -} -#endif EXPORT_SYMBOL_GPL(get_net_ns_by_fd); +#endif struct net *get_net_ns_by_pid(pid_t pid) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5baa86bca876..745965e49f78 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4850,10 +4850,12 @@ static int rtnl_bridge_notify(struct net_device *dev) if (err < 0) goto errout; - if (!skb->len) { - err = -EINVAL; + /* Notification info is only filled for bridge ports, not the bridge + * device itself. Therefore, a zero notification length is valid and + * should not result in an error. 
+ */ + if (!skb->len) goto errout; - } rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a0b1d4847efe..2531ac4ffa69 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1258,6 +1258,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg) struct sock *sk = skb->sk; struct sk_buff_head *q; unsigned long flags; + bool is_zerocopy; u32 lo, hi; u16 len; @@ -1272,6 +1273,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg) len = uarg->len; lo = uarg->id; hi = uarg->id + len - 1; + is_zerocopy = uarg->zerocopy; serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); @@ -1279,7 +1281,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg) serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; serr->ee.ee_data = hi; serr->ee.ee_info = lo; - if (!uarg->zerocopy) + if (!is_zerocopy) serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED; q = &sk->sk_error_queue; diff --git a/net/core/sock.c b/net/core/sock.c index ddfa88082a2b..a2337b37eba6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1635,6 +1635,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_bound_dev_if; break; + case SO_NETNS_COOKIE: + lv = sizeof(u64); + if (len != lv) + return -EINVAL; + v.val64 = sock_net(sk)->net_cookie; + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index de5ee3ae86d5..3f00a28fe762 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -6,6 +6,7 @@ * selecting the socket index from the array of available sockets. */ +#include <net/ip.h> #include <net/sock_reuseport.h> #include <linux/bpf.h> #include <linux/idr.h> @@ -536,7 +537,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk, socks = READ_ONCE(reuse->num_socks); if (unlikely(!socks)) - goto out; + goto failure; /* paired with smp_wmb() in __reuseport_add_sock() */ smp_rmb(); @@ -546,13 +547,13 @@ struct sock *reuseport_migrate_sock(struct sock *sk, if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) { if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req) goto select_by_hash; - goto out; + goto failure; } if (!skb) { skb = alloc_skb(0, GFP_ATOMIC); if (!skb) - goto out; + goto failure; allocated = true; } @@ -565,12 +566,18 @@ select_by_hash: if (!nsk) nsk = reuseport_select_sock_by_hash(reuse, hash, socks); - if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) + if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) { nsk = NULL; + goto failure; + } out: rcu_read_unlock(); return nsk; + +failure: + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); + goto out; } EXPORT_SYMBOL(reuseport_migrate_sock); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index b71e87909f0e..9000a8c84baf 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -219,21 +219,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst) kref_put(&dst->refcount, dsa_tree_release); } -static bool dsa_port_is_dsa(struct dsa_port *port) -{ - return port->type == DSA_PORT_TYPE_DSA; -} - -static bool dsa_port_is_cpu(struct dsa_port *port) -{ - return port->type == DSA_PORT_TYPE_CPU; -} - -static bool dsa_port_is_user(struct dsa_port *dp) -{ - return dp->type == DSA_PORT_TYPE_USER; -} - static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst, struct device_node *dn) { @@ -1259,6 +1244,13 @@ static int dsa_switch_parse_member_of(struct 
dsa_switch *ds, if (!ds->dst) return -ENOMEM; + if (dsa_switch_find(ds->dst->index, ds->index)) { + dev_err(ds->dev, + "A DSA switch with index %d already exists in tree %d\n", + ds->index, ds->dst->index); + return -EEXIST; + } + return 0; } diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index b8b17474b72b..c8712942002f 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -84,7 +84,7 @@ struct dsa_notifier_vlan_info { /* DSA_NOTIFIER_MTU */ struct dsa_notifier_mtu_info { - bool propagate_upstream; + bool targeted_match; int sw_index; int port; int mtu; @@ -188,19 +188,23 @@ void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, struct netlink_ext_ack *extack); +int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br, + struct netlink_ext_ack *extack); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); int dsa_port_lag_change(struct dsa_port *dp, struct netdev_lag_lower_state_info *linfo); int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, struct netdev_lag_upper_info *uinfo, struct netlink_ext_ack *extack); +int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev, + struct netlink_ext_ack *extack); void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct netlink_ext_ack *extack); bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock); int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, - bool propagate_upstream); + bool targeted_match); int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, diff --git a/net/dsa/port.c b/net/dsa/port.c index 6379d66a6bb3..46089dd2b2ec 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -194,26 +194,51 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - err = br_mdb_replay(br, brport_dev, - &dsa_slave_switchdev_blocking_notifier, - extack); + err = br_mdb_replay(br, brport_dev, dp, true, + &dsa_slave_switchdev_blocking_notifier, extack); if (err && err != -EOPNOTSUPP) return err; - err = br_fdb_replay(br, brport_dev, &dsa_slave_switchdev_notifier); + err = br_fdb_replay(br, brport_dev, dp, true, + &dsa_slave_switchdev_notifier); if (err && err != -EOPNOTSUPP) return err; - err = br_vlan_replay(br, brport_dev, - &dsa_slave_switchdev_blocking_notifier, - extack); + err = br_vlan_replay(br, brport_dev, dp, true, + &dsa_slave_switchdev_blocking_notifier, extack); if (err && err != -EOPNOTSUPP) return err; return 0; } -static void dsa_port_switchdev_unsync(struct dsa_port *dp) +static int dsa_port_switchdev_unsync_objs(struct dsa_port *dp, + struct net_device *br, + struct netlink_ext_ack *extack) +{ + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + int err; + + /* Delete the switchdev objects left on this port */ + err = br_mdb_replay(br, brport_dev, dp, false, + &dsa_slave_switchdev_blocking_notifier, extack); + if (err && err != -EOPNOTSUPP) + return err; + + err = br_fdb_replay(br, brport_dev, dp, false, + &dsa_slave_switchdev_notifier); + if (err && err != -EOPNOTSUPP) + return err; + + err = br_vlan_replay(br, brport_dev, dp, false, + &dsa_slave_switchdev_blocking_notifier, extack); + if (err && err != -EOPNOTSUPP) + return err; + + 
return 0; +} + +static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) { /* Configure the port for standalone mode (no address learning, * flood everything). @@ -279,6 +304,12 @@ out_rollback: return err; } +int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br, + struct netlink_ext_ack *extack) +{ + return dsa_port_switchdev_unsync_objs(dp, br, extack); +} + void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { @@ -298,7 +329,7 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) if (err) pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); - dsa_port_switchdev_unsync(dp); + dsa_port_switchdev_unsync_attrs(dp); } int dsa_port_lag_change(struct dsa_port *dp, @@ -366,6 +397,15 @@ err_lag_join: return err; } +int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag, + struct netlink_ext_ack *extack) +{ + if (dp->bridge_dev) + return dsa_port_pre_bridge_leave(dp, dp->bridge_dev, extack); + + return 0; +} + void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag) { struct dsa_notifier_lag_info info = { @@ -567,11 +607,11 @@ int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, } int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, - bool propagate_upstream) + bool targeted_match) { struct dsa_notifier_mtu_info info = { .sw_index = dp->ds->index, - .propagate_upstream = propagate_upstream, + .targeted_match = targeted_match, .port = dp->index, .mtu = new_mtu, }; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 798944aa847a..898ed9cf756f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -271,13 +271,16 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return phylink_mii_ioctl(p->dp->pl, ifr, cmd); } -static int dsa_slave_port_attr_set(struct net_device *dev, +static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, const struct switchdev_attr *attr, struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); int ret; + if (ctx && ctx != dp) + return 0; + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) @@ -394,13 +397,16 @@ static int dsa_slave_vlan_add(struct net_device *dev, return vlan_vid_add(master, htons(ETH_P_8021Q), vlan.vid); } -static int dsa_slave_port_obj_add(struct net_device *dev, +static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj, struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); int err; + if (ctx && ctx != dp) + return 0; + switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) @@ -469,12 +475,15 @@ static int dsa_slave_vlan_del(struct net_device *dev, return 0; } -static int dsa_slave_port_obj_del(struct net_device *dev, +static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj) { struct dsa_port *dp = dsa_slave_to_port(dev); int err; + if (ctx && ctx != dp) + return 0; + switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) @@ -1528,6 +1537,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp_iter; struct dsa_port *cpu_dp; int port = p->dp->index; int largest_mtu = 0; @@ -1535,31 +1545,31 @@ 
int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) int old_master_mtu; int mtu_limit; int cpu_mtu; - int err, i; + int err; if (!ds->ops->port_change_mtu) return -EOPNOTSUPP; - for (i = 0; i < ds->num_ports; i++) { + list_for_each_entry(dp_iter, &ds->dst->ports, list) { int slave_mtu; - if (!dsa_is_user_port(ds, i)) + if (!dsa_port_is_user(dp_iter)) continue; /* During probe, this function will be called for each slave * device, while not all of them have been allocated. That's * ok, it doesn't change what the maximum is, so ignore it. */ - if (!dsa_to_port(ds, i)->slave) + if (!dp_iter->slave) continue; /* Pretend that we already applied the setting, which we * actually haven't (still haven't done all integrity checks) */ - if (i == port) + if (dp_iter == dp) slave_mtu = new_mtu; else - slave_mtu = dsa_to_port(ds, i)->slave->mtu; + slave_mtu = dp_iter->slave->mtu; if (largest_mtu < slave_mtu) largest_mtu = slave_mtu; @@ -1585,14 +1595,15 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) goto out_master_failed; /* We only need to propagate the MTU of the CPU port to - * upstream switches. + * upstream switches, so create a non-targeted notifier which + * updates all switches. */ - err = dsa_port_mtu_change(cpu_dp, cpu_mtu, true); + err = dsa_port_mtu_change(cpu_dp, cpu_mtu, false); if (err) goto out_cpu_failed; } - err = dsa_port_mtu_change(dp, new_mtu, false); + err = dsa_port_mtu_change(dp, new_mtu, true); if (err) goto out_port_failed; @@ -1606,7 +1617,7 @@ out_port_failed: if (new_master_mtu != old_master_mtu) dsa_port_mtu_change(cpu_dp, old_master_mtu - dsa_tag_protocol_overhead(cpu_dp->tag_ops), - true); + false); out_cpu_failed: if (new_master_mtu != old_master_mtu) dev_set_mtu(master, old_master_mtu); @@ -2066,6 +2077,26 @@ static int dsa_slave_changeupper(struct net_device *dev, return err; } +static int dsa_slave_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct netlink_ext_ack *extack; + int err = 0; + + extack = netdev_notifier_info_to_extack(&info->info); + + if (netif_is_bridge_master(info->upper_dev) && !info->linking) + err = dsa_port_pre_bridge_leave(dp, info->upper_dev, extack); + else if (netif_is_lag_master(info->upper_dev) && !info->linking) + err = dsa_port_pre_lag_leave(dp, info->upper_dev, extack); + /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be + * meaningfully enslaved to a bridge yet + */ + + return notifier_from_errno(err); +} + static int dsa_slave_lag_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) @@ -2092,6 +2123,35 @@ dsa_slave_lag_changeupper(struct net_device *dev, return err; } +/* Same as dsa_slave_lag_changeupper() except that it calls + * dsa_slave_prechangeupper() + */ +static int +dsa_slave_lag_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + struct dsa_port *dp; + + netdev_for_each_lower_dev(dev, lower, iter) { + if (!dsa_slave_dev_check(lower)) + continue; + + dp = dsa_slave_to_port(lower); + if (!dp->lag_dev) + /* Software LAG */ + continue; + + err = dsa_slave_prechangeupper(lower, info); + if (notifier_to_errno(err)) + break; + } + + return err; +} + static int dsa_prevent_bridging_8021q_upper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) @@ -2155,6 +2215,32 @@ dsa_slave_check_8021q_upper(struct net_device *dev, return 
NOTIFY_DONE; } +static int +dsa_slave_prechangeupper_sanity_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct dsa_switch *ds; + struct dsa_port *dp; + int err; + + if (!dsa_slave_dev_check(dev)) + return dsa_prevent_bridging_8021q_upper(dev, info); + + dp = dsa_slave_to_port(dev); + ds = dp->ds; + + if (ds->ops->port_prechangeupper) { + err = ds->ops->port_prechangeupper(ds, dp->index, info); + if (err) + return notifier_from_errno(err); + } + + if (is_vlan_dev(info->upper_dev)) + return dsa_slave_check_8021q_upper(dev, info); + + return NOTIFY_DONE; +} + static int dsa_slave_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -2163,24 +2249,18 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, switch (event) { case NETDEV_PRECHANGEUPPER: { struct netdev_notifier_changeupper_info *info = ptr; - struct dsa_switch *ds; - struct dsa_port *dp; int err; - if (!dsa_slave_dev_check(dev)) - return dsa_prevent_bridging_8021q_upper(dev, ptr); + err = dsa_slave_prechangeupper_sanity_check(dev, info); + if (err != NOTIFY_DONE) + return err; - dp = dsa_slave_to_port(dev); - ds = dp->ds; + if (dsa_slave_dev_check(dev)) + return dsa_slave_prechangeupper(dev, ptr); - if (ds->ops->port_prechangeupper) { - err = ds->ops->port_prechangeupper(ds, dp->index, info); - if (err) - return notifier_from_errno(err); - } + if (netif_is_lag_master(dev)) + return dsa_slave_lag_prechangeupper(dev, ptr); - if (is_vlan_dev(info->upper_dev)) - return dsa_slave_check_8021q_upper(dev, ptr); break; } case NETDEV_CHANGEUPPER: diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 9bf8e20ecdf3..c1e5afafe633 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -52,10 +52,13 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds, static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port, struct dsa_notifier_mtu_info *info) { - if (ds->index == info->sw_index) - return (port == info->port) || dsa_is_dsa_port(ds, port); + if (ds->index == info->sw_index && port == info->port) + return true; - if (!info->propagate_upstream) + /* Do not propagate to other switches in the tree if the notifier was + * targeted for a single switch. 
+ */ + if (info->targeted_match) return false; if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) @@ -232,36 +235,15 @@ static int dsa_switch_lag_leave(struct dsa_switch *ds, return 0; } -static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, - struct dsa_notifier_mdb_info *info) -{ - if (ds->index == info->sw_index && port == info->port) - return true; - - if (dsa_is_dsa_port(ds, port)) - return true; - - return false; -} - static int dsa_switch_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - int err = 0; - int port; + int port = dsa_towards_port(ds, info->sw_index, info->port); if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_mdb_match(ds, port, info)) { - err = ds->ops->port_mdb_add(ds, port, info->mdb); - if (err) - break; - } - } - - return err; + return ds->ops->port_mdb_add(ds, port, info->mdb); } static int dsa_switch_mdb_del(struct dsa_switch *ds, @@ -364,36 +346,16 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds, return 0; } -static bool dsa_switch_mrp_match(struct dsa_switch *ds, int port, - struct dsa_notifier_mrp_info *info) -{ - if (ds->index == info->sw_index && port == info->port) - return true; - - if (dsa_is_dsa_port(ds, port)) - return true; - - return false; -} - static int dsa_switch_mrp_add(struct dsa_switch *ds, struct dsa_notifier_mrp_info *info) { - int err = 0; - int port; - if (!ds->ops->port_mrp_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_mrp_match(ds, port, info)) { - err = ds->ops->port_mrp_add(ds, port, info->mrp); - if (err) - break; - } - } + if (ds->index == info->sw_index) + return ds->ops->port_mrp_add(ds, info->port, info->mrp); - return err; + return 0; } static int dsa_switch_mrp_del(struct dsa_switch *ds, @@ -408,39 +370,18 @@ static int dsa_switch_mrp_del(struct dsa_switch *ds, return 0; } -static bool -dsa_switch_mrp_ring_role_match(struct dsa_switch *ds, int port, - struct dsa_notifier_mrp_ring_role_info *info) -{ - if (ds->index == info->sw_index && port == info->port) - return true; - - if (dsa_is_dsa_port(ds, port)) - return true; - - return false; -} - static int dsa_switch_mrp_add_ring_role(struct dsa_switch *ds, struct dsa_notifier_mrp_ring_role_info *info) { - int err = 0; - int port; - if (!ds->ops->port_mrp_add) return -EOPNOTSUPP; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_switch_mrp_ring_role_match(ds, port, info)) { - err = ds->ops->port_mrp_add_ring_role(ds, port, - info->mrp); - if (err) - break; - } - } + if (ds->index == info->sw_index) + return ds->ops->port_mrp_add_ring_role(ds, info->port, + info->mrp); - return err; + return 0; } static int diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 2a6733a6449a..7e6b37a54add 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -95,7 +95,7 @@ static int get_module_eeprom_by_page(struct net_device *dev, if (dev->sfp_bus) return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack); - if (ops->get_module_info) + if (ops->get_module_eeprom_by_page) return ops->get_module_eeprom_by_page(dev, page_data, extack); return -EOPNOTSUPP; @@ -159,9 +159,6 @@ static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr * request->offset = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_OFFSET]); request->length = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_LENGTH]); - if (!request->length) - return -EINVAL; - /* The following set of conditions limit the API to only dump 1/2 * 
EEPROM page without crossing low page boundary located at offset 128. * This means user may only request dumps of length limited to 128 from @@ -180,10 +177,6 @@ static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr * NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH], "reading cross half page boundary is illegal"); return -EINVAL; - } else if (request->offset >= ETH_MODULE_EEPROM_PAGE_LEN * 2) { - NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_OFFSET], - "offset is out of bounds"); - return -EINVAL; } else if (request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN * 2) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH], "reading cross page boundary is illegal"); @@ -236,8 +229,10 @@ const struct ethnl_request_ops ethnl_module_eeprom_request_ops = { const struct nla_policy ethnl_module_eeprom_get_policy[] = { [ETHTOOL_A_MODULE_EEPROM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), - [ETHTOOL_A_MODULE_EEPROM_OFFSET] = { .type = NLA_U32 }, - [ETHTOOL_A_MODULE_EEPROM_LENGTH] = { .type = NLA_U32 }, + [ETHTOOL_A_MODULE_EEPROM_OFFSET] = + NLA_POLICY_MAX(NLA_U32, ETH_MODULE_EEPROM_PAGE_LEN * 2 - 1), + [ETHTOOL_A_MODULE_EEPROM_LENGTH] = + NLA_POLICY_RANGE(NLA_U32, 1, ETH_MODULE_EEPROM_PAGE_LEN), [ETHTOOL_A_MODULE_EEPROM_PAGE] = { .type = NLA_U8 }, [ETHTOOL_A_MODULE_EEPROM_BANK] = { .type = NLA_U8 }, [ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS] = diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 3fa7a394eabf..baa5d10043cb 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, if (eeprom.offset + eeprom.len > total_len) return -EINVAL; - data = kmalloc(PAGE_SIZE, GFP_USER); + data = kzalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM; @@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) return -EINVAL; - data = kmalloc(PAGE_SIZE, GFP_USER); + data = kzalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM; @@ -1765,7 +1765,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) return -EFAULT; test.len = test_len; - data = kmalloc_array(test_len, sizeof(u64), GFP_USER); + data = kcalloc(test_len, sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -2293,7 +2293,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr) ret = ethtool_tunable_valid(&tuna); if (ret) return ret; - data = kmalloc(tuna.len, GFP_USER); + data = kzalloc(tuna.len, GFP_USER); if (!data) return -ENOMEM; ret = ops->get_tunable(dev, &tuna, data); @@ -2485,7 +2485,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr) ret = ethtool_phy_tunable_valid(&tuna); if (ret) return ret; - data = kmalloc(tuna.len, GFP_USER); + data = kzalloc(tuna.len, GFP_USER); if (!data) return -ENOMEM; if (phy_drv_tunable) { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 90b10966b16b..3e25a47fd482 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -380,7 +380,7 @@ extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_T extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1]; extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1]; extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1]; -extern const struct nla_policy 
ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA + 1]; +extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1]; extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index b3029fff715d..2d51b7ab4dc5 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -353,6 +353,8 @@ static int strset_reply_size(const struct ethnl_req_info *req_base, int len = 0; int ret; + len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */ + for (i = 0; i < ETH_SS_COUNT; i++) { const struct strset_info *set_info = &data->sets[i]; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 750f388a4a68..54648181dd56 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -575,7 +575,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, return err; } - if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); } @@ -803,7 +803,7 @@ int inet_send_prepare(struct sock *sk) sock_rps_record_flow(sk); /* We may need to bind the socket. */ - if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 36ed85bf2ad5..2d2d08aa787d 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -554,7 +554,6 @@ static int ah4_rcv_cb(struct sk_buff *skb, int err) static const struct xfrm_type ah_type = { - .description = "AH4", .owner = THIS_MODULE, .proto = IPPROTO_AH, .flags = XFRM_TYPE_REPLAY_PROT, diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index d6e3a92841e3..099259fc826a 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -471,6 +471,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) kfree(doi_def->map.std->lvl.local); kfree(doi_def->map.std->cat.cipso); kfree(doi_def->map.std->cat.local); + kfree(doi_def->map.std); break; } kfree(doi_def); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 50deeff48c8b..73721a4448bd 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla, return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL; if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 35803ab7ac80..f5362b9d75eb 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1198,7 +1198,6 @@ static int esp4_rcv_cb(struct sk_buff *skb, int err) static const struct xfrm_type esp_type = { - .description = "ESP4", .owner = THIS_MODULE, .proto = IPPROTO_ESP, .flags = XFRM_TYPE_REPLAY_PROT, diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 33687cf58286..8e4e9aa12130 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -33,12 +33,11 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, struct xfrm_state *x; __be32 seq; __be32 spi; - int err; if (!pskb_pull(skb, offset)) return NULL; - if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) + if (xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq) != 0) goto out; xo = xfrm_offload(skb); @@ -343,7 +342,6 @@ static const struct net_offload esp4_offload = { };
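The data_race() wrappers added in inet_dgram_connect() and inet_send_prepare() above annotate a deliberately lockless read of inet_num: a concurrent bind can change it, but either observed value leads to a safe outcome, so the annotation documents the race for KCSAN instead of adding locking. A minimal sketch of the pattern (struct and field invented for illustration):

/* Shape of a benign-race annotation: KCSAN stops reporting this read
 * racing with a locked writer, and the comment records why a stale
 * value is acceptable.
 */
struct example_port {
	unsigned short num;	/* written only under the socket lock */
};

static bool example_needs_autobind(const struct example_port *p)
{
	/* Racy read: if we observe 0 while a concurrent bind is running,
	 * the later locked autobind path rechecks before assigning.
	 */
	return data_race(!p->num);
}

static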
const struct xfrm_type_offload esp_type_offload = { - .description = "ESP4 OFFLOAD", .owner = THIS_MODULE, .proto = IPPROTO_ESP, .input_tail = esp_input_tail, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2e09d62d59e3..c695d294a5df 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -759,6 +759,13 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr); + /* if we don't have a source address at this point, fall back to the + * dummy address instead of sending out a packet with a source address + * of 0.0.0.0 + */ + if (!fl4.saddr) + fl4.saddr = htonl(INADDR_DUMMY); + icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); @@ -986,14 +993,8 @@ static bool icmp_redirect(struct sk_buff *skb) static bool icmp_echo(struct sk_buff *skb) { - struct icmp_ext_hdr *ext_hdr, _ext_hdr; - struct icmp_ext_echo_iio *iio, _iio; struct icmp_bxm icmp_param; - struct net_device *dev; - char buff[IFNAMSIZ]; struct net *net; - u16 ident_len; - u8 status; net = dev_net(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ @@ -1006,20 +1007,46 @@ static bool icmp_echo(struct sk_buff *skb) icmp_param.data_len = skb->len; icmp_param.head_len = sizeof(struct icmphdr); - if (icmp_param.data.icmph.type == ICMP_ECHO) { + if (icmp_param.data.icmph.type == ICMP_ECHO) icmp_param.data.icmph.type = ICMP_ECHOREPLY; - goto send_reply; - } - if (!net->ipv4.sysctl_icmp_echo_enable_probe) + else if (!icmp_build_probe(skb, &icmp_param.data.icmph)) return true; + + icmp_reply(&icmp_param, skb); + return true; +} + +/* Helper for icmp_echo and icmpv6_echo_reply. + * Searches for net_device that matches PROBE interface identifier + * and builds PROBE reply message in icmphdr. + * + * Returns false if PROBE responses are disabled via sysctl + */ + +bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) +{ + struct icmp_ext_hdr *ext_hdr, _ext_hdr; + struct icmp_ext_echo_iio *iio, _iio; + struct net *net = dev_net(skb->dev); + struct net_device *dev; + char buff[IFNAMSIZ]; + u16 ident_len; + u8 status; + + if (!net->ipv4.sysctl_icmp_echo_enable_probe) + return false; + /* We currently only support probing interfaces on the proxy node * Check to ensure L-bit is set */ - if (!(ntohs(icmp_param.data.icmph.un.echo.sequence) & 1)) - return true; + if (!(ntohs(icmphdr->un.echo.sequence) & 1)) + return false; /* Clear status bits in reply message */ - icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00); - icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY; + icmphdr->un.echo.sequence &= htons(0xFF00); + if (icmphdr->type == ICMP_EXT_ECHO) + icmphdr->type = ICMP_EXT_ECHOREPLY; + else + icmphdr->type = ICMPV6_EXT_ECHO_REPLY; ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr); /* Size of iio is class_type dependent. 
* Only check header here and assign length based on ctype in the switch statement @@ -1080,8 +1107,8 @@ static bool icmp_echo(struct sk_buff *skb) goto send_mal_query; } if (!dev) { - icmp_param.data.icmph.code = ICMP_EXT_CODE_NO_IF; - goto send_reply; + icmphdr->code = ICMP_EXT_CODE_NO_IF; + return true; } /* Fill bits in reply message */ if (dev->flags & IFF_UP) @@ -1091,14 +1118,13 @@ static bool icmp_echo(struct sk_buff *skb) if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list)) status |= ICMP_EXT_ECHOREPLY_IPV6; dev_put(dev); - icmp_param.data.icmph.un.echo.sequence |= htons(status); -send_reply: - icmp_reply(&icmp_param, skb); - return true; + icmphdr->un.echo.sequence |= htons(status); + return true; send_mal_query: - icmp_param.data.icmph.code = ICMP_EXT_CODE_MAL_QUERY; - goto send_reply; + icmphdr->code = ICMP_EXT_CODE_MAL_QUERY; + return true; } +EXPORT_SYMBOL_GPL(icmp_build_probe); /* * Handle ICMP Timestamp requests. diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7b272bbed2b4..6b3c558a4f23 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1801,6 +1801,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev) while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { in_dev->mc_list = i->next_rcu; in_dev->mc_count--; + ip_mc_clear_src(i); ip_ma_put(i); } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0eea878edc30..754013fa393b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -703,6 +703,8 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req, nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN); if (!nreq) { + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); + /* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */ sock_put(sk); return NULL; @@ -876,9 +878,10 @@ static void reqsk_timer_handler(struct timer_list *t) if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { /* delete timer */ inet_csk_reqsk_queue_drop(sk_listener, nreq); - goto drop; + goto no_ownership; } + __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS); reqsk_migrate_reset(oreq); reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq); reqsk_put(oreq); @@ -887,17 +890,19 @@ static void reqsk_timer_handler(struct timer_list *t) return; } -drop: /* Even if we can clone the req, we may need not retransmit any more * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another * CPU may win the "own_req" race so that inet_ehash_insert() fails. 
*/ if (nreq) { + __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE); +no_ownership: reqsk_migrate_reset(nreq); reqsk_queue_removed(queue, nreq); __reqsk_free(nreq); } +drop: inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); } @@ -1135,11 +1140,13 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, refcount_set(&nreq->rsk_refcnt, 1); if (inet_csk_reqsk_queue_add(sk, nreq, child)) { + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS); reqsk_migrate_reset(req); reqsk_put(req); return child; } + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); reqsk_migrate_reset(nreq); __reqsk_free(nreq); } else if (inet_csk_reqsk_queue_add(sk, req, child)) { @@ -1188,8 +1195,12 @@ void inet_csk_listen_stop(struct sock *sk) refcount_set(&nreq->rsk_refcnt, 1); if (inet_csk_reqsk_queue_add(nsk, nreq, child)) { + __NET_INC_STATS(sock_net(nsk), + LINUX_MIB_TCPMIGRATEREQSUCCESS); reqsk_migrate_reset(req); } else { + __NET_INC_STATS(sock_net(nsk), + LINUX_MIB_TCPMIGRATEREQFAILURE); reqsk_migrate_reset(nreq); __reqsk_free(nreq); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a68bf4c6fe9b..12dca0c85f3c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -107,6 +107,8 @@ module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; +static const struct header_ops ipgre_header_ops; + static int ipgre_tunnel_init(struct net_device *dev); static void erspan_build_header(struct sk_buff *skb, u32 id, u32 index, @@ -364,7 +366,10 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, raw_proto, false) < 0) goto drop; - if (tunnel->dev->type != ARPHRD_NONE) + /* Special case for ipgre_header_parse(), which expects the + * mac_header to point to the outer IP header. + */ + if (tunnel->dev->header_ops == &ipgre_header_ops) skb_pop_mac_header(skb); else skb_reset_mac_header(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c3efc7d658f6..8d8a8da3ae7e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1054,7 +1054,7 @@ static int __ip_append_data(struct sock *sk, unsigned int datalen; unsigned int fraglen; unsigned int fraggap; - unsigned int alloclen; + unsigned int alloclen, alloc_extra; unsigned int pagedlen; struct sk_buff *skb_prev; alloc_new_skb: @@ -1074,35 +1074,39 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; pagedlen = 0; + alloc_extra = hh_len + 15; + alloc_extra += exthdrlen; + + /* The last fragment gets additional space at tail. + * Note, with MSG_MORE we overallocate on fragments, + * because we have no idea what fragment will be + * the last. + */ + if (datalen == length + fraggap) + alloc_extra += rt->dst.trailer_len; + if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else if (!paged) + else if (!paged && + (fraglen + alloc_extra < SKB_MAX_ALLOC || + !(rt->dst.dev->features & NETIF_F_SG))) alloclen = fraglen; else { alloclen = min_t(int, fraglen, MAX_HEADER); pagedlen = fraglen - alloclen; } - alloclen += exthdrlen; - - /* The last fragment gets additional space at tail. - * Note, with MSG_MORE we overallocate on fragments, - * because we have no idea what fragment will be - * the last. 
- */ - if (datalen == length + fraggap) - alloclen += rt->dst.trailer_len; + alloclen += alloc_extra; if (transhdrlen) { - skb = sock_alloc_send_skb(sk, - alloclen + hh_len + 15, + skb = sock_alloc_send_skb(sk, alloclen, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 2 * sk->sk_sndbuf) - skb = alloc_skb(alloclen + hh_len + 15, + skb = alloc_skb(alloclen, sk->sk_allocation); if (unlikely(!skb)) err = -ENOBUFS; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index b42683212c65..2e69e81e1f5d 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -152,7 +152,6 @@ static int ipcomp4_rcv_cb(struct sk_buff *skb, int err) } static const struct xfrm_type ipcomp_type = { - .description = "IPCOMP4", .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp4_init_state, diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index d5bfa087c23a..266c65577ba6 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -242,6 +242,8 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) if (!tun_dst) return 0; } + skb_reset_mac_header(skb); + return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 1c9f71a37258..95a718397fd1 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -954,6 +954,7 @@ bool ping_rcv(struct sk_buff *skb) struct sock *sk; struct net *net = dev_net(skb->dev); struct icmphdr *icmph = icmp_hdr(skb); + bool rc = false; /* We assume the packet has already been checked by icmp_rcv */ @@ -968,14 +969,15 @@ bool ping_rcv(struct sk_buff *skb) struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); pr_debug("rcv on socket %p\n", sk); - if (skb2) - ping_queue_rcv_skb(sk, skb2); + if (skb2 && !ping_queue_rcv_skb(sk, skb2)) + rc = true; sock_put(sk); - return true; } - pr_debug("no socket, dropping\n"); - return false; + if (!rc) + pr_debug("no socket, dropping\n"); + + return rc; } EXPORT_SYMBOL_GPL(ping_rcv); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6d46297a99f8..b0d3a09dc84e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -295,6 +295,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH), SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS), SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS), + SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS), + SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a4c477475f4c..66aacb939d3e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2179,6 +2179,19 @@ martian_source: return err; } +/* get device for dst_alloc with local routes */ +static struct net_device *ip_rt_get_dev(struct net *net, + const struct fib_result *res) +{ + struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; + struct net_device *dev = NULL; + + if (nhc) + dev = l3mdev_master_dev_rcu(nhc->nhc_dev); + + return dev ? : net->loopback_dev; +} + /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet @@ -2335,7 +2348,7 @@ local_input: } } - rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? 
: net->loopback_dev, + rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f258a4c0da71..0a4f3f16140a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -786,6 +786,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: + if (sk != req->rsk_listener) + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); + if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { inet_rsk(req)->acked = 1; return NULL; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 15f5504adf5b..1307ad0d3b9e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2607,6 +2607,9 @@ void udp_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); + + /* protects from races with udp_abort() */ + sock_set_flag(sk, SOCK_DEAD); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); if (static_branch_unlikely(&udp_encap_needed_key)) { @@ -2857,10 +2860,17 @@ int udp_abort(struct sock *sk, int err) { lock_sock(sk); + /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing + * with close() + */ + if (sock_flag(sk, SOCK_DEAD)) + goto out; + sk->sk_err = err; sk->sk_error_report(sk); __udp_disconnect(sk, 0); +out: release_sock(sk); return 0; diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index fb0648e7fb32..f4555a88f86b 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -42,7 +42,6 @@ static void ipip_destroy(struct xfrm_state *x) } static const struct xfrm_type ipip_type = { - .description = "IPIP", .owner = THIS_MODULE, .proto = IPPROTO_IPIP, .init_state = ipip_init_state, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 048570900fdf..3bf685fe64b9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5827,7 +5827,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, return -EAFNOSUPPORT; if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL; if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 20d492da725a..828e62514260 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -755,7 +755,6 @@ static int ah6_rcv_cb(struct sk_buff *skb, int err) } static const struct xfrm_type ah6_type = { - .description = "AH6", .owner = THIS_MODULE, .proto = IPPROTO_AH, .flags = XFRM_TYPE_REPLAY_PROT, @@ -763,7 +762,6 @@ static const struct xfrm_type ah6_type = { .destructor = ah6_destroy, .input = ah6_input, .output = ah6_output, - .hdr_offset = xfrm6_find_1stfragopt, }; static struct xfrm6_protocol ah6_protocol = { diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 393ae2b78e7d..37c4b1726c5e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -1243,7 +1243,6 @@ static int esp6_rcv_cb(struct sk_buff *skb, int err) } static const struct xfrm_type esp6_type = { - .description = "ESP6", .owner = THIS_MODULE, .proto = IPPROTO_ESP, .flags = XFRM_TYPE_REPLAY_PROT, @@ -1251,7 +1250,6 @@ static const struct xfrm_type esp6_type = { .destructor = esp6_destroy, .input = esp6_input, .output = esp6_output, - .hdr_offset = xfrm6_find_1stfragopt, }; static struct xfrm6_protocol esp6_protocol = {
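The SOCK_DEAD handshake added to udp_destroy_sock() and udp_abort() above is a flag-under-lock pattern: close() marks the socket dead while holding the socket lock, so a racing diag-initiated abort that takes the lock afterwards observes the flag and backs off. Stripped to its shape (function names illustrative; the real destroy path uses the fast-lock variant):

/* Sketch of the close-vs-abort ordering, assuming both paths serialize
 * on the socket lock. The flag is set before pending state is torn down,
 * so the abort never operates on a half-destroyed socket.
 */
static void example_destroy(struct sock *sk)
{
	lock_sock(sk);
	sock_set_flag(sk, SOCK_DEAD);	/* visible to the next lock holder */
	/* ...flush pending frames, tear down encap state... */
	release_sock(sk);
}

static int example_abort(struct sock *sk, int err)
{
	lock_sock(sk);

	if (sock_flag(sk, SOCK_DEAD))	/* close() already ran */
		goto out;

	sk->sk_err = err;
	sk->sk_error_report(sk);
	/* ...disconnect... */
out:
	release_sock(sk);
	return 0;
}

diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 40ed4fcf1cf4..a349d4798077 100644 --- a/net/ipv6/esp6_offload.c +++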
b/net/ipv6/esp6_offload.c @@ -377,7 +377,6 @@ static const struct net_offload esp6_offload = { }; static const struct xfrm_type_offload esp6_type_offload = { - .description = "ESP6 OFFLOAD", .owner = THIS_MODULE, .proto = IPPROTO_ESP, .input_tail = esp6_input_tail, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e8398ffb5e35..a7c31ab67c5d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -725,6 +725,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct ipcm6_cookie ipc6; u32 mark = IP6_REPLY_MARK(net, skb->mark); bool acast; + u8 type; if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) && net->ipv6.sysctl.icmpv6_echo_ignore_multicast) @@ -740,8 +741,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) !(net->ipv6.sysctl.anycast_src_echo_reply && acast)) saddr = NULL; + if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST) + type = ICMPV6_EXT_ECHO_REPLY; + else + type = ICMPV6_ECHO_REPLY; + memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); - tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; + tmp_hdr.icmp6_type = type; memset(&fl6, 0, sizeof(fl6)); if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES) @@ -752,7 +758,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (saddr) fl6.saddr = *saddr; fl6.flowi6_oif = icmp6_iif(skb); - fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.fl6_icmp_type = type; fl6.flowi6_mark = mark; fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); @@ -783,13 +789,17 @@ static void icmpv6_echo_reply(struct sk_buff *skb) msg.skb = skb; msg.offset = 0; - msg.type = ICMPV6_ECHO_REPLY; + msg.type = type; ipcm6_init_sk(&ipc6, np); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ipc6.sockc.mark = mark; + if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST) + if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr)) + goto out_dst_release; + if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, @@ -911,6 +921,11 @@ static int icmpv6_rcv(struct sk_buff *skb) if (!net->ipv6.sysctl.icmpv6_echo_ignore_all) icmpv6_echo_reply(skb); break; + case ICMPV6_EXT_ECHO_REQUEST: + if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && + net->ipv4.sysctl_icmp_echo_enable_probe) + icmpv6_echo_reply(skb); + break; case ICMPV6_ECHO_REPLY: success = ping_rcv(skb); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ff4f9ebcf7f6..984050f35c61 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1055,13 +1055,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * ip6_route_output will fail given src=any saddr, though, so * that's why we try it again later. */ - if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) { + if (ipv6_addr_any(&fl6->saddr)) { struct fib6_info *from; struct rt6_info *rt; - bool had_dst = *dst != NULL; - if (!had_dst) - *dst = ip6_route_output(net, sk, fl6); + *dst = ip6_route_output(net, sk, fl6); rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; rcu_read_lock(); @@ -1078,7 +1076,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, * never existed and let the SA-enabled version take * over. 
*/ - if (!had_dst && (*dst)->error) { + if ((*dst)->error) { dst_release(*dst); *dst = NULL; } @@ -1555,7 +1553,7 @@ emsgsize: unsigned int datalen; unsigned int fraglen; unsigned int fraggap; - unsigned int alloclen; + unsigned int alloclen, alloc_extra; unsigned int pagedlen; alloc_new_skb: /* There's no room in the current skb */ @@ -1582,17 +1580,28 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; pagedlen = 0; + alloc_extra = hh_len; + alloc_extra += dst_exthdrlen; + alloc_extra += rt->dst.trailer_len; + + /* We just reserve space for fragment header. + * Note: this may be overallocation if the message + * (without MSG_MORE) fits into the MTU. + */ + alloc_extra += sizeof(struct frag_hdr); + if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else if (!paged) + else if (!paged && + (fraglen + alloc_extra < SKB_MAX_ALLOC || + !(rt->dst.dev->features & NETIF_F_SG))) alloclen = fraglen; else { alloclen = min_t(int, fraglen, MAX_HEADER); pagedlen = fraglen - alloclen; } - - alloclen += dst_exthdrlen; + alloclen += alloc_extra; if (datalen != length + fraggap) { /* @@ -1602,30 +1611,21 @@ alloc_new_skb: datalen += rt->dst.trailer_len; } - alloclen += rt->dst.trailer_len; fraglen = datalen + fragheaderlen; - /* - * We just reserve space for fragment header. - * Note: this may be overallocation if the message - * (without MSG_MORE) fits into the MTU. - */ - alloclen += sizeof(struct frag_hdr); - copy = datalen - transhdrlen - fraggap - pagedlen; if (copy < 0) { err = -EINVAL; goto error; } if (transhdrlen) { - skb = sock_alloc_send_skb(sk, - alloclen + hh_len, + skb = sock_alloc_send_skb(sk, alloclen, (flags & MSG_DONTWAIT), &err); } else { skb = NULL; if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <= 2 * sk->sk_sndbuf) - skb = alloc_skb(alloclen + hh_len, + skb = alloc_skb(alloclen, sk->sk_allocation); if (unlikely(!skb)) err = -ENOBUFS; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 288bafded998..0b8a38687ce4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { skb->dev = tunnel->dev; + skb_reset_mac_header(skb); } skb_reset_network_header(skb); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index daef890460b7..15f984be3570 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -172,14 +172,12 @@ static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) } static const struct xfrm_type ipcomp6_type = { - .description = "IPCOMP6", .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output, - .hdr_offset = xfrm6_find_1stfragopt, }; static struct xfrm6_protocol ipcomp6_protocol = { diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 878fcec14949..aeb35d26e474 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -247,54 +247,6 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, return err; } -static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, - u8 **nexthdr) -{ - u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb_tail_pointer(skb) - - skb_network_header(skb); - int found_rhdr = 0; - - *nexthdr = &ipv6_hdr(skb)->nexthdr; - - while (offset + 1 <= packet_len) { - - switch (**nexthdr) 
{ - case NEXTHDR_HOP: - break; - case NEXTHDR_ROUTING: - found_rhdr = 1; - break; - case NEXTHDR_DEST: - /* - * HAO MUST NOT appear more than once. - * XXX: It is better to try to find by the end of - * XXX: packet if HAO exists. - */ - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { - net_dbg_ratelimited("mip6: hao exists already, override\n"); - return offset; - } - - if (found_rhdr) - return offset; - - break; - default: - return offset; - } - - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - } - - return offset; -} - static int mip6_destopt_init_state(struct xfrm_state *x) { if (x->id.spi) { @@ -324,7 +276,6 @@ static void mip6_destopt_destroy(struct xfrm_state *x) } static const struct xfrm_type mip6_destopt_type = { - .description = "MIP6DESTOPT", .owner = THIS_MODULE, .proto = IPPROTO_DSTOPTS, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR, @@ -333,7 +284,6 @@ static const struct xfrm_type mip6_destopt_type = { .input = mip6_destopt_input, .output = mip6_destopt_output, .reject = mip6_destopt_reject, - .hdr_offset = mip6_destopt_offset, }; static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) @@ -383,53 +333,6 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, - u8 **nexthdr) -{ - u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb_tail_pointer(skb) - - skb_network_header(skb); - int found_rhdr = 0; - - *nexthdr = &ipv6_hdr(skb)->nexthdr; - - while (offset + 1 <= packet_len) { - - switch (**nexthdr) { - case NEXTHDR_HOP: - break; - case NEXTHDR_ROUTING: - if (offset + 3 <= packet_len) { - struct ipv6_rt_hdr *rt; - rt = (struct ipv6_rt_hdr *)(nh + offset); - if (rt->type != 0) - return offset; - } - found_rhdr = 1; - break; - case NEXTHDR_DEST: - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) - return offset; - - if (found_rhdr) - return offset; - - break; - default: - return offset; - } - - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - } - - return offset; -} - static int mip6_rthdr_init_state(struct xfrm_state *x) { if (x->id.spi) { @@ -456,7 +359,6 @@ static void mip6_rthdr_destroy(struct xfrm_state *x) } static const struct xfrm_type mip6_rthdr_type = { - .description = "MIP6RT", .owner = THIS_MODULE, .proto = IPPROTO_ROUTING, .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR, @@ -464,7 +366,6 @@ static const struct xfrm_type mip6_rthdr_type = { .destructor = mip6_rthdr_destroy, .input = mip6_rthdr_input, .output = mip6_rthdr_output, - .hdr_offset = mip6_rthdr_offset, }; static int __init mip6_init(void) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index e204163c7036..92f3235fa287 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, } EXPORT_SYMBOL_GPL(nft_fib6_eval_type); +static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph) +{ + if (likely(next != IPPROTO_ICMPV6)) + return false; + + if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY) + return false; + + return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL; +} +
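The new nft_fib_v6_skip_icmpv6() helper exempts IPv6 duplicate address detection traffic from the fib check: DAD neighbour solicitations are sent from the unspecified address (::) to link-local scope destinations, so a reverse-path lookup on them can never succeed and they would otherwise always be dropped. A hypothetical self-check of the predicate (not part of the patch) illustrates the intended packet shape:

/* A DAD-style header (:: -> fe80::1) must be skipped; an ICMPv6 packet
 * with a routable source must not. Purely illustrative; the skb argument
 * is unused by the predicate body shown above.
 */
static void example_check_dad_skip(void)
{
	struct ipv6hdr hdr = {};	/* saddr stays :: */

	hdr.daddr.s6_addr[0] = 0xfe;	/* fe80::1, link-local scope */
	hdr.daddr.s6_addr[1] = 0x80;
	hdr.daddr.s6_addr[15] = 0x01;

	WARN_ON(!nft_fib_v6_skip_icmpv6(NULL, IPPROTO_ICMPV6, &hdr));

	hdr.saddr.s6_addr[0] = 0x20;	/* 2000::... global source */
	WARN_ON(nft_fib_v6_skip_icmpv6(NULL, IPPROTO_ICMPV6, &hdr));
}

void nft_fib6_eval(const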
struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { @@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && - nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { - nft_fib_store_result(dest, priv, nft_in(pkt)); - return; + if (nft_hook(pkt) == NF_INET_PRE_ROUTING || + nft_hook(pkt) == NF_INET_INGRESS) { + if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) || + nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { + nft_fib_store_result(dest, priv, nft_in(pkt)); + return; + } } *dest = 0; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 4ff38cb08f4b..60bf3b877957 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -87,10 +87,10 @@ struct seg6_end_dt_info { int vrf_ifindex; int vrf_table; - /* tunneled packet proto and family (IPv4 or IPv6) */ - __be16 proto; + /* tunneled packet family (IPv4 or IPv6). + * Protocol and header length are inferred from family. + */ u16 family; - int hdrlen; }; struct pcpu_seg6_local_counters { @@ -521,19 +521,6 @@ static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, info->net = net; info->vrf_ifindex = vrf_ifindex; - switch (family) { - case AF_INET: - info->proto = htons(ETH_P_IP); - info->hdrlen = sizeof(struct iphdr); - break; - case AF_INET6: - info->proto = htons(ETH_P_IPV6); - info->hdrlen = sizeof(struct ipv6hdr); - break; - default: - return -EINVAL; - } - info->family = family; info->mode = DT_VRF_MODE; @@ -622,22 +609,44 @@ error: } static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, - struct seg6_local_lwt *slwt) + struct seg6_local_lwt *slwt, u16 family) { struct seg6_end_dt_info *info = &slwt->dt_info; struct net_device *vrf; + __be16 protocol; + int hdrlen; vrf = end_dt_get_vrf_rcu(skb, info); if (unlikely(!vrf)) goto drop; - skb->protocol = info->proto; + switch (family) { + case AF_INET: + protocol = htons(ETH_P_IP); + hdrlen = sizeof(struct iphdr); + break; + case AF_INET6: + protocol = htons(ETH_P_IPV6); + hdrlen = sizeof(struct ipv6hdr); + break; + case AF_UNSPEC: + fallthrough; + default: + goto drop; + } + + if (unlikely(info->family != AF_UNSPEC && info->family != family)) { + pr_warn_once("seg6local: SRv6 End.DT* family mismatch"); + goto drop; + } + + skb->protocol = protocol; skb_dst_drop(skb); - skb_set_transport_header(skb, info->hdrlen); + skb_set_transport_header(skb, hdrlen); - return end_dt_vrf_rcv(skb, info->family, vrf); + return end_dt_vrf_rcv(skb, family, vrf); drop: kfree_skb(skb); @@ -656,7 +665,7 @@ static int input_action_end_dt4(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; - skb = end_dt_vrf_core(skb, slwt); + skb = end_dt_vrf_core(skb, slwt, AF_INET); if (!skb) /* packet has been processed and consumed by the VRF */ return 0; @@ -739,7 +748,7 @@ static int input_action_end_dt6(struct sk_buff *skb, goto legacy_mode; /* DT6_VRF_MODE */ - skb = end_dt_vrf_core(skb, slwt); + skb = end_dt_vrf_core(skb, slwt, AF_INET6); if (!skb) /* packet has been processed and consumed by the VRF */ return 0; @@ -767,6 +776,36 @@ drop: return -EINVAL; } +#ifdef CONFIG_NET_L3_MASTER_DEV +static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg, + struct netlink_ext_ack *extack) +{ + return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack); +} + +static int input_action_end_dt46(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + unsigned int off = 0; + int 
nexthdr; + + nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL); + if (unlikely(nexthdr < 0)) + goto drop; + + switch (nexthdr) { + case IPPROTO_IPIP: + return input_action_end_dt4(skb, slwt); + case IPPROTO_IPV6: + return input_action_end_dt6(skb, slwt); + } + +drop: + kfree_skb(skb); + return -EINVAL; +} +#endif + /* push an SRH on top of the current one */ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { @@ -969,6 +1008,17 @@ static struct seg6_action_desc seg6_action_table[] = { .input = input_action_end_dt6, }, { + .action = SEG6_LOCAL_ACTION_END_DT46, + .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), + .optattrs = SEG6_F_LOCAL_COUNTERS, +#ifdef CONFIG_NET_L3_MASTER_DEV + .input = input_action_end_dt46, + .slwt_ops = { + .build_state = seg6_end_dt46_build, + }, +#endif + }, + { .action = SEG6_LOCAL_ACTION_END_B6, .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), .optattrs = SEG6_F_LOCAL_COUNTERS, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e0a39b0bb4c1..df5bea818410 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -710,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb) * old iph is no longer valid */ iph = (const struct iphdr *)skb_mac_header(skb); + skb_reset_mac_header(skb); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -780,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto) tpi = &ipip_tpi; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; + skb_reset_mac_header(skb); + return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 199b080d418a..3fcd86f4dfdc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1598,6 +1598,9 @@ void udpv6_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); lock_sock(sk); + + /* protects from races with udp_abort() */ + sock_set_flag(sk, SOCK_DEAD); udp_v6_flush_pending_frames(sk); release_sock(sk); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8b84d534b19d..57fa27c1cdf9 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -16,13 +16,6 @@ #include <net/ip6_route.h> #include <net/xfrm.h> -int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, - u8 **prevhdr) -{ - return ip6_find_1stfragopt(skb, prevhdr); -} -EXPORT_SYMBOL(xfrm6_find_1stfragopt); - void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index f696d46e6910..2b31112c0856 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -291,7 +291,6 @@ static void xfrm6_tunnel_destroy(struct xfrm_state *x) } static const struct xfrm_type xfrm6_tunnel_type = { - .description = "IP6IP6", .owner = THIS_MODULE, .proto = IPPROTO_IPV6, .init_state = xfrm6_tunnel_init_state, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 1c572c8daced..6201965bd822 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1066,11 +1066,6 @@ out_error: goto partial_message; } - if (skb_has_frag_list(head)) { - kfree_skb_list(skb_shinfo(head)->frag_list); - skb_shinfo(head)->frag_list = NULL; - } - if (head != kcm->seq_skb) kfree_skb(head); diff --git a/net/key/af_key.c b/net/key/af_key.c index ef9b4ac03e7b..de24a7d474df 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -141,7 +141,6 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; struct pfkey_sock *pfk; - int err; if 
(!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -150,10 +149,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, if (protocol != PF_KEY_V2) return -EPROTONOSUPPORT; - err = -ENOMEM; sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern); if (sk == NULL) - goto out; + return -ENOMEM; pfk = pfkey_sk(sk); mutex_init(&pfk->dump_lock); @@ -169,8 +167,6 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol, pfkey_insert(sk); return 0; -out: - return err; } static int pfkey_release(struct socket *sock) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7a99892e5aba..84cc7733ea66 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1442,6 +1442,38 @@ static void sta_apply_mesh_params(struct ieee80211_local *local, #endif } +static void sta_apply_airtime_params(struct ieee80211_local *local, + struct sta_info *sta, + struct station_parameters *params) +{ + u8 ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + struct airtime_sched_info *air_sched = &local->airtime[ac]; + struct airtime_info *air_info = &sta->airtime[ac]; + struct txq_info *txqi; + u8 tid; + + spin_lock_bh(&air_sched->lock); + for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) { + if (air_info->weight == params->airtime_weight || + !sta->sta.txq[tid] || + ac != ieee80211_ac_from_tid(tid)) + continue; + + airtime_weight_set(air_info, params->airtime_weight); + + txqi = to_txq_info(sta->sta.txq[tid]); + if (RB_EMPTY_NODE(&txqi->schedule_order)) + continue; + + ieee80211_update_airtime_weight(local, air_sched, + 0, true); + } + spin_unlock_bh(&air_sched->lock); + } +} + static int sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) @@ -1629,7 +1661,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta_apply_mesh_params(local, sta, params); if (params->airtime_weight) - sta->airtime_weight = params->airtime_weight; + sta_apply_airtime_params(local, sta, params); + /* set the STA state after all sta info from usermode has been set */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) || @@ -1693,15 +1726,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init(sta); - err = sta_info_insert_rcu(sta); - if (err) { - rcu_read_unlock(); - return err; - } - - rcu_read_unlock(); - - return 0; + return sta_info_insert(sta); } static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 907bb1f748a1..76fc36a68750 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * mac80211 - channel management + * Copyright 2020 - 2021 Intel Corporation */ #include <linux/nl80211.h> @@ -308,8 +309,8 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, * the max of min required widths of all the interfaces bound to this * channel context. 
*/ -void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx) +static u32 _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) { enum nl80211_chan_width max_bw; struct cfg80211_chan_def min_def; @@ -326,7 +327,7 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, ctx->conf.def.width == NL80211_CHAN_WIDTH_16 || ctx->conf.radar_enabled) { ctx->conf.min_def = ctx->conf.def; - return; + return 0; } max_bw = ieee80211_get_chanctx_max_required_bw(local, &ctx->conf); @@ -337,17 +338,21 @@ ieee80211_chandef_downgrade(&min_def); if (cfg80211_chandef_identical(&ctx->conf.min_def, &min_def)) - return; + return 0; ctx->conf.min_def = min_def; if (!ctx->driver_present) - return; + return 0; - drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_MIN_WIDTH); + return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH; } +/* Callers of this function assume that the station vif has already been + * updated to the latest changes by calling ieee80211_vif_update_chandef. + */ static void ieee80211_chan_bw_change(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx) + struct ieee80211_chanctx *ctx, + bool narrowed) { struct sta_info *sta; struct ieee80211_supported_band *sband = @@ -366,9 +371,16 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, continue; new_sta_bw = ieee80211_sta_cur_vht_bw(sta); + + /* nothing changed */ if (new_sta_bw == sta->sta.bandwidth) continue; + /* vif changed to narrow BW and narrow BW for station wasn't + * requested or vice versa */ + if ((new_sta_bw < sta->sta.bandwidth) == !narrowed) + continue; + sta->sta.bandwidth = new_sta_bw; rate_control_rate_update(local, sband, sta, IEEE80211_RC_BW_CHANGED); @@ -376,21 +388,34 @@ rcu_read_unlock(); } -static void ieee80211_change_chanctx(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx, - const struct cfg80211_chan_def *chandef) +/* + * recalc the min required chan width of the channel context, which is + * the max of min required widths of all the interfaces bound to this + * channel context.
+ */ +void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) { - enum nl80211_chan_width width; + u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx); - if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) { - ieee80211_recalc_chanctx_min_def(local, ctx); + if (!changed) return; - } - WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef)); + /* check if BW narrowed */ + ieee80211_chan_bw_change(local, ctx, true); - width = ctx->conf.def.width; - ctx->conf.def = *chandef; + drv_change_chanctx(local, ctx, changed); + + /* check if BW widened */ + ieee80211_chan_bw_change(local, ctx, false); +} + +static void ieee80211_change_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + struct ieee80211_chanctx *old_ctx, + const struct cfg80211_chan_def *chandef) +{ + u32 changed; /* expected to handle only 20/40/80/160 channel widths */ switch (chandef->width) { @@ -405,19 +430,33 @@ WARN_ON(1); } - if (chandef->width < width) - ieee80211_chan_bw_change(local, ctx); + /* Check whether the BW narrowed - we do this _before_ calling + * recalc_chanctx_min_def, because it may return early without acting, + * e.g. when a new context was just added with all parameters up to date. + */ + ieee80211_chan_bw_change(local, old_ctx, true); + + if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) { + ieee80211_recalc_chanctx_min_def(local, ctx); + return; + } - drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH); - ieee80211_recalc_chanctx_min_def(local, ctx); + WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef)); + + ctx->conf.def = *chandef; + + /* check if min chanctx also changed */ + changed = IEEE80211_CHANCTX_CHANGE_WIDTH | + _ieee80211_recalc_chanctx_min_def(local, ctx); + drv_change_chanctx(local, ctx, changed); if (!local->use_chanctx) { local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } - if (chandef->width > width) - ieee80211_chan_bw_change(local, ctx); + /* check if BW widened */ + ieee80211_chan_bw_change(local, old_ctx, false); } static struct ieee80211_chanctx * @@ -450,7 +489,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (!compat) continue; - ieee80211_change_chanctx(local, ctx, compat); + ieee80211_change_chanctx(local, ctx, ctx, compat); return ctx; } @@ -679,7 +718,7 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, if (!compat) return; - ieee80211_change_chanctx(local, ctx, compat); + ieee80211_change_chanctx(local, ctx, ctx, compat); } static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, @@ -1107,13 +1146,12 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) if (WARN_ON(!chandef)) return -EINVAL; - if (old_ctx->conf.def.width > new_ctx->conf.def.width) - ieee80211_chan_bw_change(local, new_ctx); + if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width) + changed = BSS_CHANGED_BANDWIDTH; - ieee80211_change_chanctx(local, new_ctx, chandef); + ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef); - if (old_ctx->conf.def.width < new_ctx->conf.def.width) - ieee80211_chan_bw_change(local, new_ctx); + ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef); vif_chsw[0].vif = &sdata->vif; vif_chsw[0].old_ctx = &old_ctx->conf; @@ -1142,14 +1180,9 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) if (ieee80211_chanctx_refcount(local, old_ctx) == 0) ieee80211_free_chanctx(local,
old_ctx); - if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width) - changed = BSS_CHANGED_BANDWIDTH; - - ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef); - + ieee80211_recalc_chanctx_min_def(local, new_ctx); ieee80211_recalc_smps_chanctx(local, new_ctx); ieee80211_recalc_radar_chanctx(local, new_ctx); - ieee80211_recalc_chanctx_min_def(local, new_ctx); if (changed) ieee80211_bss_info_change_notify(sdata, changed); @@ -1188,7 +1221,7 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata) if (WARN_ON(!chandef)) return -EINVAL; - ieee80211_change_chanctx(local, new_ctx, chandef); + ieee80211_change_chanctx(local, new_ctx, new_ctx, chandef); list_del(&sdata->reserved_chanctx_list); sdata->reserved_chanctx = NULL; @@ -1505,7 +1538,6 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); ieee80211_recalc_chanctx_min_def(local, ctx); - ieee80211_chan_bw_change(local, ctx); list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs, reserved_chanctx_list) { diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 9245c0421bda..8dbfe325ee66 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -4,7 +4,7 @@ * * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2019, 2021 Intel Corporation */ #include <linux/debugfs.h> @@ -216,14 +216,14 @@ static ssize_t aql_txq_limit_read(struct file *file, "VI %u %u\n" "BE %u %u\n" "BK %u %u\n", - local->aql_txq_limit_low[IEEE80211_AC_VO], - local->aql_txq_limit_high[IEEE80211_AC_VO], - local->aql_txq_limit_low[IEEE80211_AC_VI], - local->aql_txq_limit_high[IEEE80211_AC_VI], - local->aql_txq_limit_low[IEEE80211_AC_BE], - local->aql_txq_limit_high[IEEE80211_AC_BE], - local->aql_txq_limit_low[IEEE80211_AC_BK], - local->aql_txq_limit_high[IEEE80211_AC_BK]); + local->airtime[IEEE80211_AC_VO].aql_txq_limit_low, + local->airtime[IEEE80211_AC_VO].aql_txq_limit_high, + local->airtime[IEEE80211_AC_VI].aql_txq_limit_low, + local->airtime[IEEE80211_AC_VI].aql_txq_limit_high, + local->airtime[IEEE80211_AC_BE].aql_txq_limit_low, + local->airtime[IEEE80211_AC_BE].aql_txq_limit_high, + local->airtime[IEEE80211_AC_BK].aql_txq_limit_low, + local->airtime[IEEE80211_AC_BK].aql_txq_limit_high); return simple_read_from_buffer(user_buf, count, ppos, buf, len); } @@ -255,11 +255,11 @@ static ssize_t aql_txq_limit_write(struct file *file, if (ac >= IEEE80211_NUM_ACS) return -EINVAL; - q_limit_low_old = local->aql_txq_limit_low[ac]; - q_limit_high_old = local->aql_txq_limit_high[ac]; + q_limit_low_old = local->airtime[ac].aql_txq_limit_low; + q_limit_high_old = local->airtime[ac].aql_txq_limit_high; - local->aql_txq_limit_low[ac] = q_limit_low; - local->aql_txq_limit_high[ac] = q_limit_high; + local->airtime[ac].aql_txq_limit_low = q_limit_low; + local->airtime[ac].aql_txq_limit_high = q_limit_high; mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { @@ -382,15 +382,62 @@ static const struct file_operations force_tx_status_ops = { .llseek = default_llseek, }; +static ssize_t airtime_read(struct file *file, + char __user *user_buf, + size_t count, + loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + char buf[200]; + u64 v_t[IEEE80211_NUM_ACS]; + u64 wt[IEEE80211_NUM_ACS]; + int len = 0, ac; + + for (ac = 0; ac < 
IEEE80211_NUM_ACS; ac++) { + spin_lock_bh(&local->airtime[ac].lock); + v_t[ac] = local->airtime[ac].v_t; + wt[ac] = local->airtime[ac].weight_sum; + spin_unlock_bh(&local->airtime[ac].lock); + } + len = scnprintf(buf, sizeof(buf), + "\tVO VI BE BK\n" + "Virt-t\t%-10llu %-10llu %-10llu %-10llu\n" + "Weight\t%-10llu %-10llu %-10llu %-10llu\n", + v_t[0], + v_t[1], + v_t[2], + v_t[3], + wt[0], + wt[1], + wt[2], + wt[3]); + + return simple_read_from_buffer(user_buf, count, ppos, + buf, len); +} + +static const struct file_operations airtime_ops = { + .read = airtime_read, + .open = simple_open, + .llseek = default_llseek, +}; + #ifdef CONFIG_PM static ssize_t reset_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; + int ret; rtnl_lock(); + wiphy_lock(local->hw.wiphy); __ieee80211_suspend(&local->hw, NULL); - __ieee80211_resume(&local->hw); + ret = __ieee80211_resume(&local->hw); + wiphy_unlock(local->hw.wiphy); + + if (ret) + cfg80211_shutdown_all_interfaces(local->hw.wiphy); + rtnl_unlock(); return count; @@ -625,7 +672,11 @@ void debugfs_hw_add(struct ieee80211_local *local) if (local->ops->wake_tx_queue) DEBUGFS_ADD_MODE(aqm, 0600); - DEBUGFS_ADD_MODE(airtime_flags, 0600); + if (wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) { + DEBUGFS_ADD_MODE(airtime, 0600); + DEBUGFS_ADD_MODE(airtime_flags, 0600); + } DEBUGFS_ADD(aql_txq_limit); debugfs_create_u32("aql_threshold", 0600, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 0ad3860852ff..db724fc10a5f 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -57,7 +57,6 @@ static ssize_t ieee80211_if_write( return -EFAULT; buf[count] = '\0'; - ret = -ENODEV; rtnl_lock(); ret = (*write)(sdata, buf, count); rtnl_unlock(); @@ -513,6 +512,34 @@ static ssize_t ieee80211_if_fmt_aqm( } IEEE80211_IF_FILE_R(aqm); +static ssize_t ieee80211_if_fmt_airtime( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_txq *txq = sdata->vif.txq; + struct airtime_info *air_info; + int len; + + if (!txq) + return 0; + + spin_lock_bh(&local->airtime[txq->ac].lock); + air_info = to_airtime_info(txq); + len = scnprintf(buf, + buflen, + "RX: %llu us\nTX: %llu us\nWeight: %u\n" + "Virt-T: %lld us\n", + air_info->rx_airtime, + air_info->tx_airtime, + air_info->weight, + air_info->v_t); + spin_unlock_bh(&local->airtime[txq->ac].lock); + + return len; +} + +IEEE80211_IF_FILE_R(airtime); + IEEE80211_IF_FILE(multicast_to_unicast, u.ap.multicast_to_unicast, HEX); /* IBSS attributes */ @@ -658,8 +685,10 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata) if (sdata->local->ops->wake_tx_queue && sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && - sdata->vif.type != NL80211_IFTYPE_NAN) + sdata->vif.type != NL80211_IFTYPE_NAN) { DEBUGFS_ADD(aqm); + DEBUGFS_ADD(airtime); + } } static void add_sta_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 936c9dfa86c8..8be28cfd6f64 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -202,7 +202,7 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, size_t bufsz = 400; char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; u64 rx_airtime = 0, tx_airtime = 0; - s64 deficit[IEEE80211_NUM_ACS]; + u64 v_t[IEEE80211_NUM_ACS]; ssize_t rv; int ac; @@ -210,18 +210,18 @@ 
static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, return -ENOMEM; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - spin_lock_bh(&local->active_txq_lock[ac]); + spin_lock_bh(&local->airtime[ac].lock); rx_airtime += sta->airtime[ac].rx_airtime; tx_airtime += sta->airtime[ac].tx_airtime; - deficit[ac] = sta->airtime[ac].deficit; - spin_unlock_bh(&local->active_txq_lock[ac]); + v_t[ac] = sta->airtime[ac].v_t; + spin_unlock_bh(&local->airtime[ac].lock); } p += scnprintf(p, bufsz + buf - p, "RX: %llu us\nTX: %llu us\nWeight: %u\n" - "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n", - rx_airtime, tx_airtime, sta->airtime_weight, - deficit[0], deficit[1], deficit[2], deficit[3]); + "Virt-T: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n", + rx_airtime, tx_airtime, sta->airtime[0].weight, + v_t[0], v_t[1], v_t[2], v_t[3]); rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); kfree(buf); @@ -236,11 +236,11 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, int ac; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - spin_lock_bh(&local->active_txq_lock[ac]); + spin_lock_bh(&local->airtime[ac].lock); sta->airtime[ac].rx_airtime = 0; sta->airtime[ac].tx_airtime = 0; - sta->airtime[ac].deficit = sta->airtime_weight; - spin_unlock_bh(&local->active_txq_lock[ac]); + sta->airtime[ac].v_t = 0; + spin_unlock_bh(&local->airtime[ac].lock); } return count; @@ -263,10 +263,10 @@ static ssize_t sta_aql_read(struct file *file, char __user *userbuf, return -ENOMEM; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - spin_lock_bh(&local->active_txq_lock[ac]); + spin_lock_bh(&local->airtime[ac].lock); q_limit_l[ac] = sta->airtime[ac].aql_limit_low; q_limit_h[ac] = sta->airtime[ac].aql_limit_high; - spin_unlock_bh(&local->active_txq_lock[ac]); + spin_unlock_bh(&local->airtime[ac].lock); q_depth[ac] = atomic_read(&sta->airtime[ac].aql_tx_pending); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 604ca59937f0..bcb7cc06db3d 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016 Intel Deutschland GmbH -* Copyright (C) 2018 - 2019 Intel Corporation +* Copyright (C) 2018 - 2019, 2021 Intel Corporation */ #ifndef __MAC80211_DRIVER_OPS @@ -821,7 +821,7 @@ drv_allow_buffered_frames(struct ieee80211_local *local, static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - u16 duration) + struct ieee80211_prep_tx_info *info) { might_sleep(); @@ -829,9 +829,27 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, return; WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); - trace_drv_mgd_prepare_tx(local, sdata, duration); + trace_drv_mgd_prepare_tx(local, sdata, info->duration, + info->subtype, info->success); if (local->ops->mgd_prepare_tx) - local->ops->mgd_prepare_tx(&local->hw, &sdata->vif, duration); + local->ops->mgd_prepare_tx(&local->hw, &sdata->vif, info); + trace_drv_return_void(local); +} + +static inline void drv_mgd_complete_tx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_prep_tx_info *info) +{ + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); + + trace_drv_mgd_complete_tx(local, sdata, info->duration, + info->subtype, info->success); + if (local->ops->mgd_complete_tx) + local->ops->mgd_complete_tx(&local->hw, &sdata->vif, info); 
trace_drv_return_void(local); } diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 0c0b970835ce..c05af7018f79 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -111,7 +111,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; - struct ieee80211_sta_he_cap own_he_cap = sband->iftype_data->he_cap; + struct ieee80211_sta_he_cap own_he_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 he_ppe_size; u8 mcs_nss_size; @@ -120,9 +120,13 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, memset(he_cap, 0, sizeof(*he_cap)); - if (!he_cap_ie || !ieee80211_get_he_sta_cap(sband)) + if (!he_cap_ie || + !ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif))) return; + own_he_cap = sband->iftype_data->he_cap; + /* Make sure size is OK */ mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem); he_ppe_size = diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 3d62a80b5790..2eb7641f5556 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright 2017 Intel Deutschland GmbH - * Copyright(c) 2020 Intel Corporation + * Copyright(c) 2020-2021 Intel Corporation */ #include <linux/ieee80211.h> @@ -555,17 +555,15 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION && - vif->type != NL80211_IFTYPE_AP)) + if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION)) return; - if (vif->type == NL80211_IFTYPE_STATION) { - if (sdata->u.mgd.driver_smps_mode == smps_mode) - return; - sdata->u.mgd.driver_smps_mode = smps_mode; - ieee80211_queue_work(&sdata->local->hw, - &sdata->u.mgd.request_smps_work); - } + if (sdata->u.mgd.driver_smps_mode == smps_mode) + return; + + sdata->u.mgd.driver_smps_mode = smps_mode; + ieee80211_queue_work(&sdata->local->hw, + &sdata->u.mgd.request_smps_work); } /* this might change ... don't want non-open drivers using it */ EXPORT_SYMBOL_GPL(ieee80211_request_smps); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 214404a558fb..22549b95d1aa 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #ifndef IEEE80211_I_H @@ -831,17 +831,16 @@ enum txq_info_flags { * @def_flow: used as a fallback flow when a packet destined to @tin hashes to * a fq_flow which is already owned by a different tin * @def_cvars: codel vars for @def_flow - * @frags: used to keep fragments created after dequeue * @schedule_order: used with ieee80211_local->active_txqs - * @schedule_round: counter to prevent infinite loops on TXQ scheduling + * @frags: used to keep fragments created after dequeue */ struct txq_info { struct fq_tin tin; struct codel_vars def_cvars; struct codel_stats cstats; + struct rb_node schedule_order; + struct sk_buff_head frags; - struct list_head schedule_order; - u16 schedule_round; unsigned long flags; /* keep last! 
 */
@@ -918,6 +917,8 @@ struct ieee80211_sub_if_data {
 	struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
 	struct mac80211_qos_map __rcu *qos_map;
 
+	struct airtime_info airtime[IEEE80211_NUM_ACS];
+
 	struct work_struct csa_finalize_work;
 	bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
 	struct cfg80211_chan_def csa_chandef;
@@ -1130,6 +1131,44 @@ enum mac80211_scan_state {
 	SCAN_ABORT,
 };
 
+/**
+ * struct airtime_sched_info - state used for airtime scheduling and AQL
+ *
+ * @lock: spinlock that protects all the fields in this struct
+ * @active_txqs: rbtree of currently backlogged queues, sorted by virtual time
+ * @schedule_pos: the current position maintained while a driver walks the tree
+ *	with ieee80211_next_txq()
+ * @active_list: list of struct airtime_info structs that were active within
+ *	the last AIRTIME_ACTIVE_DURATION (100 ms), used to compute
+ *	weight_sum
+ * @last_weight_update: used for rate limiting walking active_list
+ * @last_schedule_activity: tracks the last time a transmission was scheduled;
+ *	used for catching up v_t if no stations are eligible for
+ *	transmission.
+ * @v_t: global virtual time; queues with v_t < this are eligible for
+ *	transmission
+ * @weight_sum: total sum of all active stations used for dividing airtime
+ * @weight_sum_reciprocal: reciprocal of weight_sum (to avoid divisions in fast
+ *	path - see comment above
+ *	IEEE80211_RECIPROCAL_DIVISOR_64)
+ * @aql_txq_limit_low: AQL limit when total outstanding airtime
+ *	is < IEEE80211_AQL_THRESHOLD
+ * @aql_txq_limit_high: AQL limit when total outstanding airtime
+ *	is > IEEE80211_AQL_THRESHOLD
+ */
+struct airtime_sched_info {
+	spinlock_t lock;
+	struct rb_root_cached active_txqs;
+	struct rb_node *schedule_pos;
+	struct list_head active_list;
+	u64 last_weight_update;
+	u64 last_schedule_activity;
+	u64 v_t;
+	u64 weight_sum;
+	u64 weight_sum_reciprocal;
+	u32 aql_txq_limit_low;
+	u32 aql_txq_limit_high;
+};
+
 DECLARE_STATIC_KEY_FALSE(aql_disable);
 
 struct ieee80211_local {
@@ -1143,13 +1182,8 @@ struct ieee80211_local {
 	struct codel_params cparams;
 
 	/* protects active_txqs and txqi->schedule_order */
-	spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
-	struct list_head active_txqs[IEEE80211_NUM_ACS];
-	u16 schedule_round[IEEE80211_NUM_ACS];
-
+	struct airtime_sched_info airtime[IEEE80211_NUM_ACS];
 	u16 airtime_flags;
-	u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
-	u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
 	u32 aql_threshold;
 	atomic_t aql_total_pending_airtime;
@@ -1414,10 +1448,6 @@ struct ieee80211_local {
 
 	/* extended capabilities provided by mac80211 */
 	u8 ext_capa[8];
-
-	/* TDLS channel switch */
-	struct work_struct tdls_chsw_work;
-	struct sk_buff_head skb_queue_tdls_chsw;
 };
 
 static inline struct ieee80211_sub_if_data *
@@ -1442,7 +1472,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
 	rcu_read_lock();
 	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
 
-	if (WARN_ON_ONCE(!chanctx_conf)) {
+	if (!chanctx_conf) {
 		rcu_read_unlock();
 		return NULL;
 	}
@@ -1567,6 +1597,125 @@ static inline bool txq_has_queue(struct ieee80211_txq *txq)
 	return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets);
 }
 
+static inline struct airtime_info *to_airtime_info(struct ieee80211_txq *txq)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta;
+
+	if (txq->sta) {
+		sta = container_of(txq->sta, struct sta_info, sta);
+		return &sta->airtime[txq->ac];
+	}
+
+	sdata = vif_to_sdata(txq->vif);
+	return &sdata->airtime[txq->ac];
+}
+
+/* To avoid divisions in the fast path, we keep pre-computed reciprocals for
+ * airtime weight calculations. There are two different weights to keep track
+ * of: The per-station weight and the sum of weights per phy.
+ *
+ * For the per-station weights (kept in airtime_info below), we use 32-bit
+ * reciprocals with a divisor of 2^19. This lets us keep the multiplications and
+ * divisions for the station weights as 32-bit operations at the cost of a bit
+ * of rounding error for high weights; but the choice of divisor keeps rounding
+ * errors <10% for weights <2^15, assuming no more than 8ms of airtime is
+ * reported at a time.
+ *
+ * For the per-phy sum of weights the values can get higher, so we use 64-bit
+ * operations for those with a 32-bit divisor, which should avoid any
+ * significant rounding errors.
+ */
+#define IEEE80211_RECIPROCAL_DIVISOR_64	0x100000000ULL
+#define IEEE80211_RECIPROCAL_SHIFT_64	32
+#define IEEE80211_RECIPROCAL_DIVISOR_32	0x80000U
+#define IEEE80211_RECIPROCAL_SHIFT_32	19
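+
+/* Worked example (hypothetical numbers, for illustration only): a station
+ * weight of 256 gives weight_reciprocal = 2^19 / 256 = 2048, so a division
+ * by the weight becomes a multiply and a shift: for 10240 us of airtime,
+ * (10240 * 2048) >> 19 = 40, the same as 10240 / 256, with no division on
+ * the fast path. Callers additionally add weight >> 1 before the shift to
+ * round the result.
+ */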
+
+static inline void airtime_weight_set(struct airtime_info *air_info, u16 weight)
+{
+	if (air_info->weight == weight)
+		return;
+
+	air_info->weight = weight;
+	if (weight) {
+		air_info->weight_reciprocal =
+			IEEE80211_RECIPROCAL_DIVISOR_32 / weight;
+	} else {
+		air_info->weight_reciprocal = 0;
+	}
+}
+
+static inline void airtime_weight_sum_set(struct airtime_sched_info *air_sched,
+					  int weight_sum)
+{
+	if (air_sched->weight_sum == weight_sum)
+		return;
+
+	air_sched->weight_sum = weight_sum;
+	if (air_sched->weight_sum) {
+		air_sched->weight_sum_reciprocal = IEEE80211_RECIPROCAL_DIVISOR_64;
+		do_div(air_sched->weight_sum_reciprocal, air_sched->weight_sum);
+	} else {
+		air_sched->weight_sum_reciprocal = 0;
+	}
+}
+
+/* A problem when trying to enforce airtime fairness is that we want to divide
+ * the airtime between the currently *active* stations. However, basing this on
+ * the instantaneous queue state of stations doesn't work, as queues tend to
+ * oscillate very quickly between empty and occupied, leading to the scheduler
+ * thinking only a single station is active when deciding whether to allow
+ * transmission (and thus not throttling correctly).
+ *
+ * To fix this we use a timer-based notion of activity: a station is considered
+ * active if it has been scheduled within the last 100 ms; we keep a separate
+ * list of all the stations considered active in this manner, and lazily update
+ * the total weight of active stations from this list (filtering the stations in
+ * the list by their 'last active' time).
+ *
+ * We add one additional safeguard to guard against stations that manage to get
+ * scheduled every 100 ms but don't transmit a lot of data, and thus don't use
+ * up any airtime. Such stations would be able to get priority for an extended
+ * period of time if they do start transmitting at full capacity again, and so
+ * we add an explicit maximum for how far behind a station is allowed to fall in
+ * the virtual airtime domain. This limit is set to a relatively high value of
+ * 20 ms because the main mechanism for catching up idle stations is the active
+ * state as described above; i.e., the hard limit should only be hit in
+ * pathological cases.
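+ *
+ * As a hypothetical illustration: if an AC's global v_t has advanced to
+ * 100 ms while an idle station's own v_t is still at 70 ms, the station is
+ * 30 ms behind; with AIRTIME_MAX_BEHIND it is treated as being at most
+ * 20 ms behind (as if its v_t were 80 ms), which bounds the priority burst
+ * it can get when it starts transmitting again.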
+ */ +#define AIRTIME_ACTIVE_DURATION (100 * NSEC_PER_MSEC) +#define AIRTIME_MAX_BEHIND 20000 /* 20 ms */ + +static inline bool airtime_is_active(struct airtime_info *air_info, u64 now) +{ + return air_info->last_scheduled >= now - AIRTIME_ACTIVE_DURATION; +} + +static inline void airtime_set_active(struct airtime_sched_info *air_sched, + struct airtime_info *air_info, u64 now) +{ + air_info->last_scheduled = now; + air_sched->last_schedule_activity = now; + list_move_tail(&air_info->list, &air_sched->active_list); +} + +static inline bool airtime_catchup_v_t(struct airtime_sched_info *air_sched, + u64 v_t, u64 now) +{ + air_sched->v_t = v_t; + return true; +} + +static inline void init_airtime_info(struct airtime_info *air_info, + struct airtime_sched_info *air_sched) +{ + atomic_set(&air_info->aql_tx_pending, 0); + air_info->aql_limit_low = air_sched->aql_txq_limit_low; + air_info->aql_limit_high = air_sched->aql_txq_limit_high; + airtime_weight_set(air_info, IEEE80211_DEFAULT_AIRTIME_WEIGHT); + INIT_LIST_HEAD(&air_info->list); +} + static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) { return ether_addr_equal(raddr, addr) || @@ -1809,6 +1958,14 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, u64 *cookie); int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); +void ieee80211_resort_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq); +void ieee80211_unschedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, + bool purge); +void ieee80211_update_airtime_weight(struct ieee80211_local *local, + struct airtime_sched_info *air_sched, + u64 now, bool force); /* HT */ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, @@ -1879,7 +2036,6 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width); enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta); -void ieee80211_sta_set_rx_nss(struct sta_info *sta); void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, @@ -2287,9 +2443,13 @@ void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata); -void ieee80211_tdls_chsw_work(struct work_struct *wk); void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata, const u8 *peer, u16 reason); +void +ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); + + const char *ieee80211_get_reason_code_string(u16 reason_code); u16 ieee80211_encode_usf(int val); u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 2e2f73a4aa73..1e5e9fc45523 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -476,14 +476,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do GFP_KERNEL); } - /* APs need special treatment */ if (sdata->vif.type == NL80211_IFTYPE_AP) { - struct ieee80211_sub_if_data *vlan, *tmpsdata; - - /* down all dependent devices, that is VLANs */ - list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, - u.vlan.list) - dev_close(vlan->dev); WARN_ON(!list_empty(&sdata->u.ap.vlans)); } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { /* remove all packets in 
parent bc_buf pointing to this dev */ @@ -641,6 +634,15 @@ static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + /* close all dependent VLAN interfaces before locking wiphy */ + if (sdata->vif.type == NL80211_IFTYPE_AP) { + struct ieee80211_sub_if_data *vlan, *tmpsdata; + + list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, + u.vlan.list) + dev_close(vlan->dev); + } + wiphy_lock(sdata->local->hw.wiphy); ieee80211_do_stop(sdata, true); wiphy_unlock(sdata->local->hw.wiphy); @@ -1316,13 +1318,130 @@ static void ieee80211_if_setup_no_queue(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; } +static void ieee80211_iface_process_skb(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_BACK) { + struct sta_info *sta; + int len = skb->len; + + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, mgmt->sa); + if (sta) { + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + ieee80211_process_addba_request(local, sta, + mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + ieee80211_process_addba_resp(local, sta, + mgmt, len); + break; + case WLAN_ACTION_DELBA: + ieee80211_process_delba(sdata, sta, + mgmt, len); + break; + default: + WARN_ON(1); + break; + } + } + mutex_unlock(&local->sta_mtx); + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_VHT) { + switch (mgmt->u.action.u.vht_group_notif.action_code) { + case WLAN_VHT_ACTION_OPMODE_NOTIF: { + struct ieee80211_rx_status *status; + enum nl80211_band band; + struct sta_info *sta; + u8 opmode; + + status = IEEE80211_SKB_RXCB(skb); + band = status->band; + opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; + + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, mgmt->sa); + + if (sta) + ieee80211_vht_handle_opmode(sdata, sta, opmode, + band); + + mutex_unlock(&local->sta_mtx); + break; + } + case WLAN_VHT_ACTION_GROUPID_MGMT: + ieee80211_process_mu_groups(sdata, mgmt); + break; + default: + WARN_ON(1); + break; + } + } else if (ieee80211_is_ext(mgmt->frame_control)) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) + ieee80211_sta_rx_queued_ext(sdata, skb); + else + WARN_ON(1); + } else if (ieee80211_is_data_qos(mgmt->frame_control)) { + struct ieee80211_hdr *hdr = (void *)mgmt; + struct sta_info *sta; + + /* + * So the frame isn't mgmt, but frame_control + * is at the right place anyway, of course, so + * the if statement is correct. + * + * Warn if we have other data frame types here, + * they must not get here. + */ + WARN_ON(hdr->frame_control & + cpu_to_le16(IEEE80211_STYPE_NULLFUNC)); + WARN_ON(!(hdr->seq_ctrl & + cpu_to_le16(IEEE80211_SCTL_FRAG))); + /* + * This was a fragment of a frame, received while + * a block-ack session was active. That cannot be + * right, so terminate the session. 
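+		 * Fragments are not expected inside an aggregation
+		 * session; stopping it with WLAN_REASON_QSTA_REQUIRE_SETUP
+		 * (as below) makes the peer set the session up again
+		 * before it can aggregate.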
+ */ + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, mgmt->sa); + if (sta) { + u16 tid = ieee80211_get_tid(hdr); + + __ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP, + true); + } + mutex_unlock(&local->sta_mtx); + } else switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + ieee80211_sta_rx_queued_mgmt(sdata, skb); + break; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_rx_queued_mgmt(sdata, skb); + break; + case NL80211_IFTYPE_MESH_POINT: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + ieee80211_mesh_rx_queued_mgmt(sdata, skb); + break; + default: + WARN(1, "frame for unexpected interface type"); + break; + } +} + static void ieee80211_iface_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, work); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; - struct sta_info *sta; if (!ieee80211_sdata_running(sdata)) return; @@ -1335,116 +1454,12 @@ static void ieee80211_iface_work(struct work_struct *work) /* first process frames */ while ((skb = skb_dequeue(&sdata->skb_queue))) { - struct ieee80211_mgmt *mgmt = (void *)skb->data; - kcov_remote_start_common(skb_get_kcov_handle(skb)); - if (ieee80211_is_action(mgmt->frame_control) && - mgmt->u.action.category == WLAN_CATEGORY_BACK) { - int len = skb->len; - - mutex_lock(&local->sta_mtx); - sta = sta_info_get_bss(sdata, mgmt->sa); - if (sta) { - switch (mgmt->u.action.u.addba_req.action_code) { - case WLAN_ACTION_ADDBA_REQ: - ieee80211_process_addba_request( - local, sta, mgmt, len); - break; - case WLAN_ACTION_ADDBA_RESP: - ieee80211_process_addba_resp(local, sta, - mgmt, len); - break; - case WLAN_ACTION_DELBA: - ieee80211_process_delba(sdata, sta, - mgmt, len); - break; - default: - WARN_ON(1); - break; - } - } - mutex_unlock(&local->sta_mtx); - } else if (ieee80211_is_action(mgmt->frame_control) && - mgmt->u.action.category == WLAN_CATEGORY_VHT) { - switch (mgmt->u.action.u.vht_group_notif.action_code) { - case WLAN_VHT_ACTION_OPMODE_NOTIF: { - struct ieee80211_rx_status *status; - enum nl80211_band band; - u8 opmode; - - status = IEEE80211_SKB_RXCB(skb); - band = status->band; - opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; - - mutex_lock(&local->sta_mtx); - sta = sta_info_get_bss(sdata, mgmt->sa); - - if (sta) - ieee80211_vht_handle_opmode(sdata, sta, - opmode, - band); - - mutex_unlock(&local->sta_mtx); - break; - } - case WLAN_VHT_ACTION_GROUPID_MGMT: - ieee80211_process_mu_groups(sdata, mgmt); - break; - default: - WARN_ON(1); - break; - } - } else if (ieee80211_is_ext(mgmt->frame_control)) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) - ieee80211_sta_rx_queued_ext(sdata, skb); - else - WARN_ON(1); - } else if (ieee80211_is_data_qos(mgmt->frame_control)) { - struct ieee80211_hdr *hdr = (void *)mgmt; - /* - * So the frame isn't mgmt, but frame_control - * is at the right place anyway, of course, so - * the if statement is correct. - * - * Warn if we have other data frame types here, - * they must not get here. - */ - WARN_ON(hdr->frame_control & - cpu_to_le16(IEEE80211_STYPE_NULLFUNC)); - WARN_ON(!(hdr->seq_ctrl & - cpu_to_le16(IEEE80211_SCTL_FRAG))); - /* - * This was a fragment of a frame, received while - * a block-ack session was active. That cannot be - * right, so terminate the session. 
- */ - mutex_lock(&local->sta_mtx); - sta = sta_info_get_bss(sdata, mgmt->sa); - if (sta) { - u16 tid = ieee80211_get_tid(hdr); - __ieee80211_stop_rx_ba_session( - sta, tid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_REQUIRE_SETUP, - true); - } - mutex_unlock(&local->sta_mtx); - } else switch (sdata->vif.type) { - case NL80211_IFTYPE_STATION: - ieee80211_sta_rx_queued_mgmt(sdata, skb); - break; - case NL80211_IFTYPE_ADHOC: - ieee80211_ibss_rx_queued_mgmt(sdata, skb); - break; - case NL80211_IFTYPE_MESH_POINT: - if (!ieee80211_vif_is_mesh(&sdata->vif)) - break; - ieee80211_mesh_rx_queued_mgmt(sdata, skb); - break; - default: - WARN(1, "frame for unexpected interface type"); - break; - } + if (skb->protocol == cpu_to_be16(ETH_P_TDLS)) + ieee80211_process_tdls_channel_switch(sdata, skb); + else + ieee80211_iface_process_skb(local, sdata, skb); kfree_skb(skb); kcov_remote_stop(); @@ -1591,6 +1606,9 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: + if (!list_empty(&sdata->u.ap.vlans)) + return -EBUSY; + break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_OCB: @@ -1959,6 +1977,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, } } + for (i = 0; i < IEEE80211_NUM_ACS; i++) + init_airtime_info(&sdata->airtime[i], &local->airtime[i]); + ieee80211_set_default_queues(sdata); sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; diff --git a/net/mac80211/led.c b/net/mac80211/led.c index b275c8853074..6de8d0ad5497 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -259,7 +259,6 @@ static void tpt_trig_timer(struct timer_list *t) { struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer); struct ieee80211_local *local = tpt_trig->local; - struct led_classdev *led_cdev; unsigned long on, off, tpt; int i; @@ -283,10 +282,7 @@ static void tpt_trig_timer(struct timer_list *t) } } - read_lock(&local->tpt_led.leddev_list_lock); - list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list) - led_blink_set(led_cdev, &on, &off); - read_unlock(&local->tpt_led.leddev_list_lock); + led_trigger_blink(&local->tpt_led, &on, &off); } const char * @@ -341,7 +337,6 @@ static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) { struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; - struct led_classdev *led_cdev; if (!tpt_trig->running) return; @@ -349,10 +344,7 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) tpt_trig->running = false; del_timer_sync(&tpt_trig->timer); - read_lock(&local->tpt_led.leddev_list_lock); - list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list) - led_set_brightness(led_cdev, LED_OFF); - read_unlock(&local->tpt_led.leddev_list_lock); + led_trigger_event(&local->tpt_led, LED_OFF); } void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 62145e5f9628..05f4c3c72619 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -252,18 +252,18 @@ static void ieee80211_restart_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, restart_work); struct ieee80211_sub_if_data *sdata; + int ret; /* wait for scan work complete */ flush_workqueue(local->workqueue); flush_work(&local->sched_scan_stopped_work); + flush_work(&local->radar_detected_work); + + rtnl_lock(); WARN(test_bit(SCAN_HW_SCANNING, 
&local->scanning), "%s called with hardware scan in progress\n", __func__); - flush_work(&local->radar_detected_work); - /* we might do interface manipulations, so need both */ - rtnl_lock(); - wiphy_lock(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { /* * XXX: there may be more work for other vif types and even @@ -301,8 +301,12 @@ static void ieee80211_restart_work(struct work_struct *work) /* wait for all packet processing to be done */ synchronize_net(); - ieee80211_reconfig(local); + ret = ieee80211_reconfig(local); wiphy_unlock(local->hw.wiphy); + + if (ret) + cfg80211_shutdown_all_interfaces(local->hw.wiphy); + rtnl_unlock(); } @@ -701,10 +705,13 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, spin_lock_init(&local->queue_stop_reason_lock); for (i = 0; i < IEEE80211_NUM_ACS; i++) { - INIT_LIST_HEAD(&local->active_txqs[i]); - spin_lock_init(&local->active_txq_lock[i]); - local->aql_txq_limit_low[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L; - local->aql_txq_limit_high[i] = + struct airtime_sched_info *air_sched = &local->airtime[i]; + + air_sched->active_txqs = RB_ROOT_CACHED; + INIT_LIST_HEAD(&air_sched->active_list); + spin_lock_init(&air_sched->lock); + air_sched->aql_txq_limit_low = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L; + air_sched->aql_txq_limit_high = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H; } @@ -734,8 +741,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, INIT_WORK(&local->sched_scan_stopped_work, ieee80211_sched_scan_stopped_work); - INIT_WORK(&local->tdls_chsw_work, ieee80211_tdls_chsw_work); - spin_lock_init(&local->ack_status_lock); idr_init(&local->ack_status_frames); @@ -752,7 +757,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); - skb_queue_head_init(&local->skb_queue_tdls_chsw); ieee80211_alloc_led_names(local); @@ -1009,8 +1013,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_ht = supp_ht || sband->ht_cap.ht_supported; supp_vht = supp_vht || sband->vht_cap.vht_supported; - if (!supp_he) - supp_he = !!ieee80211_get_he_sta_cap(sband); + for (i = 0; i < sband->n_iftype_data; i++) { + const struct ieee80211_sband_iftype_data *iftd; + + iftd = &sband->iftype_data[i]; + + supp_he = supp_he || (iftd && iftd->he_cap.has_he); + } /* HT, VHT, HE require QoS, thus >= 4 queues */ if (WARN_ON(local->hw.queues < IEEE80211_NUM_ACS && @@ -1384,7 +1393,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_delayed_work_sync(&local->roc_work); cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); - cancel_work_sync(&local->tdls_chsw_work); flush_work(&local->sched_scan_stopped_work); flush_work(&local->radar_detected_work); @@ -1396,7 +1404,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_warn(local->hw.wiphy, "skb_queue not empty\n"); skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); - skb_queue_purge(&local->skb_queue_tdls_chsw); wiphy_unregister(local->hw.wiphy); destroy_workqueue(local->workqueue); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 40492d1bd8fd..77080b4f87b8 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -134,7 +134,7 @@ struct mesh_path { * gate's mpath may or may not be resolved and active. 
* @gates_lock: protects updates to known_gates * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr - * @walk_head: linked list containging all mesh_path objects + * @walk_head: linked list containing all mesh_path objects * @walk_lock: lock protecting walk_head * @entries: number of entries in the table */ diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 3db514c4c63a..a05b615deb51 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1124,7 +1124,7 @@ enddiscovery: * forwarding information is found. * * Returns: 0 if the next hop was found and -ENOENT if the frame was queued. - * skb is freeed here if no mpath could be allocated. + * skb is freed here if no mpath could be allocated. */ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 620ecf922408..efbefcbac3ac 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -122,7 +122,7 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, hdr = (struct ieee80211_hdr *) skb->data; /* we preserve the previous mesh header and only add - * the new addreses */ + * the new addresses */ mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); mshdr->flags = MESH_FLAGS_AE_A5_A6; memcpy(mshdr->eaddr1, hdr->addr3, ETH_ALEN); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index aca26df7587d..a6915847d78a 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -150,7 +150,7 @@ out: * mesh STA in a MBSS. Three HT protection modes are supported for now, non-HT * mixed mode, 20MHz-protection and no-protection mode. non-HT mixed mode is * selected if any non-HT peers are present in our MBSS. 20MHz-protection mode - * is selected if all peers in our 20/40MHz MBSS support HT and atleast one + * is selected if all peers in our 20/40MHz MBSS support HT and at least one * HT20 peer is present. Otherwise no-protection mode is selected. 
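 * For example, a 20/40MHz MBSS in which every peer supports HT but at least
 * one peer is HT20-only selects 20MHz-protection; if a non-HT peer later
 * joins, the mode falls back to non-HT mixed.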
*/ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2480bd0577bb..a00f11a33699 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2020 Intel Corporation + * Copyright (C) 2018 - 2021 Intel Corporation */ #include <linux/delay.h> @@ -371,7 +371,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def chandef; u16 ht_opmode; u32 flags; - enum ieee80211_sta_rx_bandwidth new_sta_bw; u32 vht_cap_info = 0; int ret; @@ -385,7 +384,9 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, /* don't check HE if we associated as non-HE station */ if (ifmgd->flags & IEEE80211_STA_DISABLE_HE || - !ieee80211_get_he_sta_cap(sband)) + !ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif))) + he_oper = NULL; if (WARN_ON_ONCE(!sta)) @@ -445,40 +446,13 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, IEEE80211_STA_DISABLE_160MHZ)) || !cfg80211_chandef_valid(&chandef)) { sdata_info(sdata, - "AP %pM changed bandwidth in a way we can't support - disconnect\n", - ifmgd->bssid); + "AP %pM changed caps/bw in a way we can't support (0x%x/0x%x) - disconnect\n", + ifmgd->bssid, flags, ifmgd->flags); return -EINVAL; } - switch (chandef.width) { - case NL80211_CHAN_WIDTH_20_NOHT: - case NL80211_CHAN_WIDTH_20: - new_sta_bw = IEEE80211_STA_RX_BW_20; - break; - case NL80211_CHAN_WIDTH_40: - new_sta_bw = IEEE80211_STA_RX_BW_40; - break; - case NL80211_CHAN_WIDTH_80: - new_sta_bw = IEEE80211_STA_RX_BW_80; - break; - case NL80211_CHAN_WIDTH_80P80: - case NL80211_CHAN_WIDTH_160: - new_sta_bw = IEEE80211_STA_RX_BW_160; - break; - default: - return -EINVAL; - } - - if (new_sta_bw > sta->cur_max_bandwidth) - new_sta_bw = sta->cur_max_bandwidth; - - if (new_sta_bw < sta->sta.bandwidth) { - sta->sta.bandwidth = new_sta_bw; - rate_control_rate_update(local, sband, sta, - IEEE80211_RC_BW_CHANGED); - } - ret = ieee80211_vif_change_bandwidth(sdata, &chandef, changed); + if (ret) { sdata_info(sdata, "AP %pM changed bandwidth to incompatible one - disconnect\n", @@ -486,12 +460,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, return ret; } - if (new_sta_bw > sta->sta.bandwidth) { - sta->sta.bandwidth = new_sta_bw; - rate_control_rate_update(local, sband, sta, - IEEE80211_RC_BW_CHANGED); - } - return 0; } @@ -617,7 +585,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; /* - * If some other vif is using the MU-MIMO capablity we cannot associate + * If some other vif is using the MU-MIMO capability we cannot associate * using MU-MIMO - this will lead to contradictions in the group-id * mechanism. 
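	 * (For instance, two vifs could end up programming conflicting group
	 * membership and user-position data - a hypothetical illustration of
	 * the contradiction mentioned above.)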
* Ownership is defined since association request, in order to avoid @@ -676,7 +644,8 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); - he_cap = ieee80211_get_he_sta_cap(sband); + he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); if (!he_cap || !reg_cap) return; @@ -712,6 +681,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) u32 rates = 0; __le16 listen_int; struct element *ext_capa = NULL; + enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif); + const struct ieee80211_sband_iftype_data *iftd; + struct ieee80211_prep_tx_info info = {}; /* we know it's writable, cast away the const */ if (assoc_data->ie_len) @@ -756,6 +728,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) } } + iftd = ieee80211_get_sband_iftype_data(sband, iftype); + skb = alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + /* bit too much but doesn't matter */ 2 + assoc_data->ssid_len + /* SSID */ @@ -770,7 +744,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) + assoc_data->ie_len + /* extra IEs */ (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) + - 9, /* WMM */ + 9 + /* WMM */ + (iftd ? iftd->vendor_elems.len : 0), GFP_KERNEL); if (!skb) return; @@ -810,12 +785,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) mgmt->u.reassoc_req.listen_interval = listen_int; memcpy(mgmt->u.reassoc_req.current_ap, assoc_data->prev_bssid, ETH_ALEN); + info.subtype = IEEE80211_STYPE_REASSOC_REQ; } else { skb_put(skb, 4); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_REQ); mgmt->u.assoc_req.capab_info = cpu_to_le16(capab); mgmt->u.assoc_req.listen_interval = listen_int; + info.subtype = IEEE80211_STYPE_ASSOC_REQ; } /* SSID */ @@ -1043,6 +1020,9 @@ skip_rates: ieee80211_add_s1g_capab_ie(sdata, &sband->s1g_cap, skb); } + if (iftd && iftd->vendor_elems.data && iftd->vendor_elems.len) + skb_put_data(skb, iftd->vendor_elems.data, iftd->vendor_elems.len); + /* add any remaining custom (i.e. 
vendor specific here) IEs */ if (assoc_data->ie_len) { noffset = assoc_data->ie_len; @@ -1060,7 +1040,7 @@ skip_rates: ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC); ifmgd->assoc_req_ies_len = pos - ie_start; - drv_mgd_prepare_tx(local, sdata, 0); + drv_mgd_prepare_tx(local, sdata, &info); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) @@ -1094,11 +1074,6 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - /* Don't send NDPs when STA is connected HE */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - !(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) - return; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP)); if (!skb) @@ -1130,10 +1105,6 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) return; - /* Don't send NDPs when connected HE */ - if (!(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE)) - return; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30); if (!skb) return; @@ -1183,10 +1154,6 @@ static void ieee80211_chswitch_work(struct work_struct *work) */ if (sdata->reserved_chanctx) { - struct ieee80211_supported_band *sband = NULL; - struct sta_info *mgd_sta = NULL; - enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20; - /* * with multi-vif csa driver may call ieee80211_csa_finish() * many times while waiting for other interfaces to use their @@ -1195,48 +1162,6 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (sdata->reserved_ready) goto out; - if (sdata->vif.bss_conf.chandef.width != - sdata->csa_chandef.width) { - /* - * For managed interface, we need to also update the AP - * station bandwidth and align the rate scale algorithm - * on the bandwidth change. Here we only consider the - * bandwidth of the new channel definition (as channel - * switch flow does not have the full HT/VHT/HE - * information), assuming that if additional changes are - * required they would be done as part of the processing - * of the next beacon from the AP. 
- */ - switch (sdata->csa_chandef.width) { - case NL80211_CHAN_WIDTH_20_NOHT: - case NL80211_CHAN_WIDTH_20: - default: - bw = IEEE80211_STA_RX_BW_20; - break; - case NL80211_CHAN_WIDTH_40: - bw = IEEE80211_STA_RX_BW_40; - break; - case NL80211_CHAN_WIDTH_80: - bw = IEEE80211_STA_RX_BW_80; - break; - case NL80211_CHAN_WIDTH_80P80: - case NL80211_CHAN_WIDTH_160: - bw = IEEE80211_STA_RX_BW_160; - break; - } - - mgd_sta = sta_info_get(sdata, ifmgd->bssid); - sband = - local->hw.wiphy->bands[sdata->csa_chandef.chan->band]; - } - - if (sdata->vif.bss_conf.chandef.width > - sdata->csa_chandef.width) { - mgd_sta->sta.bandwidth = bw; - rate_control_rate_update(local, sband, mgd_sta, - IEEE80211_RC_BW_CHANGED); - } - ret = ieee80211_vif_use_reserved_context(sdata); if (ret) { sdata_info(sdata, @@ -1247,13 +1172,6 @@ static void ieee80211_chswitch_work(struct work_struct *work) goto out; } - if (sdata->vif.bss_conf.chandef.width < - sdata->csa_chandef.width) { - mgd_sta->sta.bandwidth = bw; - rate_control_rate_update(local, sband, mgd_sta, - IEEE80211_RC_BW_CHANGED); - } - goto out; } @@ -2341,6 +2259,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; u32 changed = 0; + struct ieee80211_prep_tx_info info = { + .subtype = stype, + }; sdata_assert_lock(sdata); @@ -2390,8 +2311,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, * driver requested so. */ if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) && - !ifmgd->have_beacon) - drv_mgd_prepare_tx(sdata->local, sdata, 0); + !ifmgd->have_beacon) { + drv_mgd_prepare_tx(sdata->local, sdata, &info); + } ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, ifmgd->bssid, stype, reason, @@ -2402,6 +2324,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, if (tx) ieee80211_flush_queues(local, sdata, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); + /* clear bssid only after building the needed mgmt frames */ eth_zero_addr(ifmgd->bssid); @@ -2617,10 +2541,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { ifmgd->nullfunc_failed = false; - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) - ifmgd->probe_send_count--; - else - ieee80211_send_nullfunc(sdata->local, sdata, false); + ieee80211_send_nullfunc(sdata->local, sdata, false); } else { int ssid_len; @@ -2952,6 +2873,9 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, u8 *pos; struct ieee802_11_elems elems; u32 tx_flags = 0; + struct ieee80211_prep_tx_info info = { + .subtype = IEEE80211_STYPE_AUTH, + }; pos = mgmt->u.auth.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems, @@ -2959,7 +2883,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, if (!elems.challenge) return; auth_data->expected_transaction = 4; - drv_mgd_prepare_tx(sdata->local, sdata, 0); + drv_mgd_prepare_tx(sdata->local, sdata, &info); if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_INTFL_MLME_CONN_TX; @@ -3012,6 +2936,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, .type = MLME_EVENT, .u.mlme.data = AUTH_EVENT, }; + struct ieee80211_prep_tx_info info = { + .subtype = IEEE80211_STYPE_AUTH, + }; sdata_assert_lock(sdata); @@ -3040,7 +2967,7 @@ static void 
ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, mgmt->sa, auth_alg, ifmgd->auth_data->algorithm, auth_transaction, ifmgd->auth_data->expected_transaction); - return; + goto notify_driver; } if (status_code != WLAN_STATUS_SUCCESS) { @@ -3051,7 +2978,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, (auth_transaction == 1 && (status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT || status_code == WLAN_STATUS_SAE_PK)))) - return; + goto notify_driver; sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); @@ -3059,7 +2986,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, event.u.mlme.status = MLME_DENIED; event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); - return; + goto notify_driver; } switch (ifmgd->auth_data->algorithm) { @@ -3081,10 +3008,11 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, default: WARN_ONCE(1, "invalid auth alg %d", ifmgd->auth_data->algorithm); - return; + goto notify_driver; } event.u.mlme.status = MLME_SUCCESS; + info.success = 1; drv_event_callback(sdata->local, sdata, &event); if (ifmgd->auth_data->algorithm != WLAN_AUTH_SAE || (auth_transaction == 2 && @@ -3098,6 +3026,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); +notify_driver: + drv_mgd_complete_tx(sdata->local, sdata, &info); } #define case_WLAN(type) \ @@ -3314,6 +3244,23 @@ static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata, return 0; } +static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *bss_conf, + struct ieee80211_supported_band *sband, + struct sta_info *sta) +{ + const struct ieee80211_sta_he_cap *own_he_cap = + ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); + + return bss_conf->he_support && + (sta->sta.he_cap.he_cap_elem.mac_cap_info[2] & + IEEE80211_HE_MAC_CAP2_BCAST_TWT) && + own_he_cap && + (own_he_cap->he_cap_elem.mac_cap_info[2] & + IEEE80211_HE_MAC_CAP2_BCAST_TWT); +} + static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss, struct ieee80211_mgmt *mgmt, size_t len, @@ -3529,6 +3476,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, bss_conf->twt_protected = false; } + bss_conf->twt_broadcast = + ieee80211_twt_bcast_support(sdata, bss_conf, sband, sta); + if (bss_conf->he_support) { bss_conf->he_bss_color.color = le32_get_bits(elems->he_operation->he_oper_params, @@ -3699,6 +3649,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, .type = MLME_EVENT, .u.mlme.data = ASSOC_EVENT, }; + struct ieee80211_prep_tx_info info = {}; sdata_assert_lock(sdata); @@ -3728,6 +3679,15 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, aid = 0; /* TODO */ } + /* + * Note: this may not be perfect, AP might misbehave - if + * anyone needs to rely on perfect complete notification + * with the exact right subtype, then we need to track what + * we actually transmitted. + */ + info.subtype = reassoc ? IEEE80211_STYPE_REASSOC_REQ : + IEEE80211_STYPE_ASSOC_REQ; + sdata_info(sdata, "RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n", reassoc ? 
"Rea" : "A", mgmt->sa, @@ -3753,7 +3713,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, assoc_data->timeout_started = true; if (ms > IEEE80211_ASSOC_TIMEOUT) run_again(sdata, assoc_data->timeout); - return; + goto notify_driver; } if (status_code != WLAN_STATUS_SUCCESS) { @@ -3768,7 +3728,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, cbss); - return; + goto notify_driver; } event.u.mlme.status = MLME_SUCCESS; drv_event_callback(sdata->local, sdata, &event); @@ -3786,10 +3746,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) if (sdata->tx_conf[ac].uapsd) uapsd_queues |= ieee80211_ac_to_qos_mask[ac]; + + info.success = 1; } cfg80211_rx_assoc_resp(sdata->dev, cbss, (u8 *)mgmt, len, uapsd_queues, ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len); +notify_driver: + drv_mgd_complete_tx(sdata->local, sdata, &info); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -4062,10 +4026,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (elems.mbssid_config_ie) bss_conf->profile_periodicity = elems.mbssid_config_ie->profile_periodicity; + else + bss_conf->profile_periodicity = 0; if (elems.ext_capab_len >= 11 && (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) bss_conf->ema_ap = true; + else + bss_conf->ema_ap = false; /* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; @@ -4404,7 +4372,9 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) u32 tx_flags = 0; u16 trans = 1; u16 status = 0; - u16 prepare_tx_duration = 0; + struct ieee80211_prep_tx_info info = { + .subtype = IEEE80211_STYPE_AUTH, + }; sdata_assert_lock(sdata); @@ -4427,10 +4397,9 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) } if (auth_data->algorithm == WLAN_AUTH_SAE) - prepare_tx_duration = - jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE); + info.duration = jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE); - drv_mgd_prepare_tx(local, sdata, prepare_tx_duration); + drv_mgd_prepare_tx(local, sdata, &info); sdata_info(sdata, "send auth to %pM (try %d/%d)\n", auth_data->bss->bssid, auth_data->tries, @@ -4925,11 +4894,13 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, } static bool -ieee80211_verify_sta_he_mcs_support(struct ieee80211_supported_band *sband, +ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, const struct ieee80211_he_operation *he_op) { const struct ieee80211_sta_he_cap *sta_he_cap = - ieee80211_get_he_sta_cap(sband); + ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); u16 ap_min_req_set; int i; @@ -5023,7 +4994,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } - if (!ieee80211_get_he_sta_cap(sband)) + if (!ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif))) ifmgd->flags |= IEEE80211_STA_DISABLE_HE; rcu_read_lock(); @@ -5081,7 +5053,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, else he_oper = NULL; - if (!ieee80211_verify_sta_he_mcs_support(sband, he_oper)) + if (!ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper)) ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } @@ -5651,15 +5623,6 @@ int 
ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, 2 * FILS_NONCE_LEN); assoc_data->bss = req->bss; - - if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { - if (ifmgd->powersave) - sdata->smps_mode = IEEE80211_SMPS_DYNAMIC; - else - sdata->smps_mode = IEEE80211_SMPS_OFF; - } else - sdata->smps_mode = ifmgd->req_smps; - assoc_data->capability = req->bss->capability; assoc_data->supp_rates = bss->supp_rates; assoc_data->supp_rates_len = bss->supp_rates_len; @@ -5766,6 +5729,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (err) goto err_clear; + if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) { + if (ifmgd->powersave) + sdata->smps_mode = IEEE80211_SMPS_DYNAMIC; + else + sdata->smps_mode = IEEE80211_SMPS_OFF; + } else { + sdata->smps_mode = ifmgd->req_smps; + } + rcu_read_lock(); beacon_ies = rcu_dereference(req->bss->beacon_ies); @@ -5802,12 +5774,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, beacon_ies->data, beacon_ies->len); if (elem && elem->datalen >= 3) sdata->vif.bss_conf.profile_periodicity = elem->data[2]; + else + sdata->vif.bss_conf.profile_periodicity = 0; elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, beacon_ies->data, beacon_ies->len); if (elem && elem->datalen >= 11 && (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) sdata->vif.bss_conf.ema_ap = true; + else + sdata->vif.bss_conf.ema_ap = false; } else { assoc_data->timeout = jiffies; assoc_data->timeout_started = true; @@ -5846,6 +5822,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; bool tx = !req->local_state_change; + struct ieee80211_prep_tx_info info = { + .subtype = IEEE80211_STYPE_DEAUTH, + }; if (ifmgd->auth_data && ether_addr_equal(ifmgd->auth_data->bss->bssid, req->bssid)) { @@ -5854,7 +5833,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->bssid, req->reason_code, ieee80211_get_reason_code_string(req->reason_code)); - drv_mgd_prepare_tx(sdata->local, sdata, 0); + drv_mgd_prepare_tx(sdata->local, sdata, &info); ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, @@ -5863,7 +5842,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); - + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } @@ -5874,7 +5853,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->bssid, req->reason_code, ieee80211_get_reason_code_string(req->reason_code)); - drv_mgd_prepare_tx(sdata->local, sdata, 0); + drv_mgd_prepare_tx(sdata->local, sdata, &info); ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, @@ -5898,6 +5877,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 63652c39c8e0..e5935e3d7a07 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -297,15 +297,11 @@ void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata) static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc) { struct sk_buff *skb = txrc->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - 
__le16 fc; - - fc = hdr->frame_control; return (info->flags & (IEEE80211_TX_CTL_NO_ACK | IEEE80211_TX_CTL_USE_MINRATE)) || - !ieee80211_is_data(fc); + !ieee80211_is_tx_data(skb); } static void rc_send_low_basicrate(struct ieee80211_tx_rate *rate, @@ -396,6 +392,10 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta, int mcast_rate; bool use_basicrate = false; + if (ieee80211_is_tx_data(txrc->skb) && + info->flags & IEEE80211_TX_CTL_NO_ACK) + return false; + if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) { __rate_control_send_low(txrc->hw, sband, pubsta, info, txrc->rate_idx_mask); @@ -870,7 +870,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, int max_rates) { struct ieee80211_sub_if_data *sdata; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_supported_band *sband; @@ -882,7 +881,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, sdata = vif_to_sdata(vif); sband = sdata->local->hw.wiphy->bands[info->band]; - if (ieee80211_is_data(hdr->frame_control)) + if (ieee80211_is_tx_data(skb)) rate_control_apply_mask(sdata, sta, sband, dest, max_rates); if (dest[0].idx < 0) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 6487b05da6fa..72b44d4c42d0 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -434,7 +434,7 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, unsigned int nsecs = 0, overhead = mi->overhead; unsigned int ampdu_len = 1; - /* do not account throughput if sucess prob is below 10% */ + /* do not account throughput if success prob is below 10% */ if (prob_avg < MINSTREL_FRAC(10, 100)) return 0; @@ -1176,29 +1176,6 @@ minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary) } static void -minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - u16 tid; - - if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO) - return; - - if (unlikely(!ieee80211_is_data_qos(hdr->frame_control))) - return; - - if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE))) - return; - - tid = ieee80211_get_tid(hdr); - if (likely(sta->ampdu_mlme.tid_tx[tid])) - return; - - ieee80211_start_tx_ba_session(pubsta, tid, 0); -} - -static void minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, void *priv_sta, struct ieee80211_tx_status *st) { @@ -1211,6 +1188,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, bool last, update = false; int i; + /* Ignore packet that was sent with noAck flag */ + if (info->flags & IEEE80211_TX_CTL_NO_ACK) + return; + /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) @@ -1498,10 +1479,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct minstrel_priv *mp = priv; u16 sample_idx; - if (!(info->flags & IEEE80211_TX_CTL_AMPDU) && - !minstrel_ht_is_legacy_group(MI_RATE_GROUP(mi->max_prob_rate))) - minstrel_aggr_check(sta, txrc->skb); - info->flags |= mi->tx_flags; #ifdef CONFIG_MAC80211_DEBUGFS @@ -1514,7 +1491,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO)) return; - if (time_is_before_jiffies(mi->sample_time)) + if 
(time_is_after_jiffies(mi->sample_time)) return; mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL; @@ -1907,6 +1884,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta) static const struct rate_control_ops mac80211_minstrel_ht = { .name = "minstrel_ht", + .capa = RATE_CTRL_CAPA_AMPDU_TRIGGER, .tx_status_ext = minstrel_ht_tx_status, .get_rate = minstrel_ht_get_rate, .rate_init = minstrel_ht_rate_init, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1bb43edd47b6..771921c057e8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -214,6 +214,24 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, return len; } +static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) +{ + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); + if (sta) + sta->rx_stats.packets++; +} + +static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) +{ + skb->protocol = 0; + __ieee80211_queue_skb_to_iface(sdata, sta, skb); +} + static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int rtap_space) @@ -254,8 +272,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, if (!skb) return; - skb_queue_tail(&sdata->skb_queue, skb); - ieee80211_queue_work(&sdata->local->hw, &sdata->work); + ieee80211_queue_skb_to_iface(sdata, NULL, skb); } /* @@ -1339,7 +1356,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) { struct sk_buff *skb = rx->skb; - struct ieee80211_local *local = rx->local; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct sta_info *sta = rx->sta; struct tid_ampdu_rx *tid_agg_rx; @@ -1391,8 +1407,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { - skb_queue_tail(&rx->sdata->skb_queue, skb); - ieee80211_queue_work(&local->hw, &rx->sdata->work); + ieee80211_queue_skb_to_iface(rx->sdata, NULL, skb); return; } @@ -1563,12 +1578,8 @@ static void sta_ps_start(struct sta_info *sta) for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { struct ieee80211_txq *txq = sta->sta.txq[tid]; - struct txq_info *txqi = to_txq_info(txq); - spin_lock(&local->active_txq_lock[txq->ac]); - if (!list_empty(&txqi->schedule_order)) - list_del_init(&txqi->schedule_order); - spin_unlock(&local->active_txq_lock[txq->ac]); + ieee80211_unschedule_txq(&local->hw, txq, false); if (txq_has_queue(txq)) set_bit(tid, &sta->txq_buffered_tids); @@ -2240,17 +2251,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (is_multicast_ether_addr(hdr->addr1)) { - I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount); - goto out_no_led; - } - if (rx->sta) cache = &rx->sta->frags; if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) goto out; + if (is_multicast_ether_addr(hdr->addr1)) + return RX_DROP_MONITOR; + I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) @@ -2376,7 +2385,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) out: ieee80211_led_rx(rx->local); - out_no_led: if (rx->sta) rx->sta->rx_stats.packets++; return RX_CONTINUE; @@ -3012,11 +3020,8 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) tf->category == 
WLAN_CATEGORY_TDLS && (tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_REQUEST || tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_RESPONSE)) { - skb_queue_tail(&local->skb_queue_tdls_chsw, rx->skb); - schedule_work(&local->tdls_chsw_work); - if (rx->sta) - rx->sta->rx_stats.packets++; - + rx->skb->protocol = cpu_to_be16(ETH_P_TDLS); + __ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb); return RX_QUEUED; } } @@ -3496,10 +3501,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) return RX_QUEUED; queue: - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&local->hw, &sdata->work); - if (rx->sta) - rx->sta->rx_stats.packets++; + ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb); return RX_QUEUED; } @@ -3647,10 +3649,7 @@ ieee80211_rx_h_ext(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; /* for now only beacons are ext, so queue them */ - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&rx->local->hw, &sdata->work); - if (rx->sta) - rx->sta->rx_stats.packets++; + ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb); return RX_QUEUED; } @@ -3707,11 +3706,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; } - /* queue up frame and kick off work to process it */ - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&rx->local->hw, &sdata->work); - if (rx->sta) - rx->sta->rx_stats.packets++; + ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb); return RX_QUEUED; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index d4cc9ac2d703..6b50cb5e0e3c 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -251,13 +251,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_bss *bss; struct ieee80211_channel *channel; + size_t min_hdr_len = offsetof(struct ieee80211_mgmt, + u.probe_resp.variable); + + if (!ieee80211_is_probe_resp(mgmt->frame_control) && + !ieee80211_is_beacon(mgmt->frame_control) && + !ieee80211_is_s1g_beacon(mgmt->frame_control)) + return; if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - if (skb->len < 15) - return; - } else if (skb->len < 24 || - (!ieee80211_is_probe_resp(mgmt->frame_control) && - !ieee80211_is_beacon(mgmt->frame_control))) + if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) + min_hdr_len = offsetof(struct ieee80211_ext, + u.s1g_short_beacon.variable); + else + min_hdr_len = offsetof(struct ieee80211_ext, + u.s1g_beacon); + } + + if (skb->len < min_hdr_len) return; sdata1 = rcu_dereference(local->scan_sdata); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f2fb69da9b6e..a5505ee51229 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -425,15 +425,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (sta_prepare_rate_control(local, sta, gfp)) goto free_txq; - sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT; for (i = 0; i < IEEE80211_NUM_ACS; i++) { skb_queue_head_init(&sta->ps_tx_buf[i]); skb_queue_head_init(&sta->tx_filtered[i]); - sta->airtime[i].deficit = sta->airtime_weight; - atomic_set(&sta->airtime[i].aql_tx_pending, 0); - sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i]; - sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i]; + init_airtime_info(&sta->airtime[i], &local->airtime[i]); } for (i = 0; i < IEEE80211_NUM_TIDS; i++) @@ -1398,11 +1394,6 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid, struct ieee80211_tx_info *info; struct 
ieee80211_chanctx_conf *chanctx_conf; - /* Don't send NDPs when STA is connected HE */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - !(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE)) - return; - if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC | @@ -1897,24 +1888,59 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, } EXPORT_SYMBOL(ieee80211_sta_set_buffered); -void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, - u32 tx_airtime, u32 rx_airtime) +void ieee80211_register_airtime(struct ieee80211_txq *txq, + u32 tx_airtime, u32 rx_airtime) { - struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - struct ieee80211_local *local = sta->sdata->local; - u8 ac = ieee80211_ac_from_tid(tid); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif); + struct ieee80211_local *local = sdata->local; + u64 weight_sum, weight_sum_reciprocal; + struct airtime_sched_info *air_sched; + struct airtime_info *air_info; u32 airtime = 0; - if (sta->local->airtime_flags & AIRTIME_USE_TX) + air_sched = &local->airtime[txq->ac]; + air_info = to_airtime_info(txq); + + if (local->airtime_flags & AIRTIME_USE_TX) airtime += tx_airtime; - if (sta->local->airtime_flags & AIRTIME_USE_RX) + if (local->airtime_flags & AIRTIME_USE_RX) airtime += rx_airtime; - spin_lock_bh(&local->active_txq_lock[ac]); - sta->airtime[ac].tx_airtime += tx_airtime; - sta->airtime[ac].rx_airtime += rx_airtime; - sta->airtime[ac].deficit -= airtime; - spin_unlock_bh(&local->active_txq_lock[ac]); + /* Weights scale so the unit weight is 256 */ + airtime <<= 8; + + spin_lock_bh(&air_sched->lock); + + air_info->tx_airtime += tx_airtime; + air_info->rx_airtime += rx_airtime; + + if (air_sched->weight_sum) { + weight_sum = air_sched->weight_sum; + weight_sum_reciprocal = air_sched->weight_sum_reciprocal; + } else { + weight_sum = air_info->weight; + weight_sum_reciprocal = air_info->weight_reciprocal; + } + + /* Round the calculation of global vt */ + air_sched->v_t += (u64)((airtime + (weight_sum >> 1)) * + weight_sum_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_64; + air_info->v_t += (u32)((airtime + (air_info->weight >> 1)) * + air_info->weight_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_32; + ieee80211_resort_txq(&local->hw, txq); + + spin_unlock_bh(&air_sched->lock); +} + +void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, + u32 tx_airtime, u32 rx_airtime) +{ + struct ieee80211_txq *txq = pubsta->txq[tid]; + + if (!txq) + return; + + ieee80211_register_airtime(txq, tx_airtime, rx_airtime); } EXPORT_SYMBOL(ieee80211_sta_register_airtime); @@ -2093,10 +2119,9 @@ static struct ieee80211_sta_rx_stats * sta_get_last_rx_stats(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; - struct ieee80211_local *local = sta->local; int cpu; - if (!ieee80211_hw_check(&local->hw, USES_RSS)) + if (!sta->pcpu_rx_stats) return stats; for_each_possible_cpu(cpu) { @@ -2196,9 +2221,7 @@ static void sta_set_tidstats(struct sta_info *sta, int cpu; if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { - if (!ieee80211_hw_check(&local->hw, USES_RSS)) - tidstats->rx_msdu += - sta_get_tidstats_msdu(&sta->rx_stats, tid); + tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->rx_stats, tid); if (sta->pcpu_rx_stats) { for_each_possible_cpu(cpu) { @@ -2277,7 +2300,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->rx_beacon = sdata->u.mgd.count_beacon_signal; drv_sta_statistics(local, sdata, &sta->sta, sinfo); - 
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) | BIT_ULL(NL80211_STA_INFO_STA_FLAGS) | BIT_ULL(NL80211_STA_INFO_BSS_PARAM) | @@ -2312,8 +2334,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) | BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) { - if (!ieee80211_hw_check(&local->hw, USES_RSS)) - sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats); + sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats); if (sta->pcpu_rx_stats) { for_each_possible_cpu(cpu) { @@ -2363,7 +2384,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) { - sinfo->airtime_weight = sta->airtime_weight; + sinfo->airtime_weight = sta->airtime[0].weight; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT); } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 0333072ebd98..ba2796782008 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -135,18 +135,25 @@ enum ieee80211_agg_stop_reason { #define AIRTIME_USE_TX BIT(0) #define AIRTIME_USE_RX BIT(1) + struct airtime_info { u64 rx_airtime; u64 tx_airtime; - s64 deficit; + u64 v_t; + u64 last_scheduled; + struct list_head list; atomic_t aql_tx_pending; /* Estimated airtime for frames pending */ u32 aql_limit_low; u32 aql_limit_high; + u32 weight_reciprocal; + u16 weight; }; void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, struct sta_info *sta, u8 ac, u16 tx_airtime, bool tx_completed); +void ieee80211_register_airtime(struct ieee80211_txq *txq, + u32 tx_airtime, u32 rx_airtime); struct sta_info; @@ -515,7 +522,6 @@ struct ieee80211_fragment_cache { * @tid_seq: per-TID sequence numbers for sending to this STA * @airtime: per-AC struct airtime_info describing airtime statistics for this * station - * @airtime_weight: station weight for airtime fairness calculation purposes * @ampdu_mlme: A-MPDU state machine state * @mesh: mesh STA information * @debugfs_dir: debug filesystem directory dentry @@ -646,7 +652,6 @@ struct sta_info { u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; struct airtime_info airtime[IEEE80211_NUM_ACS]; - u16 airtime_weight; /* * Aggregation information, locked with lock. 
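The sta_info.c and sta_info.h hunks above replace the old round-robin deficit (airtime_weight plus a per-AC deficit) with weighted virtual time: each txq's v_t advances by its measured airtime divided by its weight, and the hot path avoids a 64-bit division by multiplying with a precomputed reciprocal (the weight_reciprocal fields) and shifting. A minimal standalone sketch of that update step, using illustrative names rather than the kernel's helpers:

#include <stdint.h>

#define RECIPROCAL_SHIFT 32

/* round-to-nearest fixed-point 1/weight; precomputed on weight changes */
static uint64_t reciprocal_u32(uint32_t weight)
{
	return ((1ULL << RECIPROCAL_SHIFT) + weight / 2) / weight;
}

/* advance a virtual clock by airtime/weight without a hot-path division */
static void account_airtime(uint64_t *v_t, uint32_t weight, uint32_t airtime_us)
{
	uint64_t scaled = (uint64_t)airtime_us << 8;	/* unit weight is 256 */

	*v_t += ((scaled + weight / 2) * reciprocal_u32(weight)) >> RECIPROCAL_SHIFT;
}

At the unit weight of 256 the clock advances by exactly the reported airtime; doubling a station's weight halves how fast its clock runs, so the rbtree scheduler introduced in the tx.c hunks below grants it a proportionally larger airtime share.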
diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 9baf185ee4c7..bae321ff77f6 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -970,6 +970,25 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked) ieee80211_frame_acked(sta, skb); + } else if (wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) { + struct ieee80211_sub_if_data *sdata; + struct ieee80211_txq *txq; + u32 airtime; + + /* Account airtime to multicast queue */ + sdata = ieee80211_sdata_from_skb(local, skb); + + if (sdata && (txq = sdata->vif.txq)) { + airtime = info->status.tx_time ?: + ieee80211_calc_expected_tx_airtime(hw, + &sdata->vif, + NULL, + skb->len, + false); + + ieee80211_register_airtime(txq, airtime, 0); + } } /* SNMP counters @@ -1006,12 +1025,11 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) && !(info->flags & IEEE80211_TX_CTL_INJECTED) && local->ps_sdata && !(local->scanning)) { - if (info->flags & IEEE80211_TX_STAT_ACK) { + if (info->flags & IEEE80211_TX_STAT_ACK) local->ps_sdata->u.mgd.flags |= IEEE80211_STA_NULLFUNC_ACKED; - } else - mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(10)); + mod_timer(&local->dynamic_ps_timer, + jiffies + msecs_to_jiffies(10)); } ieee80211_report_used_skb(local, skb, false); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index f91d02b81b92..45e532ad1215 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1920,7 +1920,7 @@ out: return ret; } -static void +void ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { @@ -1971,32 +1971,6 @@ void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata) rcu_read_unlock(); } -void ieee80211_tdls_chsw_work(struct work_struct *wk) -{ - struct ieee80211_local *local = - container_of(wk, struct ieee80211_local, tdls_chsw_work); - struct ieee80211_sub_if_data *sdata; - struct sk_buff *skb; - struct ieee80211_tdls_data *tf; - - wiphy_lock(local->hw.wiphy); - while ((skb = skb_dequeue(&local->skb_queue_tdls_chsw))) { - tf = (struct ieee80211_tdls_data *)skb->data; - list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata) || - sdata->vif.type != NL80211_IFTYPE_STATION || - !ether_addr_equal(tf->da, sdata->vif.addr)) - continue; - - ieee80211_process_tdls_channel_switch(sdata, skb); - break; - } - - kfree_skb(skb); - } - wiphy_unlock(local->hw.wiphy); -} - void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata, const u8 *peer, u16 reason) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 8fcc39056402..f6ef15366938 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH -* Copyright (C) 2018 - 2020 Intel Corporation +* Copyright (C) 2018 - 2021 Intel Corporation */ #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -1461,31 +1461,52 @@ DEFINE_EVENT(release_evt, drv_allow_buffered_frames, TP_ARGS(local, sta, tids, num_frames, reason, more_data) ); -TRACE_EVENT(drv_mgd_prepare_tx, +DECLARE_EVENT_CLASS(mgd_prepare_complete_tx_evt, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - u16 duration), + u16 duration, u16 subtype, bool success), - TP_ARGS(local, sdata, duration), + TP_ARGS(local, sdata, duration, subtype, success), TP_STRUCT__entry( LOCAL_ENTRY 
VIF_ENTRY __field(u32, duration) + __field(u16, subtype) + __field(u8, success) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->duration = duration; + __entry->subtype = subtype; + __entry->success = success; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " duration: %u", - LOCAL_PR_ARG, VIF_PR_ARG, __entry->duration + LOCAL_PR_FMT VIF_PR_FMT " duration: %u, subtype:0x%x, success:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->duration, + __entry->subtype, __entry->success ) ); +DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_prepare_tx, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u16 duration, u16 subtype, bool success), + + TP_ARGS(local, sdata, duration, subtype, success) +); + +DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_complete_tx, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u16 duration, u16 subtype, bool success), + + TP_ARGS(local, sdata, duration, subtype, success) +); + DEFINE_EVENT(local_sdata_evt, drv_mgd_protect_tdls_discover, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0b719f3d2dec..e96981144358 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -18,6 +18,7 @@ #include <linux/bitmap.h> #include <linux/rcupdate.h> #include <linux/export.h> +#include <linux/timekeeping.h> #include <net/net_namespace.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> @@ -666,6 +667,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) u32 len; struct ieee80211_tx_rate_control txrc; struct ieee80211_sta_rates *ratetbl = NULL; + bool encap = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP; bool assoc = false; memset(&txrc, 0, sizeof(txrc)); @@ -707,7 +709,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) * just wants a probe response. */ if (tx->sdata->vif.bss_conf.use_short_preamble && - (ieee80211_is_data(hdr->frame_control) || + (ieee80211_is_tx_data(tx->skb) || (tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) txrc.short_preamble = true; @@ -729,7 +731,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) "%s: Dropped data frame as no usable bitrate found while " "scanning and associated. Target station: " "%pM on %d GHz band\n", - tx->sdata->name, hdr->addr1, + tx->sdata->name, + encap ? ((struct ethhdr *)hdr)->h_dest : hdr->addr1, info->band ? 
5 : 2)) return TX_DROP; @@ -763,7 +766,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (txrc.reported_rate.idx < 0) { txrc.reported_rate = tx->rate; - if (tx->sta && ieee80211_is_data(hdr->frame_control)) + if (tx->sta && ieee80211_is_tx_data(tx->skb)) tx->sta->tx_stats.last_rate = txrc.reported_rate; } else if (tx->sta) tx->sta->tx_stats.last_rate = txrc.reported_rate; @@ -1447,7 +1450,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, codel_vars_init(&txqi->def_cvars); codel_stats_init(&txqi->cstats); __skb_queue_head_init(&txqi->frags); - INIT_LIST_HEAD(&txqi->schedule_order); + RB_CLEAR_NODE(&txqi->schedule_order); txqi->txq.vif = &sdata->vif; @@ -1491,9 +1494,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local, ieee80211_purge_tx_queue(&local->hw, &txqi->frags); spin_unlock_bh(&fq->lock); - spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]); - list_del_init(&txqi->schedule_order); - spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]); + ieee80211_unschedule_txq(&local->hw, &txqi->txq, true); } void ieee80211_txq_set_params(struct ieee80211_local *local) @@ -1768,8 +1769,6 @@ static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx) CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_check_control_port_protocol); CALL_TXH(ieee80211_tx_h_select_key); - if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) - CALL_TXH(ieee80211_tx_h_rate_ctrl); txh_done: if (unlikely(res == TX_DROP)) { @@ -1802,6 +1801,9 @@ static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx) goto txh_done; } + if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) + CALL_TXH(ieee80211_tx_h_rate_ctrl); + CALL_TXH(ieee80211_tx_h_michael_mic_add); CALL_TXH(ieee80211_tx_h_sequence); CALL_TXH(ieee80211_tx_h_fragment); @@ -2014,6 +2016,26 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, ieee80211_tx(sdata, sta, skb, false); } +static bool ieee80211_validate_radiotap_len(struct sk_buff *skb) +{ + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *)skb->data; + + /* check for not even having the fixed radiotap header part */ + if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) + return false; /* too short to be possibly valid */ + + /* is it a header version we can trust to find length from? */ + if (unlikely(rthdr->it_version)) + return false; /* only version 0 is supported */ + + /* does the skb contain enough to deliver on the alleged length? */ + if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data))) + return false; /* skb too short for claimed rt header extent */ + + return true; +} + bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, struct net_device *dev) { @@ -2022,8 +2044,6 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, struct ieee80211_radiotap_header *rthdr = (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_supported_band *sband = - local->hw.wiphy->bands[info->band]; int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, NULL); u16 txflags; @@ -2036,17 +2056,8 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, u8 vht_mcs = 0, vht_nss = 0; int i; - /* check for not even having the fixed radiotap header part */ - if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) - return false; /* too short to be possibly valid */ - - /* is it a header version we can trust to find length from? 
*/ - if (unlikely(rthdr->it_version)) - return false; /* only version 0 is supported */ - - /* does the skb contain enough to deliver on the alleged length? */ - if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data))) - return false; /* skb too short for claimed rt header extent */ + if (!ieee80211_validate_radiotap_len(skb)) + return false; info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_CTL_DONTFRAG; @@ -2186,6 +2197,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, return false; if (rate_found) { + struct ieee80211_supported_band *sband = + local->hw.wiphy->bands[info->band]; + info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { @@ -2199,7 +2213,7 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) { ieee80211_rate_set_vht(info->control.rates, vht_mcs, vht_nss); - } else { + } else if (sband) { for (i = 0; i < sband->n_bitrates; i++) { if (rate * 5 != sband->bitrates[i].bitrate) continue; @@ -2236,8 +2250,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_CTL_INJECTED; - /* Sanity-check and process the injection radiotap header */ - if (!ieee80211_parse_tx_radiotap(skb, dev)) + /* Sanity-check the length of the radiotap header */ + if (!ieee80211_validate_radiotap_len(skb)) goto fail; /* we now know there is a radiotap header with a length we can use */ @@ -2351,6 +2365,14 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, ieee80211_select_queue_80211(sdata, skb, hdr); skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority)); + /* + * Process the radiotap header. This will now take into account the + * selected chandef above to accurately set injection rates and + * retransmissions. + */ + if (!ieee80211_parse_tx_radiotap(skb, dev)) + goto fail_rcu; + /* remove the injection radiotap header */ skb_pull(skb, len_rthdr); @@ -3264,6 +3286,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!ieee80211_hw_check(&local->hw, TX_AMSDU)) return false; + if (sdata->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_ENABLED) + return false; + if (skb_is_gso(skb)) return false; @@ -3369,15 +3394,21 @@ out: * Can be called while the sta lock is held. Anything that can cause packets to * be generated will cause deadlock! 
*/ -static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, u8 pn_offs, - struct ieee80211_key *key, - struct sk_buff *skb) +static ieee80211_tx_result +ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 pn_offs, + struct ieee80211_key *key, + struct ieee80211_tx_data *tx) { + struct sk_buff *skb = tx->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (void *)skb->data; u8 tid = IEEE80211_NUM_TIDS; + if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL) && + ieee80211_tx_h_rate_ctrl(tx) != TX_CONTINUE) + return TX_DROP; + if (key) info->control.hw_key = &key->conf; @@ -3426,6 +3457,8 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, break; } } + + return TX_CONTINUE; } static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, @@ -3529,24 +3562,17 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, tx.sta = sta; tx.key = fast_tx->key; - if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { - tx.skb = skb; - r = ieee80211_tx_h_rate_ctrl(&tx); - skb = tx.skb; - tx.skb = NULL; - - if (r != TX_CONTINUE) { - if (r != TX_QUEUED) - kfree_skb(skb); - return true; - } - } - if (ieee80211_queue_skb(local, sdata, sta, skb)) return true; - ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs, - fast_tx->key, skb); + tx.skb = skb; + r = ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs, + fast_tx->key, &tx); + tx.skb = NULL; + if (r == TX_DROP) { + kfree_skb(skb); + return true; + } if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, @@ -3651,8 +3677,16 @@ begin: else info->flags &= ~IEEE80211_TX_CTL_AMPDU; - if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) + if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) { + if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { + r = ieee80211_tx_h_rate_ctrl(&tx); + if (r != TX_CONTINUE) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } + } goto encap_out; + } if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) { struct sta_info *sta = container_of(txq->sta, struct sta_info, @@ -3663,8 +3697,12 @@ begin: (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) pn_offs = ieee80211_hdrlen(hdr->frame_control); - ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs, - tx.key, skb); + r = ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs, + tx.key, &tx); + if (r != TX_CONTINUE) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } } else { if (invoke_tx_handlers_late(&tx)) goto begin; @@ -3744,102 +3782,259 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue); struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) { struct ieee80211_local *local = hw_to_local(hw); + struct airtime_sched_info *air_sched; + u64 now = ktime_get_boottime_ns(); struct ieee80211_txq *ret = NULL; - struct txq_info *txqi = NULL, *head = NULL; - bool found_eligible_txq = false; + struct airtime_info *air_info; + struct txq_info *txqi = NULL; + struct rb_node *node; + bool first = false; - spin_lock_bh(&local->active_txq_lock[ac]); + air_sched = &local->airtime[ac]; + spin_lock_bh(&air_sched->lock); - begin: - txqi = list_first_entry_or_null(&local->active_txqs[ac], - struct txq_info, - schedule_order); - if (!txqi) + node = air_sched->schedule_pos; + +begin: + if (!node) { + node = rb_first_cached(&air_sched->active_txqs); + first = true; + } else { + node = rb_next(node); + } + + if (!node) goto out; - if (txqi == head) { - if 
(!found_eligible_txq) - goto out; - else - found_eligible_txq = false; + txqi = container_of(node, struct txq_info, schedule_order); + air_info = to_airtime_info(&txqi->txq); + + if (air_info->v_t > air_sched->v_t && + (!first || !airtime_catchup_v_t(air_sched, air_info->v_t, now))) + goto out; + + if (!ieee80211_txq_airtime_check(hw, &txqi->txq)) { + first = false; + goto begin; } - if (!head) - head = txqi; + air_sched->schedule_pos = node; + air_sched->last_schedule_activity = now; + ret = &txqi->txq; +out: + spin_unlock_bh(&air_sched->lock); + return ret; +} +EXPORT_SYMBOL(ieee80211_next_txq); - if (txqi->txq.sta) { - struct sta_info *sta = container_of(txqi->txq.sta, - struct sta_info, sta); - bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq); - s64 deficit = sta->airtime[txqi->txq.ac].deficit; +static void __ieee80211_insert_txq(struct rb_root_cached *root, + struct txq_info *txqi) +{ + struct rb_node **new = &root->rb_root.rb_node; + struct airtime_info *old_air, *new_air; + struct rb_node *parent = NULL; + struct txq_info *__txqi; + bool leftmost = true; + + while (*new) { + parent = *new; + __txqi = rb_entry(parent, struct txq_info, schedule_order); + old_air = to_airtime_info(&__txqi->txq); + new_air = to_airtime_info(&txqi->txq); + + if (new_air->v_t <= old_air->v_t) { + new = &parent->rb_left; + } else { + new = &parent->rb_right; + leftmost = false; + } + } - if (aql_check) - found_eligible_txq = true; + rb_link_node(&txqi->schedule_order, parent, new); + rb_insert_color_cached(&txqi->schedule_order, root, leftmost); +} - if (deficit < 0) - sta->airtime[txqi->txq.ac].deficit += - sta->airtime_weight; +void ieee80211_resort_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct airtime_info *air_info = to_airtime_info(txq); + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = to_txq_info(txq); + struct airtime_sched_info *air_sched; - if (deficit < 0 || !aql_check) { - list_move_tail(&txqi->schedule_order, - &local->active_txqs[txqi->txq.ac]); - goto begin; + air_sched = &local->airtime[txq->ac]; + + lockdep_assert_held(&air_sched->lock); + + if (!RB_EMPTY_NODE(&txqi->schedule_order)) { + struct airtime_info *a_prev = NULL, *a_next = NULL; + struct txq_info *t_prev, *t_next; + struct rb_node *n_prev, *n_next; + + /* Erasing a node can cause an expensive rebalancing operation, + * so we check the previous and next nodes first and only remove + * and re-insert if the current node is not already in the + * correct position. 
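+ * (With a_prev/a_next as the rbtree neighbours, the node is already
+ * in order when a_prev->v_t <= air_info->v_t < a_next->v_t, treating
+ * a missing neighbour as satisfied, and the erase/re-insert is
+ * skipped entirely.)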
+ */ + if ((n_prev = rb_prev(&txqi->schedule_order)) != NULL) { + t_prev = container_of(n_prev, struct txq_info, + schedule_order); + a_prev = to_airtime_info(&t_prev->txq); + } + + if ((n_next = rb_next(&txqi->schedule_order)) != NULL) { + t_next = container_of(n_next, struct txq_info, + schedule_order); + a_next = to_airtime_info(&t_next->txq); } + + if ((!a_prev || a_prev->v_t <= air_info->v_t) && + (!a_next || a_next->v_t > air_info->v_t)) + return; + + if (air_sched->schedule_pos == &txqi->schedule_order) + air_sched->schedule_pos = n_prev; + + rb_erase_cached(&txqi->schedule_order, + &air_sched->active_txqs); + RB_CLEAR_NODE(&txqi->schedule_order); + __ieee80211_insert_txq(&air_sched->active_txqs, txqi); } +} + +void ieee80211_update_airtime_weight(struct ieee80211_local *local, + struct airtime_sched_info *air_sched, + u64 now, bool force) +{ + struct airtime_info *air_info, *tmp; + u64 weight_sum = 0; + + if (unlikely(!now)) + now = ktime_get_boottime_ns(); + lockdep_assert_held(&air_sched->lock); + + if (!force && (air_sched->last_weight_update < + now - AIRTIME_ACTIVE_DURATION)) + return; + + list_for_each_entry_safe(air_info, tmp, + &air_sched->active_list, list) { + if (airtime_is_active(air_info, now)) + weight_sum += air_info->weight; + else + list_del_init(&air_info->list); + } + airtime_weight_sum_set(air_sched, weight_sum); + air_sched->last_weight_update = now; +} - if (txqi->schedule_round == local->schedule_round[ac]) +void ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) + __acquires(txq_lock) __releases(txq_lock) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = to_txq_info(txq); + struct airtime_sched_info *air_sched; + u64 now = ktime_get_boottime_ns(); + struct airtime_info *air_info; + u8 ac = txq->ac; + bool was_active; + + air_sched = &local->airtime[ac]; + air_info = to_airtime_info(txq); + + spin_lock_bh(&air_sched->lock); + was_active = airtime_is_active(air_info, now); + airtime_set_active(air_sched, air_info, now); + + if (!RB_EMPTY_NODE(&txqi->schedule_order)) goto out; - list_del_init(&txqi->schedule_order); - txqi->schedule_round = local->schedule_round[ac]; - ret = &txqi->txq; + /* If the station has been inactive for a while, catch up its v_t so it + * doesn't get indefinite priority; see comment above the definition of + * AIRTIME_MAX_BEHIND. 
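+ * (A txq that just became active is caught up as soon as it is
+ * behind at all, while one that stayed active is only clamped once
+ * it lags the global v_t by more than AIRTIME_MAX_BEHIND.)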
+ */ + if ((!was_active && air_info->v_t < air_sched->v_t) || + air_info->v_t < air_sched->v_t - AIRTIME_MAX_BEHIND) + air_info->v_t = air_sched->v_t; + + ieee80211_update_airtime_weight(local, air_sched, now, !was_active); + __ieee80211_insert_txq(&air_sched->active_txqs, txqi); out: - spin_unlock_bh(&local->active_txq_lock[ac]); - return ret; + spin_unlock_bh(&air_sched->lock); } -EXPORT_SYMBOL(ieee80211_next_txq); +EXPORT_SYMBOL(ieee80211_schedule_txq); -void __ieee80211_schedule_txq(struct ieee80211_hw *hw, - struct ieee80211_txq *txq, - bool force) +static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, + bool purge) { struct ieee80211_local *local = hw_to_local(hw); struct txq_info *txqi = to_txq_info(txq); + struct airtime_sched_info *air_sched; + struct airtime_info *air_info; - spin_lock_bh(&local->active_txq_lock[txq->ac]); - - if (list_empty(&txqi->schedule_order) && - (force || !skb_queue_empty(&txqi->frags) || - txqi->tin.backlog_packets)) { - /* If airtime accounting is active, always enqueue STAs at the - * head of the list to ensure that they only get moved to the - * back by the airtime DRR scheduler once they have a negative - * deficit. A station that already has a negative deficit will - * get immediately moved to the back of the list on the next - * call to ieee80211_next_txq(). - */ - if (txqi->txq.sta && local->airtime_flags && - wiphy_ext_feature_isset(local->hw.wiphy, - NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) - list_add(&txqi->schedule_order, - &local->active_txqs[txq->ac]); - else - list_add_tail(&txqi->schedule_order, - &local->active_txqs[txq->ac]); + air_sched = &local->airtime[txq->ac]; + air_info = to_airtime_info(&txqi->txq); + + lockdep_assert_held(&air_sched->lock); + + if (purge) { + list_del_init(&air_info->list); + ieee80211_update_airtime_weight(local, air_sched, 0, true); } - spin_unlock_bh(&local->active_txq_lock[txq->ac]); + if (RB_EMPTY_NODE(&txqi->schedule_order)) + return; + + if (air_sched->schedule_pos == &txqi->schedule_order) + air_sched->schedule_pos = rb_prev(&txqi->schedule_order); + + if (!purge) + airtime_set_active(air_sched, air_info, + ktime_get_boottime_ns()); + + rb_erase_cached(&txqi->schedule_order, + &air_sched->active_txqs); + RB_CLEAR_NODE(&txqi->schedule_order); } -EXPORT_SYMBOL(__ieee80211_schedule_txq); + +void ieee80211_unschedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, + bool purge) + __acquires(txq_lock) __releases(txq_lock) +{ + struct ieee80211_local *local = hw_to_local(hw); + + spin_lock_bh(&local->airtime[txq->ac].lock); + __ieee80211_unschedule_txq(hw, txq, purge); + spin_unlock_bh(&local->airtime[txq->ac].lock); +} + +void ieee80211_return_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq, bool force) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = to_txq_info(txq); + + spin_lock_bh(&local->airtime[txq->ac].lock); + + if (!RB_EMPTY_NODE(&txqi->schedule_order) && !force && + !txq_has_queue(txq)) + __ieee80211_unschedule_txq(hw, txq, false); + + spin_unlock_bh(&local->airtime[txq->ac].lock); +} +EXPORT_SYMBOL(ieee80211_return_txq); DEFINE_STATIC_KEY_FALSE(aql_disable); bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { - struct sta_info *sta; + struct airtime_info *air_info = to_airtime_info(txq); struct ieee80211_local *local = hw_to_local(hw); if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) @@ -3854,15 +4049,12 @@ bool ieee80211_txq_airtime_check(struct 
ieee80211_hw *hw, if (unlikely(txq->tid == IEEE80211_NUM_TIDS)) return true; - sta = container_of(txq->sta, struct sta_info, sta); - if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) < - sta->airtime[txq->ac].aql_limit_low) + if (atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_low) return true; if (atomic_read(&local->aql_total_pending_airtime) < local->aql_threshold && - atomic_read(&sta->airtime[txq->ac].aql_tx_pending) < - sta->airtime[txq->ac].aql_limit_high) + atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_high) return true; return false; @@ -3872,63 +4064,85 @@ EXPORT_SYMBOL(ieee80211_txq_airtime_check); bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { + struct txq_info *first_txqi = NULL, *txqi = to_txq_info(txq); struct ieee80211_local *local = hw_to_local(hw); - struct txq_info *iter, *tmp, *txqi = to_txq_info(txq); - struct sta_info *sta; - u8 ac = txq->ac; + struct airtime_sched_info *air_sched; + struct airtime_info *air_info; + struct rb_node *node = NULL; + bool ret = false; + u64 now; - spin_lock_bh(&local->active_txq_lock[ac]); - if (!txqi->txq.sta) - goto out; + if (!ieee80211_txq_airtime_check(hw, txq)) + return false; + + air_sched = &local->airtime[txq->ac]; + spin_lock_bh(&air_sched->lock); - if (list_empty(&txqi->schedule_order)) + if (RB_EMPTY_NODE(&txqi->schedule_order)) goto out; - list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac], - schedule_order) { - if (iter == txqi) - break; + now = ktime_get_boottime_ns(); - if (!iter->txq.sta) { - list_move_tail(&iter->schedule_order, - &local->active_txqs[ac]); - continue; - } - sta = container_of(iter->txq.sta, struct sta_info, sta); - if (sta->airtime[ac].deficit < 0) - sta->airtime[ac].deficit += sta->airtime_weight; - list_move_tail(&iter->schedule_order, &local->active_txqs[ac]); - } + /* Like in ieee80211_next_txq(), make sure the first station in the + * scheduling order is eligible for transmission to avoid starvation. 
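+ * (The global v_t is first caught up to the head node's v_t; this
+ * txq may then transmit only while its own v_t does not exceed the
+ * global one.)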
+ */ + node = rb_first_cached(&air_sched->active_txqs); + if (node) { + first_txqi = container_of(node, struct txq_info, + schedule_order); + air_info = to_airtime_info(&first_txqi->txq); - sta = container_of(txqi->txq.sta, struct sta_info, sta); - if (sta->airtime[ac].deficit >= 0) - goto out; + if (air_sched->v_t < air_info->v_t) + airtime_catchup_v_t(air_sched, air_info->v_t, now); + } - sta->airtime[ac].deficit += sta->airtime_weight; - list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]); - spin_unlock_bh(&local->active_txq_lock[ac]); + air_info = to_airtime_info(&txqi->txq); + if (air_info->v_t <= air_sched->v_t) { + air_sched->last_schedule_activity = now; + ret = true; + } - return false; out: - if (!list_empty(&txqi->schedule_order)) - list_del_init(&txqi->schedule_order); - spin_unlock_bh(&local->active_txq_lock[ac]); - - return true; + spin_unlock_bh(&air_sched->lock); + return ret; } EXPORT_SYMBOL(ieee80211_txq_may_transmit); void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) { struct ieee80211_local *local = hw_to_local(hw); + struct airtime_sched_info *air_sched = &local->airtime[ac]; - spin_lock_bh(&local->active_txq_lock[ac]); - local->schedule_round[ac]++; - spin_unlock_bh(&local->active_txq_lock[ac]); + spin_lock_bh(&air_sched->lock); + air_sched->schedule_pos = NULL; + spin_unlock_bh(&air_sched->lock); } EXPORT_SYMBOL(ieee80211_txq_schedule_start); +static void +ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) +{ + struct rate_control_ref *ref = sdata->local->rate_ctrl; + u16 tid; + + if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) + return; + + if (!sta || !sta->sta.ht_cap.ht_supported || + !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || + skb->protocol == sdata->control_port_protocol) + return; + + tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + if (likely(sta->ampdu_mlme.tid_tx[tid])) + return; + + ieee80211_start_tx_ba_session(&sta->sta, tid, 0); +} + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags, @@ -3959,6 +4173,8 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, skb_get_hash(skb); } + ieee80211_aggr_check(sdata, sta, skb); + if (sta) { struct ieee80211_fast_tx *fast_tx; @@ -4222,6 +4438,8 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, memset(info, 0, sizeof(*info)); + ieee80211_aggr_check(sdata, sta, skb); + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); if (tid_tx) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0a0481f5af48..05e96212b104 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -6,7 +6,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * utilities for mac80211 */ @@ -947,7 +947,7 @@ static void ieee80211_parse_extension_element(u32 *crc, switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: - if (len == sizeof(*elems->mu_edca_param_set)) { + if (len >= sizeof(*elems->mu_edca_param_set)) { elems->mu_edca_param_set = data; if (crc) *crc = crc32_be(*crc, (void *)elem, @@ -968,7 +968,7 @@ static void ieee80211_parse_extension_element(u32 *crc, } break; case WLAN_EID_EXT_UORA: - if (len == 1) + if (len >= 1) elems->uora_element = data; break; case 
WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME: @@ -976,7 +976,7 @@ static void ieee80211_parse_extension_element(u32 *crc, elems->max_channel_switch_time = data; break; case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION: - if (len == sizeof(*elems->mbssid_config_ie)) + if (len >= sizeof(*elems->mbssid_config_ie)) elems->mbssid_config_ie = data; break; case WLAN_EID_EXT_HE_SPR: @@ -985,7 +985,7 @@ static void ieee80211_parse_extension_element(u32 *crc, elems->he_spr = data; break; case WLAN_EID_EXT_HE_6GHZ_CAPA: - if (len == sizeof(*elems->he_6ghz_capa)) + if (len >= sizeof(*elems->he_6ghz_capa)) elems->he_6ghz_capa = data; break; } @@ -1074,14 +1074,14 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, switch (id) { case WLAN_EID_LINK_ID: - if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) { + if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) { elem_parse_failed = true; break; } elems->lnk_id = (void *)(pos - 2); break; case WLAN_EID_CHAN_SWITCH_TIMING: - if (elen != sizeof(struct ieee80211_ch_switch_timing)) { + if (elen < sizeof(struct ieee80211_ch_switch_timing)) { elem_parse_failed = true; break; } @@ -1244,7 +1244,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->sec_chan_offs = (void *)pos; break; case WLAN_EID_CHAN_SWITCH_PARAM: - if (elen != + if (elen < sizeof(*elems->mesh_chansw_params_ie)) { elem_parse_failed = true; break; @@ -1253,7 +1253,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, break; case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: if (!action || - elen != sizeof(*elems->wide_bw_chansw_ie)) { + elen < sizeof(*elems->wide_bw_chansw_ie)) { elem_parse_failed = true; break; } @@ -1272,7 +1272,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, pos, elen); if (ie) { - if (ie[1] == sizeof(*elems->wide_bw_chansw_ie)) + if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie)) elems->wide_bw_chansw_ie = (void *)(ie + 2); else @@ -1316,7 +1316,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->cisco_dtpc_elem = pos; break; case WLAN_EID_ADDBA_EXT: - if (elen != sizeof(struct ieee80211_addba_ext_ie)) { + if (elen < sizeof(struct ieee80211_addba_ext_ie)) { elem_parse_failed = true; break; } @@ -1342,7 +1342,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elem, elems); break; case WLAN_EID_S1G_CAPABILITIES: - if (elen == sizeof(*elems->s1g_capab)) + if (elen >= sizeof(*elems->s1g_capab)) elems->s1g_capab = (void *)pos; else elem_parse_failed = true; @@ -1693,7 +1693,10 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) { mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); err = ieee80211_wep_encrypt(local, skb, key, key_len, key_idx); - WARN_ON(err); + if (WARN_ON(err)) { + kfree_skb(skb); + return; + } } IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | @@ -1934,13 +1937,26 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, *offset = noffset; } - he_cap = ieee80211_get_he_sta_cap(sband); - if (he_cap) { + he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); + if (he_cap && + cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), + IEEE80211_CHAN_NO_HE)) { pos = ieee80211_ie_build_he_cap(pos, he_cap, end); if (!pos) goto out_err; + } + + if (cfg80211_any_usable_channels(local->hw.wiphy, + BIT(NL80211_BAND_6GHZ), + 
IEEE80211_CHAN_NO_HE)) { + struct ieee80211_supported_band *sband6; + + sband6 = local->hw.wiphy->bands[NL80211_BAND_6GHZ]; + he_cap = ieee80211_get_he_iftype_cap(sband6, + ieee80211_vif_type_p2p(&sdata->vif)); - if (sband->band == NL80211_BAND_6GHZ) { + if (he_cap) { enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif); __le16 cap = ieee80211_get_he_6ghz_capa(sband, iftype); @@ -2178,8 +2194,6 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) list_for_each_entry(ctx, &local->chanctx_list, list) ctx->driver_present = false; mutex_unlock(&local->chanctx_mtx); - - cfg80211_shutdown_all_interfaces(local->hw.wiphy); } static void ieee80211_assign_chanctx(struct ieee80211_local *local, @@ -2946,12 +2960,15 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, u8 *pos; u16 cap; - sband = ieee80211_get_sband(sdata); - if (!sband) + if (!cfg80211_any_usable_channels(sdata->local->hw.wiphy, + BIT(NL80211_BAND_6GHZ), + IEEE80211_CHAN_NO_HE)) return; + sband = sdata->local->hw.wiphy->bands[NL80211_BAND_6GHZ]; + iftd = ieee80211_get_sband_iftype_data(sband, iftype); - if (WARN_ON(!iftd)) + if (!iftd) return; /* Check for device HE 6 GHz capability before adding element */ diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 1ec4d36a39f0..7d738bd06f2c 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -23,6 +23,8 @@ struct mptcp_pernet { u8 mptcp_enabled; unsigned int add_addr_timeout; + u8 checksum_enabled; + u8 allow_join_initial_addr_port; }; static struct mptcp_pernet *mptcp_get_pernet(struct net *net) @@ -40,10 +42,22 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net) return mptcp_get_pernet(net)->add_addr_timeout; } +int mptcp_is_checksum_enabled(struct net *net) +{ + return mptcp_get_pernet(net)->checksum_enabled; +} + +int mptcp_allow_join_id0(struct net *net) +{ + return mptcp_get_pernet(net)->allow_join_initial_addr_port; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; + pernet->checksum_enabled = 0; + pernet->allow_join_initial_addr_port = 1; } #ifdef CONFIG_SYSCTL @@ -65,6 +79,22 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "checksum_enabled", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, + { + .procname = "allow_join_initial_addr_port", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, {} }; @@ -82,6 +112,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[0].data = &pernet->mptcp_enabled; table[1].data = &pernet->add_addr_timeout; + table[2].data = &pernet->checksum_enabled; + table[3].data = &pernet->allow_join_initial_addr_port; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index eb2dc6dbe212..52ea2517e856 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -25,6 +25,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), + SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), + SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR), SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL), 
SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE), SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index f0da4f060fe1..193466c9b549 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -18,6 +18,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ + MPTCP_MIB_DSSTCPMISMATCH, /* DSS mapping did not match TCP's sequence numbers */ + MPTCP_MIB_DATACSUMERR, /* The data checksum failed */ MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */ MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */ MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */ diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index f16d9b5ee978..8f88ddeab6a2 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -144,6 +144,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, info->mptcpi_write_seq = READ_ONCE(msk->write_seq); info->mptcpi_snd_una = READ_ONCE(msk->snd_una); info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); + info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); unlock_sock_fast(sk, slow); } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 6b825fb3fa83..a05270996613 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb, else expected_opsize = TCPOLEN_MPTCP_MPC_SYN; } - if (opsize != expected_opsize) + + /* Cf. RFC 8684 Section 3.3.0: + * If a checksum is present but its use had + * not been negotiated in the MP_CAPABLE handshake, the receiver MUST + * close the subflow with a RST, as it is not behaving as negotiated. + * If a checksum is not present when its use has been negotiated, the + * receiver MUST close the subflow with a RST, as it is considered + * broken. + * We parse the option even with a mismatching csum presence, so that + * later in subflow_data_ready we can trigger the reset. + */ + if (opsize != expected_opsize && + (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA || + opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM)) break; /* try to be gentle vs future versions on the initial syn */ @@ -66,16 +79,12 @@ static void mptcp_parse_option(const struct sk_buff *skb, * host requires the use of checksums, checksums MUST be used. * In other words, the only way for checksums not to be used * is if both hosts in their SYNs set A=0." - * - * Section 3.3.0: - * "If a checksum is not present when its use has been - * negotiated, the receiver MUST close the subflow with a RST as - * it is considered broken." - * - * We don't implement DSS checksum - fall back to TCP. 
*/ if (flags & MPTCP_CAP_CHECKSUM_REQD) - break; + mp_opt->csum_reqd = 1; + + if (flags & MPTCP_CAP_DENY_JOIN_ID0) + mp_opt->deny_join_id0 = 1; mp_opt->mp_capable = 1; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { @@ -86,7 +95,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; } - if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) { + if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) { /* Section 3.1.: * "the data parameters in a MP_CAPABLE are semantically * equivalent to those in a DSS option and can be used @@ -98,9 +107,14 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } - pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d", + if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum_reqd = 1; + ptr += 2; + } + pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", version, flags, opsize, mp_opt->sndr_key, - mp_opt->rcvr_key, mp_opt->data_len); + mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break; case MPTCPOPT_MP_JOIN: @@ -171,10 +185,8 @@ static void mptcp_parse_option(const struct sk_buff *skb, expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; } - /* RFC 6824, Section 3.3: - * If a checksum is present, but its use had - * not been negotiated in the MP_CAPABLE handshake, - * the checksum field MUST be ignored. + /* Always parse any csum presence combination, we will enforce + * RFC 8684 Section 3.3.0 checks later in subflow_data_ready */ if (opsize != expected_opsize && opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) @@ -209,9 +221,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; - pr_debug("data_seq=%llu subflow_seq=%u data_len=%u", + if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { + mp_opt->csum_reqd = 1; + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + ptr += 2; + } + + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", mp_opt->data_seq, mp_opt->subflow_seq, - mp_opt->data_len); + mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum); } break; @@ -323,9 +341,12 @@ static void mptcp_parse_option(const struct sk_buff *skb, } } -void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); const struct tcphdr *th = tcp_hdr(skb); const unsigned char *ptr; int length; @@ -341,6 +362,8 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->dss = 0; mp_opt->mp_prio = 0; mp_opt->reset = 0; + mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled); + mp_opt->deny_join_id0 = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -356,6 +379,8 @@ void mptcp_get_options(const struct sk_buff *skb, length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; @@ -380,6 +405,8 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { opts->suboptions = OPTION_MPTCP_MPC_SYN; + opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk)); + opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if 
(subflow->request_join) { @@ -435,8 +462,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_ext *mpext; unsigned int data_len; + u8 len; /* When skb is not available, we better over-estimate the emitted * options len. A full DSS option (28 bytes) is longer than @@ -465,16 +494,27 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; + opts->csum_reqd = READ_ONCE(msk->csum_enabled); + opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk)); /* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK * packets that start the first subflow of an MPTCP connection, * as well as the first packet that carries data */ - if (data_len > 0) - *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4); - else + if (data_len > 0) { + len = TCPOLEN_MPTCP_MPC_ACK_DATA; + if (opts->csum_reqd) { + /* we need to propagate more info to csum the pseudo hdr */ + opts->ext_copy.data_seq = mpext->data_seq; + opts->ext_copy.subflow_seq = mpext->subflow_seq; + opts->ext_copy.csum = mpext->csum; + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } + *size = ALIGN(len, 4); + } else { *size = TCPOLEN_MPTCP_MPC_ACK; + } pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", subflow, subflow->local_key, subflow->remote_key, @@ -535,18 +575,21 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, bool ret = false; u64 ack_seq; + opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL; if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { - unsigned int map_size; + unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; - map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; + if (mpext) { + if (opts->csum_reqd) + map_size += TCPOLEN_MPTCP_DSS_CHECKSUM; - remaining -= map_size; - dss_size = map_size; - if (mpext) opts->ext_copy = *mpext; + } + remaining -= map_size; + dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; @@ -789,6 +832,8 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, if (subflow_req->mp_capable) { opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; + opts->csum_reqd = subflow_req->csum_reqd; + opts->allow_join_id0 = subflow_req->allow_join_id0; *size = TCPOLEN_MPTCP_MPC_SYNACK; pr_debug("subflow_req=%p, local_key=%llu", subflow_req, subflow_req->local_key); @@ -867,6 +912,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, return false; } + if (mp_opt->deny_join_id0) + WRITE_ONCE(msk->pm.remote_deny_join_id0, true); + if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); mptcp_subflow_fully_established(subflow, mp_opt); @@ -1007,7 +1055,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) return; } - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; @@ -1099,6 +1147,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } mpext->data_len = mp_opt.data_len; mpext->use_map = 1; + mpext->csum_reqd = mp_opt.csum_reqd; + + if 
(mpext->csum_reqd) + mpext->csum = mp_opt.csum; } } @@ -1118,25 +1170,53 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } +static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +{ + struct csum_pseudo_header header; + __wsum csum; + + /* cfr RFC 8684 3.3.1.: + * the data sequence number used in the pseudo-header is + * always the 64-bit value, irrespective of what length is used in the + * DSS option itself. + */ + header.data_seq = cpu_to_be64(mpext->data_seq); + header.subflow_seq = htonl(mpext->subflow_seq); + header.data_len = htons(mpext->data_len); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum)); + return (__force u16)csum_fold(csum); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions) { - u8 len; + u8 len, flag = MPTCP_CAP_HMAC_SHA256; - if (OPTION_MPTCP_MPC_SYN & opts->suboptions) + if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYN; - else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) + } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; - else if (opts->ext_copy.data_len) + } else if (opts->ext_copy.data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; - else + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } else { len = TCPOLEN_MPTCP_MPC_ACK; + } + + if (opts->csum_reqd) + flag |= MPTCP_CAP_CHECKSUM_REQD; + + if (!opts->allow_join_id0) + flag |= MPTCP_CAP_DENY_JOIN_ID0; *ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, MPTCP_SUPPORTED_VERSION, - MPTCP_CAP_HMAC_SHA256); + flag); if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions)) @@ -1152,8 +1232,13 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, if (!opts->ext_copy.data_len) goto mp_capable_done; - put_unaligned_be32(opts->ext_copy.data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + mptcp_make_csum(&opts->ext_copy), ptr); + } else { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } ptr += 1; } @@ -1305,6 +1390,9 @@ mp_capable_done: flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; if (mpext->data_fin) flags |= MPTCP_DSS_DATA_FIN; + + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); @@ -1324,8 +1412,13 @@ mp_capable_done: ptr += 2; put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; - put_unaligned_be32(mpext->data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(mpext->data_len << 16 | + mptcp_make_csum(mpext), ptr); + } else { + put_unaligned_be32(mpext->data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } } } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 9d00fa6d22e9..639271e09604 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -320,6 +320,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) WRITE_ONCE(msk->pm.addr_signal, 0); WRITE_ONCE(msk->pm.accept_addr, false); WRITE_ONCE(msk->pm.accept_subflow, false); + WRITE_ONCE(msk->pm.remote_deny_join_id0, false); msk->pm.status = 0; spin_lock_init(&msk->pm.lock); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 09722598994d..d2591ebf01d9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -451,7 +451,8 @@ static void 
mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check if should create a new subflow */ if (msk->pm.local_addr_used < local_addr_max && - msk->pm.subflows < subflows_max) { + msk->pm.subflows < subflows_max && + !READ_ONCE(msk->pm.remote_deny_join_id0)) { local = select_local_address(pernet, msk); if (local) { struct mptcp_addr_info remote = { 0 }; @@ -540,6 +541,7 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); if (subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; spin_unlock_bh(&msk->pm.lock); pr_debug("send ack for %s%s%s", @@ -547,9 +549,9 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "", mptcp_pm_should_add_signal_port(msk) ? " [port]" : ""); - lock_sock(ssk); + slow = lock_sock_fast(ssk); tcp_send_ack(ssk); - release_sock(ssk); + unlock_sock_fast(ssk, slow); spin_lock_bh(&msk->pm.lock); } } @@ -566,6 +568,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct sock *sk = (struct sock *)msk; struct mptcp_addr_info local; + bool slow; local_address((struct sock_common *)ssk, &local); if (!addresses_equal(&local, addr, addr->port)) @@ -578,9 +581,9 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, spin_unlock_bh(&msk->pm.lock); pr_debug("send ack for mp_prio"); - lock_sock(ssk); + slow = lock_sock_fast(ssk); tcp_send_ack(ssk); - release_sock(ssk); + unlock_sock_fast(ssk, slow); spin_lock_bh(&msk->pm.lock); return 0; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 993095089990..7bb82424e551 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -286,11 +286,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, /* try to fetch required memory from subflow */ if (!sk_rmem_schedule(sk, skb, skb->truesize)) { - if (ssk->sk_forward_alloc < skb->truesize) - goto drop; - __sk_mem_reclaim(ssk, skb->truesize); - if (!sk_rmem_schedule(sk, skb, skb->truesize)) + int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT; + + if (ssk->sk_forward_alloc < amount) goto drop; + + ssk->sk_forward_alloc -= amount; + sk->sk_forward_alloc += amount; } has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; @@ -431,56 +433,55 @@ static void mptcp_send_ack(struct mptcp_sock *msk) mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; - lock_sock(ssk); + slow = lock_sock_fast(ssk); if (tcp_can_send_ack(ssk)) tcp_send_ack(ssk); - release_sock(ssk); + unlock_sock_fast(ssk, slow); } } -static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk) +static void mptcp_subflow_cleanup_rbuf(struct sock *ssk) { - int ret; + bool slow; - lock_sock(ssk); - ret = tcp_can_send_ack(ssk); - if (ret) + slow = lock_sock_fast(ssk); + if (tcp_can_send_ack(ssk)) tcp_cleanup_rbuf(ssk, 1); - release_sock(ssk); - return ret; + unlock_sock_fast(ssk, slow); +} + +static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty) +{ + const struct inet_connection_sock *icsk = inet_csk(ssk); + u8 ack_pending = READ_ONCE(icsk->icsk_ack.pending); + const struct tcp_sock *tp = tcp_sk(ssk); + + return (ack_pending & ICSK_ACK_SCHED) && + ((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) > + READ_ONCE(icsk->icsk_ack.rcv_mss)) || + (rx_empty && ack_pending & + (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED))); } static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) { - struct sock *ack_hint = 
READ_ONCE(msk->ack_hint); int old_space = READ_ONCE(msk->old_wspace); struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - bool cleanup; + int space = __mptcp_space(sk); + bool cleanup, rx_empty; - /* this is a simple superset of what tcp_cleanup_rbuf() implements - * so that we don't have to acquire the ssk socket lock most of the time - * to do actually nothing - */ - cleanup = __mptcp_space(sk) - old_space >= max(0, old_space); - if (!cleanup) - return; + cleanup = (space > 0) && (space >= (old_space << 1)); + rx_empty = !atomic_read(&sk->sk_rmem_alloc); - /* if the hinted ssk is still active, try to use it */ - if (likely(ack_hint)) { - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk)) - return; - } + if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty)) + mptcp_subflow_cleanup_rbuf(ssk); } - - /* otherwise pick the first active subflow */ - mptcp_for_each_subflow(msk, subflow) - if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow))) - return; } static bool mptcp_check_data_fin(struct sock *sk) @@ -625,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, break; } } while (more_data_avail); - WRITE_ONCE(msk->ack_hint, ssk); *bytes += moved; return done; @@ -677,18 +677,19 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ -static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) +static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; unsigned int moved = 0; - if (inet_sk_state_load(sk) == TCP_CLOSE) - return; - - mptcp_data_lock(sk); - __mptcp_move_skbs_from_subflow(msk, ssk, &moved); __mptcp_ofo_queue(msk); + if (unlikely(ssk->sk_err)) { + if (!sock_owned_by_user(sk)) + __mptcp_error_report(sk); + else + set_bit(MPTCP_ERROR_REPORT, &msk->flags); + } /* If the moves have caught up with the DATA_FIN sequence number * it's time to ack the DATA_FIN and change socket state, but @@ -697,7 +698,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) */ if (mptcp_pending_data_fin(sk, NULL)) mptcp_schedule_work(sk); - mptcp_data_unlock(sk); + return moved > 0; } void mptcp_data_ready(struct sock *sk, struct sock *ssk) @@ -705,7 +706,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(sk); int sk_rbuf, ssk_rbuf; - bool wake; /* The peer can send data while we are shutting down this * subflow at msk destruction time, but we must avoid enqueuing @@ -714,28 +714,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) if (unlikely(subflow->disposable)) return; - /* move_skbs_to_msk below can legitly clear the data_avail flag, - * but we will need later to properly woke the reader, cache its - * value - */ - wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL; - if (wake) - set_bit(MPTCP_DATA_READY, &msk->flags); - ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); sk_rbuf = READ_ONCE(sk->sk_rcvbuf); if (unlikely(ssk_rbuf > sk_rbuf)) sk_rbuf = ssk_rbuf; - /* over limit? can't append more skbs to msk */ + /* over limit? 
can't append more skbs to msk; also, no need to wake up */ if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) - goto wake; - - move_skbs_to_msk(msk, ssk); + return; -wake: - if (wake) + /* Wake up the reader only for in-sequence data */ + mptcp_data_lock(sk); + if (move_skbs_to_msk(msk, ssk)) { + set_bit(MPTCP_DATA_READY, &msk->flags); sk->sk_data_ready(sk); + } + mptcp_data_unlock(sk); } static bool mptcp_do_flush_join_list(struct mptcp_sock *msk) @@ -867,7 +861,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) sock_owned_by_me(sk); mptcp_for_each_subflow(msk, subflow) { - if (subflow->data_avail) + if (READ_ONCE(subflow->data_avail)) return mptcp_subflow_tcp_sock(subflow); } @@ -903,22 +897,14 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, df->data_seq + df->data_len == msk->write_seq; } -static int mptcp_wmem_with_overhead(struct sock *sk, int size) +static int mptcp_wmem_with_overhead(int size) { - struct mptcp_sock *msk = mptcp_sk(sk); - int ret, skbs; - - ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT); - skbs = (msk->tx_pending_data + size) / msk->size_goal_cache; - if (skbs < msk->skb_tx_cache.qlen) - return ret; - - return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER); + return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT); } static void __mptcp_wmem_reserve(struct sock *sk, int size) { - int amount = mptcp_wmem_with_overhead(sk, size); + int amount = mptcp_wmem_with_overhead(size); struct mptcp_sock *msk = mptcp_sk(sk); WARN_ON_ONCE(msk->wmem_reserved); @@ -1213,49 +1199,8 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp) return NULL; } -static bool mptcp_tx_cache_refill(struct sock *sk, int size, - struct sk_buff_head *skbs, int *total_ts) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - struct sk_buff *skb; - int space_needed; - - if (unlikely(tcp_under_memory_pressure(sk))) { - mptcp_mem_reclaim_partial(sk); - - /* under pressure pre-allocate at most a single skb */ - if (msk->skb_tx_cache.qlen) - return true; - space_needed = msk->size_goal_cache; - } else { - space_needed = msk->tx_pending_data + size - - msk->skb_tx_cache.qlen * msk->size_goal_cache; - } - - while (space_needed > 0) { - skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation); - if (unlikely(!skb)) { - /* under memory pressure, try to pass the caller a - * single skb to allow forward progress - */ - while (skbs->qlen > 1) { - skb = __skb_dequeue_tail(skbs); - *total_ts -= skb->truesize; - __kfree_skb(skb); - } - return skbs->qlen > 0; - } - - *total_ts += skb->truesize; - __skb_queue_tail(skbs, skb); - space_needed -= msk->size_goal_cache; - } - return true; -} - static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp) { - struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb; if (ssk->sk_tx_skb_cache) { @@ -1266,22 +1211,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp) return true; } - skb = skb_peek(&msk->skb_tx_cache); - if (skb) { - if (likely(sk_wmem_schedule(ssk, skb->truesize))) { - skb = __skb_dequeue(&msk->skb_tx_cache); - if (WARN_ON_ONCE(!skb)) - return false; - - mptcp_wmem_uncharge(sk, skb->truesize); - ssk->sk_tx_skb_cache = skb; - return true; - } - - /* over memory limit, no point to try to allocate a new skb */ - return false; - } - skb = __mptcp_do_alloc_tx_skb(sk, gfp); if (!skb) return false; @@ -1297,7 +1226,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
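/* A minimal sketch of the page-quantum accounting used by the
 * __mptcp_move_skb() hunk earlier in this file; the helper name is
 * hypothetical (not part of this series) and the worked numbers assume
 * 4 KiB pages. Forward allocation moves between the subflow socket and
 * the msk in whole SK_MEM_QUANTUM units, the same granularity
 * __sk_mem_schedule() works in, so neither socket is left with a
 * fractional page accounted.
 */
static inline int example_fwd_alloc_amount(int truesize)
{
	/* sk_mem_pages() rounds up to whole quanta:
	 * truesize 3000 -> 1 page  -> amount 4096
	 * truesize 5000 -> 2 pages -> amount 8192
	 */
	return sk_mem_pages(truesize) << SK_MEM_QUANTUM_SHIFT;
}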
static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk) { return !ssk->sk_tx_skb_cache && - !skb_peek(&mptcp_sk(sk)->skb_tx_cache) && tcp_under_memory_pressure(sk); } @@ -1308,6 +1236,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk) return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation); } +/* note: this always recomputes the csum on the whole skb, even + * if we just appended a single frag; more state would be needed + * to update it incrementally + */ +static void mptcp_update_data_checksum(struct sk_buff *skb, int added) +{ + struct mptcp_ext *mpext = mptcp_get_ext(skb); + __wsum csum = ~csum_unfold(mpext->csum); + int offset = skb->len - added; + + mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset)); +} + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@ -1328,7 +1268,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, /* compute send limit */ info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); avail_size = info->size_goal; - msk->size_goal_cache = info->size_goal; skb = tcp_write_queue_tail(ssk); if (skb) { /* Limit the write to the size available in the @@ -1402,10 +1341,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, if (zero_window_probe) { mptcp_subflow_ctx(ssk)->rel_write_seq += ret; mpext->frozen = 1; - ret = 0; + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); tcp_push_pending_frames(ssk); + return 0; } out: + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); mptcp_subflow_ctx(ssk)->rel_write_seq += ret; return ret; } @@ -1673,7 +1616,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) while (msg_data_left(msg)) { int total_ts, frag_truesize = 0; struct mptcp_data_frag *dfrag; - struct sk_buff_head skbs; bool dfrag_collapsed; size_t psize, offset; @@ -1706,16 +1648,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) psize = pfrag->size - offset; psize = min_t(size_t, psize, msg_data_left(msg)); total_ts = psize + frag_truesize; - __skb_queue_head_init(&skbs); - if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts)) - goto wait_for_memory; - if (!mptcp_wmem_alloc(sk, total_ts)) { - __skb_queue_purge(&skbs); + if (!mptcp_wmem_alloc(sk, total_ts)) goto wait_for_memory; - } - skb_queue_splice_tail(&skbs, &msk->skb_tx_cache); if (copy_page_from_iter(dfrag->page, offset, psize, &msg->msg_iter) != psize) { mptcp_wmem_uncharge(sk, psize + frag_truesize); @@ -1772,7 +1708,7 @@ static void mptcp_wait_data(struct sock *sk, long *timeo) sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, timeo, - test_and_clear_bit(MPTCP_DATA_READY, &msk->flags), &wait); + test_bit(MPTCP_DATA_READY, &msk->flags), &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); @@ -1970,7 +1906,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) __mptcp_update_rmem(sk); done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); mptcp_data_unlock(sk); - tcp_cleanup_rbuf(ssk, moved); + + if (unlikely(ssk->sk_err)) + __mptcp_error_report(sk); unlock_sock_fast(ssk, slowpath); } while (!done); @@ -1983,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) ret |= __mptcp_ofo_queue(msk); __mptcp_splice_receive_queue(sk); mptcp_data_unlock(sk); - mptcp_cleanup_rbuf(msk); } if (ret) mptcp_check_data_fin((struct sock *)msk); @@ -2093,10 +2030,8 @@ static int mptcp_recvmsg(struct sock
*sk, struct msghdr *msg, size_t len, */ if (unlikely(__mptcp_move_skbs(msk))) set_bit(MPTCP_DATA_READY, &msk->flags); - } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) { - /* data to read but mptcp_wait_data() cleared DATA_READY */ - set_bit(MPTCP_DATA_READY, &msk->flags); } + out_err: if (cmsg_flags && copied >= 0) { if (cmsg_flags & MPTCP_CMSG_TS) @@ -2234,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (ssk == msk->last_snd) msk->last_snd = NULL; - if (ssk == msk->ack_hint) - msk->ack_hint = NULL; - if (ssk == msk->first) msk->first = NULL; @@ -2308,13 +2240,14 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); + bool slow; - lock_sock(tcp_sk); + slow = lock_sock_fast(tcp_sk); if (tcp_sk->sk_state != TCP_CLOSE) { tcp_send_active_reset(tcp_sk, GFP_ATOMIC); tcp_set_state(tcp_sk, TCP_CLOSE); } - release_sock(tcp_sk); + unlock_sock_fast(tcp_sk, slow); } inet_sk_state_store(sk, TCP_CLOSE); @@ -2359,8 +2292,8 @@ static void __mptcp_retrans(struct sock *sk) /* limit retransmission to the bytes already sent on some subflows */ info.sent = 0; - info.limit = dfrag->already_sent; - while (info.sent < dfrag->already_sent) { + info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + while (info.sent < info.limit) { if (!mptcp_alloc_tx_skb(sk, ssk)) break; @@ -2372,9 +2305,11 @@ static void __mptcp_retrans(struct sock *sk) copied += ret; info.sent += ret; } - if (copied) + if (copied) { + dfrag->already_sent = max(dfrag->already_sent, info.sent); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); + } mptcp_set_timeout(sk, ssk); release_sock(ssk); @@ -2442,17 +2377,15 @@ static int __mptcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&msk->rtx_queue); INIT_WORK(&msk->work, mptcp_worker); __skb_queue_head_init(&msk->receive_queue); - __skb_queue_head_init(&msk->skb_tx_cache); msk->out_of_order_queue = RB_ROOT; msk->first_pending = NULL; msk->wmem_reserved = 0; msk->rmem_released = 0; msk->tx_pending_data = 0; - msk->size_goal_cache = TCP_BASE_MSS; - msk->ack_hint = NULL; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; + WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_init(msk); @@ -2504,15 +2437,10 @@ static void __mptcp_clear_xmit(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - struct sk_buff *skb; WRITE_ONCE(msk->first_pending, NULL); list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) dfrag_clear(sk, dfrag); - while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) { - sk->sk_forward_alloc += skb->truesize; - kfree_skb(skb); - } } static void mptcp_cancel_work(struct sock *sk) @@ -2793,6 +2721,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->token = subflow_req->token; msk->subflow = NULL; WRITE_ONCE(msk->fully_established, false); + if (mp_opt->csum_reqd) + WRITE_ONCE(msk->csum_enabled, true); msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 89f6b73783d5..d8ad3270dfab 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -68,6 +68,8 @@ #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4 +#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) + /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) #define 
MPTCPOPT_HMAC_LEN 20 @@ -77,8 +79,9 @@ #define MPTCP_VERSION_MASK (0x0F) #define MPTCP_CAP_CHECKSUM_REQD BIT(7) #define MPTCP_CAP_EXTENSIBILITY BIT(6) +#define MPTCP_CAP_DENY_JOIN_ID0 BIT(5) #define MPTCP_CAP_HMAC_SHA256 BIT(0) -#define MPTCP_CAP_FLAG_MASK (0x3F) +#define MPTCP_CAP_FLAG_MASK (0x1F) /* MPTCP DSS flags */ #define MPTCP_DSS_DATA_FIN BIT(4) @@ -124,6 +127,7 @@ struct mptcp_options_received { u64 data_seq; u32 subflow_seq; u16 data_len; + __sum16 csum; u16 mp_capable : 1, mp_join : 1, fastclose : 1, @@ -133,7 +137,9 @@ struct mptcp_options_received { rm_addr : 1, mp_prio : 1, echo : 1, - backup : 1; + csum_reqd : 1, + backup : 1, + deny_join_id0 : 1; u32 token; u32 nonce; u64 thmac; @@ -188,6 +194,7 @@ struct mptcp_pm_data { bool work_pending; bool accept_addr; bool accept_subflow; + bool remote_deny_join_id0; u8 add_addr_signaled; u8 add_addr_accepted; u8 local_addr_used; @@ -234,15 +241,13 @@ struct mptcp_sock { bool snd_data_fin_enable; bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ + bool csum_enabled; spinlock_t join_list_lock; - struct sock *ack_hint; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; struct sk_buff_head receive_queue; - struct sk_buff_head skb_tx_cache; /* this is wmem accounted */ int tx_pending_data; - int size_goal_cache; struct list_head conn_list; struct list_head rtx_queue; struct mptcp_data_frag *first_pending; @@ -335,11 +340,20 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } +struct csum_pseudo_header { + __be64 data_seq; + __be32 subflow_seq; + __be16 data_len; + __sum16 csum; +}; + struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, - backup : 1; + backup : 1, + csum_reqd : 1, + allow_join_id0 : 1; u8 local_id; u8 remote_id; u64 local_key; @@ -362,7 +376,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk) enum mptcp_data_avail { MPTCP_SUBFLOW_NODATA, MPTCP_SUBFLOW_DATA_AVAIL, - MPTCP_SUBFLOW_OOO_DATA }; struct mptcp_delegated_action { @@ -387,6 +400,8 @@ struct mptcp_subflow_context { u32 map_subflow_seq; u32 ssn_offset; u32 map_data_len; + __wsum map_data_csum; + u32 map_csum_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, @@ -396,6 +411,8 @@ struct mptcp_subflow_context { pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, + map_csum_reqd : 1, + map_data_fin : 1, mpc_map : 1, backup : 1, send_mp_prio : 1, @@ -525,6 +542,8 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); +int mptcp_is_checksum_enabled(struct net *net); +int mptcp_allow_join_id0(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); @@ -576,7 +595,8 @@ int __init mptcp_proto_v6_init(void); struct sock *mptcp_sk_clone(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct request_sock *req); -void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 
33956337c46b..d55f4ef736a5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -108,6 +108,8 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis subflow_req->mp_capable = 0; subflow_req->mp_join = 0; + subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener)); + subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener)); subflow_req->msk = NULL; mptcp_token_init_request(req); } @@ -150,7 +152,7 @@ static int subflow_check_req(struct request_sock *req, return -EINVAL; #endif - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); @@ -247,7 +249,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, int err; subflow_init_req(req, sk_listener); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable && mp_opt.mp_join) return -EINVAL; @@ -394,7 +396,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (subflow->request_mptcp) { if (!mp_opt.mp_capable) { MPTCP_INC_STATS(sock_net(sk), @@ -404,6 +406,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) goto fallback; } + if (mp_opt.csum_reqd) + WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); + if (mp_opt.deny_join_id0) + WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; subflow->can_ack = 1; subflow->remote_key = mp_opt.sndr_key; @@ -638,7 +644,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * reordered MPC will cause fallback, but we don't have other * options. */ - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; @@ -648,7 +654,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); @@ -784,10 +790,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq) return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32)); } -static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) +static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { - WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d", - ssn, subflow->map_subflow_seq, subflow->map_data_len); + pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", + ssn, subflow->map_subflow_seq, subflow->map_data_len); } static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb) @@ -812,22 +818,104 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) /* Mapping covers data later in the subflow stream, * currently unsupported. 
*/ - warn_bad_map(subflow, ssn); + dbg_bad_map(subflow, ssn); return false; } if (unlikely(!before(ssn, subflow->map_subflow_seq + subflow->map_data_len))) { /* Mapping covers past subflow data, invalid */ - warn_bad_map(subflow, ssn + skb->len); + dbg_bad_map(subflow, ssn); return false; } return true; } +static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, + bool csum_reqd) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct csum_pseudo_header header; + u32 offset, seq, delta; + __wsum csum; + int len; + + if (!csum_reqd) + return MAPPING_OK; + + /* mapping already validated on previous traversal */ + if (subflow->map_csum_len == subflow->map_data_len) + return MAPPING_OK; + + /* traverse the receive queue, ensuring it contains a full + * DSS mapping and accumulating the related csum. + * Preserve the accumulated csum across multiple calls, to compute + * the csum only once + */ + delta = subflow->map_data_len - subflow->map_csum_len; + for (;;) { + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; + offset = seq - TCP_SKB_CB(skb)->seq; + + /* if the current skb has not been accounted yet, csum its contents + * up to the amount covered by the current DSS + */ + if (offset < skb->len) { + __wsum csum; + + len = min(skb->len - offset, delta); + csum = skb_checksum(skb, offset, len, 0); + subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum, + subflow->map_csum_len); + + delta -= len; + subflow->map_csum_len += len; + } + if (delta == 0) + break; + + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { + /* if this subflow is closed, the partial mapping + * will never be completed; flush the pending skbs, so + * that subflow_sched_work_if_closed() can kick in + */ + if (unlikely(ssk->sk_state == TCP_CLOSE)) + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + + /* not enough data to validate the csum */ + return MAPPING_EMPTY; + } + + /* the DSS mapping for the next skbs will be validated later, + * when a subsequent get_mapping_status() call processes them + */ + skb = skb->next; + } + + /* note that 'map_data_len' accounts only for the carried data and does + * not include the eventual seq increment due to the DATA_FIN, + * while the pseudo-header requires the original DSS data len, + * including that increment + */ + header.data_seq = cpu_to_be64(subflow->map_seq); + header.subflow_seq = htonl(subflow->map_subflow_seq); + header.data_len = htons(subflow->map_data_len + subflow->map_data_fin); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); + if (unlikely(csum_fold(csum))) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); + return subflow->mp_join ?
MAPPING_INVALID : MAPPING_DUMMY; + } + + return MAPPING_OK; +} + static enum mapping_status get_mapping_status(struct sock *ssk, struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + bool csum_reqd = READ_ONCE(msk->csum_enabled); struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -920,9 +1008,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk, /* Allow replacing only with an identical map */ if (subflow->map_seq == map_seq && subflow->map_subflow_seq == mpext->subflow_seq && - subflow->map_data_len == data_len) { + subflow->map_data_len == data_len && + subflow->map_csum_reqd == mpext->csum_reqd) { skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + goto validate_csum; } /* If this skb data is fully covered by the current mapping, @@ -934,27 +1023,41 @@ static enum mapping_status get_mapping_status(struct sock *ssk, } /* will validate the next map after consuming the current one */ - return MAPPING_OK; + goto validate_csum; } subflow->map_seq = map_seq; subflow->map_subflow_seq = mpext->subflow_seq; subflow->map_data_len = data_len; subflow->map_valid = 1; + subflow->map_data_fin = mpext->data_fin; subflow->mpc_map = mpext->mpc_map; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u", + subflow->map_csum_reqd = mpext->csum_reqd; + subflow->map_csum_len = 0; + subflow->map_data_csum = csum_unfold(mpext->csum); + + /* Cf. RFC 8684 Section 3.3 */ + if (unlikely(subflow->map_csum_reqd != csum_reqd)) + return MAPPING_INVALID; + + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", subflow->map_seq, subflow->map_subflow_seq, - subflow->map_data_len); + subflow->map_data_len, subflow->map_csum_reqd, + subflow->map_data_csum); validate_seq: /* we revalidate valid mapping on new skb, because we must ensure * the current skb is completely covered by the available mapping */ - if (!validate_mapping(ssk, skb)) + if (!validate_mapping(ssk, skb)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH); return MAPPING_INVALID; + } skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + +validate_csum: + return validate_data_csum(ssk, skb, csum_reqd); } static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, @@ -1000,7 +1103,7 @@ static bool subflow_check_data_avail(struct sock *ssk) struct sk_buff *skb; if (!skb_peek(&ssk->sk_receive_queue)) - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); if (subflow->data_avail) return true; @@ -1039,18 +1142,13 @@ static bool subflow_check_data_avail(struct sock *ssk) ack_seq = mptcp_subflow_get_mapped_dsn(subflow); pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack, ack_seq); - if (ack_seq == old_ack) { - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - break; - } else if (after64(ack_seq, old_ack)) { - subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA; - break; + if (unlikely(before64(ack_seq, old_ack))) { + mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + continue; } - /* only accept in-sequence mapping.
Old values are spurious - * retransmission - */ - mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + break; } return true; @@ -1065,12 +1163,11 @@ fallback: * subflow_error_report() will introduce the appropriate barriers */ ssk->sk_err = EBADMSG; - ssk->sk_error_report(ssk); tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMPTCP; tcp_send_active_reset(ssk, GFP_ATOMIC); - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); return false; } @@ -1080,7 +1177,7 @@ fallback: subflow->map_seq = READ_ONCE(msk->ack_seq); subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); return true; } @@ -1092,7 +1189,7 @@ bool mptcp_subflow_data_available(struct sock *sk) if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); pr_debug("Done with mapping: seq=%u data_len=%u", subflow->map_subflow_seq, @@ -1120,41 +1217,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = tcp_full_space(sk); } -static void subflow_data_ready(struct sock *sk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - u16 state = 1 << inet_sk_state_load(sk); - struct sock *parent = subflow->conn; - struct mptcp_sock *msk; - - msk = mptcp_sk(parent); - if (state & TCPF_LISTEN) { - /* MPJ subflow are removed from accept queue before reaching here, - * avoid stray wakeups - */ - if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) - return; - - set_bit(MPTCP_DATA_READY, &msk->flags); - parent->sk_data_ready(parent); - return; - } - - WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && - !subflow->mp_join && !(state & TCPF_CLOSE)); - - if (mptcp_subflow_data_available(sk)) - mptcp_data_ready(parent, sk); -} - -static void subflow_write_space(struct sock *ssk) -{ - struct sock *sk = mptcp_subflow_ctx(ssk)->conn; - - mptcp_propagate_sndbuf(sk, ssk); - mptcp_write_space(sk); -} - void __mptcp_error_report(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -1195,6 +1257,43 @@ static void subflow_error_report(struct sock *ssk) mptcp_data_unlock(sk); } +static void subflow_data_ready(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + u16 state = 1 << inet_sk_state_load(sk); + struct sock *parent = subflow->conn; + struct mptcp_sock *msk; + + msk = mptcp_sk(parent); + if (state & TCPF_LISTEN) { + /* MPJ subflow are removed from accept queue before reaching here, + * avoid stray wakeups + */ + if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) + return; + + set_bit(MPTCP_DATA_READY, &msk->flags); + parent->sk_data_ready(parent); + return; + } + + WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && + !subflow->mp_join && !(state & TCPF_CLOSE)); + + if (mptcp_subflow_data_available(sk)) + mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk); +} + +static void subflow_write_space(struct sock *ssk) +{ + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_propagate_sndbuf(sk, ssk); + mptcp_write_space(sk); +} + static struct inet_connection_sock_af_ops * subflow_default_af_ops(struct sock *sk) { @@ -1505,6 +1604,8 @@ static void subflow_state_change(struct 
sock *sk) */ if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk); subflow_sched_work_if_closed(mptcp_sk(parent), sk); diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 87112dad1fd4..049890e00a3d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -74,7 +74,7 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \ nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \ nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \ - nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \ + nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \ nft_chain_route.o nf_tables_offload.o \ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \ nft_set_pipapo.o diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index be14e0bea4c8..55647409a9be 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -45,12 +45,13 @@ static DEFINE_MUTEX(nf_ct_proto_mutex); #ifdef CONFIG_SYSCTL -__printf(5, 6) +__printf(4, 5) void nf_l4proto_log_invalid(const struct sk_buff *skb, - struct net *net, - u16 pf, u8 protonum, + const struct nf_hook_state *state, + u8 protonum, const char *fmt, ...) { + struct net *net = state->net; struct va_format vaf; va_list args; @@ -62,15 +63,16 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb, vaf.fmt = fmt; vaf.va = &args; - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_proto_%d: %pV ", protonum, &vaf); + nf_log_packet(net, state->pf, 0, skb, state->in, state->out, + NULL, "nf_ct_proto_%d: %pV ", protonum, &vaf); va_end(args); } EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid); -__printf(3, 4) +__printf(4, 5) void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, const struct nf_conn *ct, + const struct nf_hook_state *state, const char *fmt, ...) 
{ struct va_format vaf; @@ -85,7 +87,7 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, vaf.fmt = fmt; vaf.va = &args; - nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct), + nf_l4proto_log_invalid(skb, state, nf_ct_protonum(ct), "%pV", &vaf); va_end(args); } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 4f33307fa3cf..c1557d47ccd1 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -382,7 +382,8 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = static noinline bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, - const struct dccp_hdr *dh) + const struct dccp_hdr *dh, + const struct nf_hook_state *hook_state) { struct net *net = nf_ct_net(ct); struct nf_dccp_net *dn; @@ -414,7 +415,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; out_invalid: - nf_ct_l4proto_log_invalid(skb, ct, "%s", msg); + nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg); return false; } @@ -464,8 +465,7 @@ static bool dccp_error(const struct dccp_hdr *dh, } return false; out_invalid: - nf_l4proto_log_invalid(skb, state->net, state->pf, - IPPROTO_DCCP, "%s", msg); + nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg); return true; } @@ -488,7 +488,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, return -NF_ACCEPT; type = dh->dccph_type; - if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh)) + if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state)) return -NF_ACCEPT; if (type == DCCP_PKT_RESET && @@ -543,11 +543,11 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, ct->proto.dccp.last_pkt = type; spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet"); + nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet"); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition"); + nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition"); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index 4efd8741c105..b38b7164acd5 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -170,12 +170,12 @@ int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb, ct_daddr = &ct->tuplehash[dir].tuple.dst.u3; if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) { if (state->pf == AF_INET) { - nf_l4proto_log_invalid(skb, state->net, state->pf, + nf_l4proto_log_invalid(skb, state, l4proto, "outer daddr %pI4 != inner %pI4", &outer_daddr->ip, &ct_daddr->ip); } else if (state->pf == AF_INET6) { - nf_l4proto_log_invalid(skb, state->net, state->pf, + nf_l4proto_log_invalid(skb, state, l4proto, "outer daddr %pI6 != inner %pI6", &outer_daddr->ip6, &ct_daddr->ip6); @@ -197,8 +197,7 @@ static void icmp_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, const char *msg) { - nf_l4proto_log_invalid(skb, state->net, state->pf, - IPPROTO_ICMP, "%s", msg); + nf_l4proto_log_invalid(skb, state, IPPROTO_ICMP, "%s", msg); } /* Small and modified version of icmp_rcv */ diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index facd8c64ec4e..61e3b05cf02c 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -126,8 +126,7 @@ static void 
icmpv6_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, const char *msg) { - nf_l4proto_log_invalid(skb, state->net, state->pf, - IPPROTO_ICMPV6, "%s", msg); + nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg); } int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index fb8dc02e502f..2394238d01c9 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -351,7 +351,7 @@ static bool sctp_error(struct sk_buff *skb, } return false; out_invalid: - nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_SCTP, "%s", logmsg); + nf_l4proto_log_invalid(skb, state, IPPROTO_SCTP, "%s", logmsg); return true; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index de840fc41a2e..f7e8baf59b51 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -446,14 +446,15 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } -static bool tcp_in_window(const struct nf_conn *ct, - struct ip_ct_tcp *state, +static bool tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, - const struct tcphdr *tcph) + const struct tcphdr *tcph, + const struct nf_hook_state *hook_state) { + struct ip_ct_tcp *state = &ct->proto.tcp; struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = nf_tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; @@ -670,7 +671,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tn->tcp_be_liberal) res = true; if (!res) { - nf_ct_l4proto_log_invalid(skb, ct, + nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", before(seq, sender->td_maxend + 1) ? in_recv_win ? @@ -710,7 +711,7 @@ static void tcp_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, const char *msg) { - nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg); + nf_l4proto_log_invalid(skb, state, IPPROTO_TCP, "%s", msg); } /* Protect conntrack against broken packets. Code taken from ipt_unclean.c.
*/ @@ -970,7 +971,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, + nf_ct_l4proto_log_invalid(skb, ct, state, "packet (index %d) in dir %d ignored, state %s", index, dir, tcp_conntrack_names[old_state]); @@ -995,7 +996,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, "invalid state"); + nf_ct_l4proto_log_invalid(skb, ct, state, "invalid state"); return -NF_ACCEPT; case TCP_CONNTRACK_TIME_WAIT: /* RFC5961 compliance cause stack to send "challenge-ACK" @@ -1010,7 +1011,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, /* Detected RFC5961 challenge ACK */ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored"); + nf_ct_l4proto_log_invalid(skb, ct, state, "challenge-ack ignored"); return NF_ACCEPT; /* Don't change state */ } break; @@ -1035,7 +1036,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); + nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst"); return -NF_ACCEPT; } @@ -1079,8 +1080,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, break; } - if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, - skb, dataoff, th)) { + if (!tcp_in_window(ct, dir, index, + skb, dataoff, th, state)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 68911fcaa0f1..698fee49e732 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -38,8 +38,7 @@ static void udp_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, const char *msg) { - nf_l4proto_log_invalid(skb, state->net, state->pf, - IPPROTO_UDP, "%s", msg); + nf_l4proto_log_invalid(skb, state, IPPROTO_UDP, "%s", msg); } static bool udp_error(struct sk_buff *skb, @@ -130,8 +129,7 @@ static void udplite_error_log(const struct sk_buff *skb, const struct nf_hook_state *state, const char *msg) { - nf_l4proto_log_invalid(skb, state->net, state->pf, - IPPROTO_UDPLITE, "%s", msg); + nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg); } static bool udplite_error(struct sk_buff *skb, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index b100c04a0e43..3d6d49420db8 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -31,6 +31,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, int length = (th->doff * 4) - sizeof(*th); u8 buf[40], *ptr; + if (unlikely(length < 0)) + return false; + ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); if (ptr == NULL) return false; @@ -47,6 +50,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, length--; continue; default: + if (length < 2) + return true; opsize = *ptr++; if (opsize < 2) return true; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f20f6ae0e215..d6214242fe7f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4338,13 +4338,45 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, err = nf_tables_set_alloc_name(&ctx, set, name); 
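/* A minimal sketch, with hypothetical example_* names standing in for
 * the real nf_tables helpers, of the error-unwind ordering this
 * nf_tables_newset() rework settles on: the set is fully initialized
 * and ops->init() has run before any expression is allocated, so each
 * goto label releases exactly what was acquired before the failing
 * step, in reverse order of acquisition.
 */
static int example_newset(struct example_set *set)
{
	int err;

	err = example_alloc_name(set);		/* nf_tables_set_alloc_name() */
	if (err < 0)
		goto err_set_name;

	err = example_backend_init(set);	/* ops->init() */
	if (err < 0)
		goto err_set_init;

	err = example_alloc_exprs(set);		/* nft_set_elem_expr_alloc() */
	if (err < 0)
		goto err_set_expr_alloc;

	return 0;

err_set_expr_alloc:
	example_free_exprs(set);		/* nft_expr_destroy() loop */
	example_backend_destroy(set);		/* ops->destroy() */
err_set_init:
	example_free_name(set);			/* kfree(set->name) */
err_set_name:
	example_free_set(set);			/* kvfree(set) */
	return err;
}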
kfree(name); if (err < 0) - goto err_set_alloc_name; + goto err_set_name; + + udata = NULL; + if (udlen) { + udata = set->data + size; + nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); + } + + INIT_LIST_HEAD(&set->bindings); + INIT_LIST_HEAD(&set->catchall_list); + set->table = table; + write_pnet(&set->net, net); + set->ops = ops; + set->ktype = ktype; + set->klen = desc.klen; + set->dtype = dtype; + set->objtype = objtype; + set->dlen = desc.dlen; + set->flags = flags; + set->size = desc.size; + set->policy = policy; + set->udlen = udlen; + set->udata = udata; + set->timeout = timeout; + set->gc_int = gc_int; + + set->field_count = desc.field_count; + for (i = 0; i < desc.field_count; i++) + set->field_len[i] = desc.field_len[i]; + + err = ops->init(set, &desc, nla); + if (err < 0) + goto err_set_init; if (nla[NFTA_SET_EXPR]) { expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); if (IS_ERR(expr)) { err = PTR_ERR(expr); - goto err_set_alloc_name; + goto err_set_expr_alloc; } set->exprs[0] = expr; set->num_exprs++; @@ -4355,75 +4387,44 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_EXPR)) { err = -EINVAL; - goto err_set_alloc_name; + goto err_set_expr_alloc; } i = 0; nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { err = -E2BIG; - goto err_set_init; + goto err_set_expr_alloc; } if (nla_type(tmp) != NFTA_LIST_ELEM) { err = -EINVAL; - goto err_set_init; + goto err_set_expr_alloc; } expr = nft_set_elem_expr_alloc(&ctx, set, tmp); if (IS_ERR(expr)) { err = PTR_ERR(expr); - goto err_set_init; + goto err_set_expr_alloc; } set->exprs[i++] = expr; set->num_exprs++; } } - udata = NULL; - if (udlen) { - udata = set->data + size; - nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); - } - - INIT_LIST_HEAD(&set->bindings); - INIT_LIST_HEAD(&set->catchall_list); - set->table = table; - write_pnet(&set->net, net); - set->ops = ops; - set->ktype = ktype; - set->klen = desc.klen; - set->dtype = dtype; - set->objtype = objtype; - set->dlen = desc.dlen; - set->flags = flags; - set->size = desc.size; - set->policy = policy; - set->udlen = udlen; - set->udata = udata; - set->timeout = timeout; - set->gc_int = gc_int; set->handle = nf_tables_alloc_handle(table); - set->field_count = desc.field_count; - for (i = 0; i < desc.field_count; i++) - set->field_len[i] = desc.field_len[i]; - - err = ops->init(set, &desc, nla); - if (err < 0) - goto err_set_init; - err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err_set_trans; + goto err_set_expr_alloc; list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; -err_set_trans: - ops->destroy(set); -err_set_init: +err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); -err_set_alloc_name: + + ops->destroy(set); +err_set_init: kfree(set->name); err_set_name: kvfree(set); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 7780342e2f2d..866cfba04d6c 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -268,6 +268,7 @@ static struct nft_expr_type *nft_basic_types[] = { &nft_meta_type, &nft_rt_type, &nft_exthdr_type, + &nft_last_type, }; static struct nft_object_type *nft_basic_objects[] = { diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 58fda6ac663b..50b4e3c9347a 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -126,8 +126,10 @@ static int 
nfnl_hook_dump_one(struct sk_buff *nlskb, #ifdef CONFIG_KALLSYMS ret = snprintf(sym, sizeof(sym), "%ps", ops->hook); - if (ret < 0 || ret > (int)sizeof(sym)) + if (ret >= sizeof(sym)) { + ret = -EINVAL; goto nla_put_failure; + } module_name = strstr(sym, " ["); if (module_name) { diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 7f705b5c09de..4f583d2e220e 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -164,7 +164,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt, { struct tcphdr *tcph; - if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP) + if (pkt->tprot != IPPROTO_TCP) return NULL; tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer); @@ -312,6 +312,9 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr, const struct sctp_chunkhdr *sch; struct sctp_chunkhdr _sch; + if (pkt->tprot != IPPROTO_SCTP) + goto err; + do { sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch); if (!sch || !sch->length) @@ -334,7 +337,7 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr, } offset += SCTP_PAD4(ntohs(sch->length)); } while (offset < pkt->skb->len); - +err: if (priv->flags & NFT_EXTHDR_F_PRESENT) nft_reg_store8(dest, false); else diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c new file mode 100644 index 000000000000..913ac45167f2 --- /dev/null +++ b/net/netfilter/nft_last.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +struct nft_last_priv { + unsigned long last_jiffies; + unsigned int last_set; +}; + +static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = { + [NFTA_LAST_SET] = { .type = NLA_U32 }, + [NFTA_LAST_MSECS] = { .type = NLA_U64 }, +}; + +static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_last_priv *priv = nft_expr_priv(expr); + u64 last_jiffies; + int err; + + if (tb[NFTA_LAST_MSECS]) { + err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies); + if (err < 0) + return err; + + priv->last_jiffies = jiffies + (unsigned long)last_jiffies; + priv->last_set = 1; + } + + return 0; +} + +static void nft_last_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt) +{ + struct nft_last_priv *priv = nft_expr_priv(expr); + + priv->last_jiffies = jiffies; + priv->last_set = 1; +} + +static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_last_priv *priv = nft_expr_priv(expr); + __be64 msecs; + + if (time_before(jiffies, priv->last_jiffies)) + priv->last_set = 0; + + if (priv->last_set) + msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies); + else + msecs = 0; + + if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) || + nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_expr_ops nft_last_ops = { + .type = &nft_last_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)), + .eval = nft_last_eval, + .init = nft_last_init, + .dump = nft_last_dump, +}; + +struct nft_expr_type nft_last_type __read_mostly = { + .name = "last", + .ops = &nft_last_ops, + .policy = nft_last_policy, + .maxattr = NFTA_LAST_MAX, + .flags 
= NFT_EXPR_STATEFUL, + .owner = THIS_MODULE, +}; diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 41109c326f3a..28982630bef3 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -13,6 +13,7 @@ openvswitch-y := \ flow_netlink.o \ flow_table.o \ meter.o \ + openvswitch_trace.o \ vport.o \ vport-internal_dev.o \ vport-netdev.o @@ -24,3 +25,5 @@ endif obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o + +CFLAGS_openvswitch_trace.o = -I$(src) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 77d924ab8cdb..ef15d9eb4774 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -30,6 +30,7 @@ #include "conntrack.h" #include "vport.h" #include "flow_netlink.h" +#include "openvswitch_trace.h" struct deferred_action { struct sk_buff *skb; @@ -1242,6 +1243,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, a = nla_next(a, &rem)) { int err = 0; + if (trace_ovs_do_execute_action_enabled()) + trace_ovs_do_execute_action(dp, skb, key, a, rem); + switch (nla_type(a)) { case OVS_ACTION_ATTR_OUTPUT: { int port = nla_get_u32(a); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9d6ef6cb9b26..bc164b35e67d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -43,6 +43,7 @@ #include "flow_table.h" #include "flow_netlink.h" #include "meter.h" +#include "openvswitch_trace.h" #include "vport-internal_dev.h" #include "vport-netdev.h" @@ -275,6 +276,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, struct dp_stats_percpu *stats; int err; + if (trace_ovs_dp_upcall_enabled()) + trace_ovs_dp_upcall(dp, skb, key, upcall_info); + if (upcall_info->portid == 0) { err = -ENOTCONN; goto err; diff --git a/net/openvswitch/openvswitch_trace.c b/net/openvswitch/openvswitch_trace.c new file mode 100644 index 000000000000..62c5f7d6f023 --- /dev/null +++ b/net/openvswitch/openvswitch_trace.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* bug in tracepoint.h, it should include this */ +#include <linux/module.h> + +/* sparse isn't too happy with all macros... 
*/ +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "openvswitch_trace.h" + +#endif diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h new file mode 100644 index 000000000000..3eb35d9eb700 --- /dev/null +++ b/net/openvswitch/openvswitch_trace.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM openvswitch + +#if !defined(_TRACE_OPENVSWITCH_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_OPENVSWITCH_H + +#include <linux/tracepoint.h> + +#include "datapath.h" + +TRACE_EVENT(ovs_do_execute_action, + + TP_PROTO(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, const struct nlattr *a, int rem), + + TP_ARGS(dp, skb, key, a, rem), + + TP_STRUCT__entry( + __field( void *, dpaddr ) + __string( dp_name, ovs_dp_name(dp) ) + __string( dev_name, skb->dev->name ) + __field( void *, skbaddr ) + __field( unsigned int, len ) + __field( unsigned int, data_len ) + __field( unsigned int, truesize ) + __field( u8, nr_frags ) + __field( u16, gso_size ) + __field( u16, gso_type ) + __field( u32, ovs_flow_hash ) + __field( u32, recirc_id ) + __field( void *, keyaddr ) + __field( u16, key_eth_type ) + __field( u8, key_ct_state ) + __field( u8, key_ct_orig_proto ) + __field( u16, key_ct_zone ) + __field( unsigned int, flow_key_valid ) + __field( u8, action_type ) + __field( unsigned int, action_len ) + __field( void *, action_data ) + __field( u8, is_last ) + ), + + TP_fast_assign( + __entry->dpaddr = dp; + __assign_str(dp_name, ovs_dp_name(dp)); + __assign_str(dev_name, skb->dev->name); + __entry->skbaddr = skb; + __entry->len = skb->len; + __entry->data_len = skb->data_len; + __entry->truesize = skb->truesize; + __entry->nr_frags = skb_shinfo(skb)->nr_frags; + __entry->gso_size = skb_shinfo(skb)->gso_size; + __entry->gso_type = skb_shinfo(skb)->gso_type; + __entry->ovs_flow_hash = key->ovs_flow_hash; + __entry->recirc_id = key->recirc_id; + __entry->keyaddr = key; + __entry->key_eth_type = key->eth.type; + __entry->key_ct_state = key->ct_state; + __entry->key_ct_orig_proto = key->ct_orig_proto; + __entry->key_ct_zone = key->ct_zone; + __entry->flow_key_valid = !(key->mac_proto & SW_FLOW_KEY_INVALID); + __entry->action_type = nla_type(a); + __entry->action_len = nla_len(a); + __entry->action_data = nla_data(a); + __entry->is_last = nla_is_last(a, rem); + ), + + TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_zone=%04x flow_key_valid=%d action_type=%u action_len=%u action_data=%p is_last=%d", + __entry->dpaddr, __get_str(dp_name), __get_str(dev_name), + __entry->skbaddr, __entry->len, __entry->data_len, + __entry->truesize, __entry->nr_frags, __entry->gso_size, + __entry->gso_type, __entry->ovs_flow_hash, + __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type, + __entry->key_ct_state, __entry->key_ct_orig_proto, + __entry->key_ct_zone, + __entry->flow_key_valid, + __entry->action_type, __entry->action_len, + __entry->action_data, __entry->is_last) +); + +TRACE_EVENT(ovs_dp_upcall, + + TP_PROTO(struct datapath *dp, struct sk_buff *skb, + const struct sw_flow_key *key, + const struct dp_upcall_info *upcall_info), + + TP_ARGS(dp, skb, key, upcall_info), + + TP_STRUCT__entry( + __field( void *, dpaddr ) + __string( dp_name, ovs_dp_name(dp) ) + __string( dev_name, skb->dev->name ) + __field( void *, skbaddr ) + __field(
unsigned int, len ) + __field( unsigned int, data_len ) + __field( unsigned int, truesize ) + __field( u8, nr_frags ) + __field( u16, gso_size ) + __field( u16, gso_type ) + __field( u32, ovs_flow_hash ) + __field( u32, recirc_id ) + __field( const void *, keyaddr ) + __field( u16, key_eth_type ) + __field( u8, key_ct_state ) + __field( u8, key_ct_orig_proto ) + __field( u16, key_ct_zone ) + __field( unsigned int, flow_key_valid ) + __field( u8, upcall_cmd ) + __field( u32, upcall_port ) + __field( u16, upcall_mru ) + ), + + TP_fast_assign( + __entry->dpaddr = dp; + __assign_str(dp_name, ovs_dp_name(dp)); + __assign_str(dev_name, skb->dev->name); + __entry->skbaddr = skb; + __entry->len = skb->len; + __entry->data_len = skb->data_len; + __entry->truesize = skb->truesize; + __entry->nr_frags = skb_shinfo(skb)->nr_frags; + __entry->gso_size = skb_shinfo(skb)->gso_size; + __entry->gso_type = skb_shinfo(skb)->gso_type; + __entry->ovs_flow_hash = key->ovs_flow_hash; + __entry->recirc_id = key->recirc_id; + __entry->keyaddr = key; + __entry->key_eth_type = key->eth.type; + __entry->key_ct_state = key->ct_state; + __entry->key_ct_orig_proto = key->ct_orig_proto; + __entry->key_ct_zone = key->ct_zone; + __entry->flow_key_valid = !(key->mac_proto & SW_FLOW_KEY_INVALID); + __entry->upcall_cmd = upcall_info->cmd; + __entry->upcall_port = upcall_info->portid; + __entry->upcall_mru = upcall_info->mru; + ), + + TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_zone=%04x flow_key_valid=%d upcall_cmd=%u upcall_port=%u upcall_mru=%u", + __entry->dpaddr, __get_str(dp_name), __get_str(dev_name), + __entry->skbaddr, __entry->len, __entry->data_len, + __entry->truesize, __entry->nr_frags, __entry->gso_size, + __entry->gso_type, __entry->ovs_flow_hash, + __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type, + __entry->key_ct_state, __entry->key_ct_orig_proto, + __entry->key_ct_zone, + __entry->flow_key_valid, + __entry->upcall_cmd, __entry->upcall_port, + __entry->upcall_mru) +); + +#endif /* _TRACE_OPENVSWITCH_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE openvswitch_trace +#include <trace/define_trace.h> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 71dd6b910f7c..77b0cdab3810 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2683,7 +2683,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2896,7 +2896,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -3034,10 +3034,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - if (po->tx_ring.pg_vec) + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. + * tpacket_snd() will redo the check safely. 
+ */ + if (data_race(po->tx_ring.pg_vec)) return tpacket_snd(po, msg); - else - return packet_snd(sock, msg, len); + + return packet_snd(sock, msg, len); } /* @@ -3168,7 +3171,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, /* prevents packet_notifier() from calling * register_prot_hook() */ - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; @@ -3178,17 +3181,17 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, } BUG_ON(po->running); - po->num = proto; + WRITE_ONCE(po->num, proto); po->prot_hook.type = proto; if (unlikely(unlisted)) { dev_put(dev); po->prot_hook.dev = NULL; - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; + WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); packet_cached_dev_assign(po, dev); } } @@ -3502,7 +3505,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, uaddr->sa_family = AF_PACKET; memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); @@ -3517,16 +3520,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); + int ifindex; if (peer) return -EOPNOTSUPP; + ifindex = READ_ONCE(po->ifindex); sll->sll_family = AF_PACKET; - sll->sll_ifindex = po->ifindex; - sll->sll_protocol = po->num; + sll->sll_ifindex = ifindex; + sll->sll_protocol = READ_ONCE(po->num); sll->sll_pkttype = 0; rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -4102,7 +4107,7 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); if (po->prot_hook.dev) dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -4408,7 +4413,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, was_running = po->running; num = po->num; if (was_running) { - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, false); } spin_unlock(&po->bind_lock); @@ -4443,7 +4448,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, spin_lock(&po->bind_lock); if (was_running) { - po->num = num; + WRITE_ONCE(po->num, num); register_prot_hook(sk); } spin_unlock(&po->bind_lock); @@ -4613,8 +4618,8 @@ static int packet_seq_show(struct seq_file *seq, void *v) s, refcount_read(&s->sk_refcnt), s->sk_type, - ntohs(po->num), - po->ifindex, + ntohs(READ_ONCE(po->num)), + READ_ONCE(po->ifindex), po->running, atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index c0477bec09bd..f2efaa4225f9 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -436,7 +436,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) struct qrtr_sock *ipc; struct sk_buff *skb; struct qrtr_cb *cb; - unsigned int size; + size_t size; unsigned int ver; size_t hdrlen; diff --git a/net/rds/recv.c b/net/rds/recv.c index 
4db109fb6ec2..5b426dc3634d 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -714,7 +714,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (rds_cmsg_recv(inc, msg, rs)) { ret = -EFAULT; - goto out; + break; } rds_recvmsg_zcookie(rs, msg); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 18edd9ad1410..a656baa321fe 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -904,14 +904,19 @@ static int tcf_ct_act_nat(struct sk_buff *skb, } err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); - if (err == NF_ACCEPT && - ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { - if (maniptype == NF_NAT_MANIP_SRC) - maniptype = NF_NAT_MANIP_DST; - else - maniptype = NF_NAT_MANIP_SRC; - - err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); + if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { + if (ct->status & IPS_SRC_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = ct_nat_execute(skb, ct, ctinfo, range, + maniptype); + } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + err = ct_nat_execute(skb, ct, ctinfo, NULL, + NF_NAT_MANIP_SRC); + } } return err; #else diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 2e704c7a105a..d7869a984881 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1531,13 +1531,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->basic.n_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.n_proto)); - mask->basic.n_proto = cpu_to_be16(0); } else { key->basic.n_proto = ethertype; + mask->basic.n_proto = cpu_to_be16(~0); } } } else { key->basic.n_proto = ethertype; + mask->basic.n_proto = cpu_to_be16(~0); } } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 7d37638ee1c7..951542843cab 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -943,7 +943,7 @@ static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb, } tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); - if (!tcph) + if (!tcph || tcph->doff < 5) return NULL; return skb_header_pointer(skb, offset, @@ -967,6 +967,8 @@ static const void *cake_get_tcpopt(const struct tcphdr *tcph, length--; continue; } + if (length < 2) + break; opsize = *ptr++; if (opsize < 2 || opsize > length) break; @@ -1104,6 +1106,8 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph, length--; continue; } + if (length < 2) + break; opsize = *ptr++; if (opsize < 2 || opsize > length) break; @@ -2338,7 +2342,7 @@ static int cake_config_precedence(struct Qdisc *sch) /* List of known Diffserv codepoints: * - * Least Effort (CS1) + * Least Effort (CS1, LE) * Best Effort (CS0) * Max Reliability & LLT "Lo" (TOS1) * Max Throughput (TOS2) @@ -2360,7 +2364,7 @@ static int cake_config_precedence(struct Qdisc *sch) * Total 25 codepoints. */ -/* List of traffic classes in RFC 4594: +/* List of traffic classes in RFC 4594, updated by RFC 8622: * (roughly descending order of contended priority) * (roughly ascending order of uncontended throughput) * @@ -2375,7 +2379,7 @@ static int cake_config_precedence(struct Qdisc *sch) * Ops, Admin, Management (CS2,TOS1) - eg. ssh * Standard Service (CS0 & unrecognised codepoints) * High Throughput Data (AF1x,TOS2) - eg. web traffic - * Low Priority Data (CS1) - eg. BitTorrent + * Low Priority Data (CS1,LE) - eg. BitTorrent * Total 12 traffic classes. */ @@ -2391,7 +2395,7 @@ static int cake_config_diffserv8(struct Qdisc *sch) * Video Streaming (AF4x, AF3x, CS3) * Bog Standard (CS0 etc.) 
* High Throughput (AF1x, TOS2) - * Background Traffic (CS1) + * Background Traffic (CS1, LE) * * Total 8 traffic classes. */ @@ -2435,7 +2439,7 @@ static int cake_config_diffserv4(struct Qdisc *sch) * Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4) * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1) * Best Effort (CS0, AF1x, TOS2, and those not specified) - * Background Traffic (CS1) + * Background Traffic (CS1, LE) * * Total 4 traffic classes. */ @@ -2473,7 +2477,7 @@ static int cake_config_diffserv4(struct Qdisc *sch) static int cake_config_diffserv3(struct Qdisc *sch) { /* Simplified Diffserv structure with 3 tins. - * Low Priority (CS1) + * Low Priority (CS1, LE) * Best Effort * Latency Sensitive (TOS4, VA, EF, CS6, CS7) */ diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e9c0afc8becc..d9ac60ffe927 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q, */ if (!netif_xmit_frozen_or_stopped(txq)) set_bit(__QDISC_STATE_MISSED, &q->state); + else + set_bit(__QDISC_STATE_DRAINING, &q->state); } /* Main transmission queue. */ @@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) skb = next; } - if (lock) + + if (lock) { spin_unlock(lock); - __netif_schedule(q); + set_bit(__QDISC_STATE_MISSED, &q->state); + } else { + __netif_schedule(q); + } } static void try_bulk_dequeue_skb(struct Qdisc *q, @@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q) while (qdisc_restart(q, &packets)) { quota -= packets; if (quota <= 0) { - __netif_schedule(q); + if (q->flags & TCQ_F_NOLOCK) + set_bit(__QDISC_STATE_MISSED, &q->state); + else + __netif_schedule(q); + break; } } @@ -698,13 +708,14 @@ retry: if (likely(skb)) { qdisc_update_stats_at_dequeue(qdisc, skb); } else if (need_retry && - test_bit(__QDISC_STATE_MISSED, &qdisc->state)) { + READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) { /* Delay clearing the STATE_MISSED here to reduce * the overhead of the second spin_trylock() in * qdisc_run_begin() and __netif_schedule() calling * in qdisc_run_end(). */ clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); /* Make sure dequeuing happens after clearing * STATE_MISSED. @@ -714,8 +725,6 @@ retry: need_retry = false; goto retry; - } else { - WRITE_ONCE(qdisc->empty, true); } return skb; @@ -916,7 +925,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; - sch->empty = true; dev_hold(dev); refcount_set(&sch->refcnt, 1); @@ -1222,6 +1230,7 @@ static void dev_reset_queue(struct net_device *dev, spin_unlock_bh(qdisc_lock(qdisc)); if (nolock) { clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); spin_unlock_bh(&qdisc->seqlock); } } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 336df4b36655..be29da09cc7a 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -98,6 +98,7 @@ static struct sctp_association *sctp_association_init( * sock configured value. */ asoc->hbinterval = msecs_to_jiffies(sp->hbinterval); + asoc->probe_interval = msecs_to_jiffies(sp->probe_interval); asoc->encap_port = sp->encap_port; @@ -625,6 +626,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, * association configured value. 
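+ * The new PLPMTUD probe interval is inherited from the association in the same way.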
*/ peer->hbinterval = asoc->hbinterval; + peer->probe_interval = asoc->probe_interval; peer->encap_port = asoc->encap_port; @@ -714,6 +716,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, return NULL; } + sctp_transport_pl_reset(peer); + /* Attach the remote transport to our asoc. */ list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list); asoc->peer.transport_count++; @@ -812,6 +816,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, spc_state = SCTP_ADDR_CONFIRMED; transport->state = SCTP_ACTIVE; + sctp_transport_pl_reset(transport); break; case SCTP_TRANSPORT_DOWN: @@ -821,6 +826,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, */ if (transport->state != SCTP_UNCONFIRMED) { transport->state = SCTP_INACTIVE; + sctp_transport_pl_reset(transport); spc_state = SCTP_ADDR_UNREACHABLE; } else { sctp_transport_dst_release(transport); diff --git a/net/sctp/debug.c b/net/sctp/debug.c index c4d9c7feffb9..ccd773e4c371 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -154,6 +154,7 @@ static const char *const sctp_timer_tbl[] = { "TIMEOUT_T5_SHUTDOWN_GUARD", "TIMEOUT_HEARTBEAT", "TIMEOUT_RECONF", + "TIMEOUT_PROBE", "TIMEOUT_SACK", "TIMEOUT_AUTOCLOSE", }; diff --git a/net/sctp/input.c b/net/sctp/input.c index d508f6f3dd08..fe6429cc012f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -385,7 +385,9 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb) void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { - if (!t || (t->pathmtu <= pmtu)) + if (!t || + (t->pathmtu <= pmtu && + t->pl.probe_size + sctp_transport_pl_hlen(t) <= pmtu)) return; if (sock_owned_by_user(sk)) { @@ -554,6 +556,49 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t) sctp_transport_put(t); } +static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb, + __u8 type, __u8 code, __u32 info) +{ + struct sctp_association *asoc = t->asoc; + struct sock *sk = asoc->base.sk; + int err = 0; + + switch (type) { + case ICMP_PARAMETERPROB: + err = EPROTO; + break; + case ICMP_DEST_UNREACH: + if (code > NR_ICMP_UNREACH) + return; + if (code == ICMP_FRAG_NEEDED) { + sctp_icmp_frag_needed(sk, asoc, t, SCTP_TRUNC4(info)); + return; + } + if (code == ICMP_PROT_UNREACH) { + sctp_icmp_proto_unreachable(sk, asoc, t); + return; + } + err = icmp_err_convert[code].errno; + break; + case ICMP_TIME_EXCEEDED: + if (code == ICMP_EXC_FRAGTIME) + return; + + err = EHOSTUNREACH; + break; + case ICMP_REDIRECT: + sctp_icmp_redirect(sk, t, skb); + default: + return; + } + if (!sock_owned_by_user(sk) && inet_sk(sk)->recverr) { + sk->sk_err = err; + sk->sk_error_report(sk); + } else { /* Only an error on timeout */ + sk->sk_err_soft = err; + } +} + /* * This routine is called by the ICMP module when it gets some * sort of error condition. 
If err < 0 then the socket should @@ -572,22 +617,19 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t) int sctp_v4_err(struct sk_buff *skb, __u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; - const int ihlen = iph->ihl * 4; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; - struct sock *sk; - struct sctp_association *asoc = NULL; + struct net *net = dev_net(skb->dev); struct sctp_transport *transport; - struct inet_sock *inet; + struct sctp_association *asoc; __u16 saveip, savesctp; - int err; - struct net *net = dev_net(skb->dev); + struct sock *sk; /* Fix up skb to look at the embedded net header. */ saveip = skb->network_header; savesctp = skb->transport_header; skb_reset_network_header(skb); - skb_set_transport_header(skb, ihlen); + skb_set_transport_header(skb, iph->ihl * 4); sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport); /* Put back, the original values. */ skb->network_header = saveip; @@ -596,59 +638,41 @@ int sctp_v4_err(struct sk_buff *skb, __u32 info) __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return -ENOENT; } - /* Warning: The sock lock is held. Remember to call - * sctp_err_finish! - */ - switch (type) { - case ICMP_PARAMETERPROB: - err = EPROTO; - break; - case ICMP_DEST_UNREACH: - if (code > NR_ICMP_UNREACH) - goto out_unlock; + sctp_v4_err_handle(transport, skb, type, code, info); + sctp_err_finish(sk, transport); - /* PMTU discovery (RFC1191) */ - if (ICMP_FRAG_NEEDED == code) { - sctp_icmp_frag_needed(sk, asoc, transport, - SCTP_TRUNC4(info)); - goto out_unlock; - } else { - if (ICMP_PROT_UNREACH == code) { - sctp_icmp_proto_unreachable(sk, asoc, - transport); - goto out_unlock; - } - } - err = icmp_err_convert[code].errno; - break; - case ICMP_TIME_EXCEEDED: - /* Ignore any time exceeded errors due to fragment reassembly - * timeouts. - */ - if (ICMP_EXC_FRAGTIME == code) - goto out_unlock; + return 0; +} - err = EHOSTUNREACH; - break; - case ICMP_REDIRECT: - sctp_icmp_redirect(sk, transport, skb); - /* Fall through to out_unlock. */ - default: - goto out_unlock; +int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb) +{ + struct net *net = dev_net(skb->dev); + struct sctp_association *asoc; + struct sctp_transport *t; + struct icmphdr *hdr; + __u32 info = 0; + + skb->transport_header += sizeof(struct udphdr); + sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &t); + if (!sk) { + __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); + return -ENOENT; } - inet = inet_sk(sk); - if (!sock_owned_by_user(sk) && inet->recverr) { - sk->sk_err = err; - sk->sk_error_report(sk); - } else { /* Only an error on timeout */ - sk->sk_err_soft = err; + skb->transport_header -= sizeof(struct udphdr); + hdr = (struct icmphdr *)(skb_network_header(skb) - sizeof(struct icmphdr)); + if (hdr->type == ICMP_REDIRECT) { + /* can't be handled without outer iphdr known, leave it to udp_err */ + sctp_err_finish(sk, t); + return 0; } + if (hdr->type == ICMP_DEST_UNREACH && hdr->code == ICMP_FRAG_NEEDED) + info = ntohs(hdr->un.frag.mtu); + sctp_v4_err_handle(t, skb, hdr->type, hdr->code, info); -out_unlock: - sctp_err_finish(sk, transport); - return 0; + sctp_err_finish(sk, t); + return 1; } /* diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index bd08807c9e44..05f81a4d0ee7 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -122,54 +122,28 @@ static struct notifier_block sctp_inet6addr_notifier = { .notifier_call = sctp_inet6addr_event, }; -/* ICMP error handler. 
*/ -static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, + __u8 type, __u8 code, __u32 info) { - struct inet6_dev *idev; - struct sock *sk; - struct sctp_association *asoc; - struct sctp_transport *transport; + struct sctp_association *asoc = t->asoc; + struct sock *sk = asoc->base.sk; struct ipv6_pinfo *np; - __u16 saveip, savesctp; - int err, ret = 0; - struct net *net = dev_net(skb->dev); - - idev = in6_dev_get(skb->dev); - - /* Fix up skb to look at the embedded net header. */ - saveip = skb->network_header; - savesctp = skb->transport_header; - skb_reset_network_header(skb); - skb_set_transport_header(skb, offset); - sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); - /* Put back, the original pointers. */ - skb->network_header = saveip; - skb->transport_header = savesctp; - if (!sk) { - __ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS); - ret = -ENOENT; - goto out; - } - - /* Warning: The sock lock is held. Remember to call - * sctp_err_finish! - */ + int err = 0; switch (type) { case ICMPV6_PKT_TOOBIG: if (ip6_sk_accept_pmtu(sk)) - sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info)); - goto out_unlock; + sctp_icmp_frag_needed(sk, asoc, t, info); + return; case ICMPV6_PARAMPROB: if (ICMPV6_UNK_NEXTHDR == code) { - sctp_icmp_proto_unreachable(sk, asoc, transport); - goto out_unlock; + sctp_icmp_proto_unreachable(sk, asoc, t); + return; } break; case NDISC_REDIRECT: - sctp_icmp_redirect(sk, transport, skb); - goto out_unlock; + sctp_icmp_redirect(sk, t, skb); + return; default: break; } @@ -179,17 +153,69 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk->sk_error_report(sk); - } else { /* Only an error on timeout */ + } else { sk->sk_err_soft = err; } +} + +/* ICMP error handler. */ +static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct net *net = dev_net(skb->dev); + struct sctp_transport *transport; + struct sctp_association *asoc; + __u16 saveip, savesctp; + struct sock *sk; + + /* Fix up skb to look at the embedded net header. */ + saveip = skb->network_header; + savesctp = skb->transport_header; + skb_reset_network_header(skb); + skb_set_transport_header(skb, offset); + sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport); + /* Put back the original pointers.
*/ + skb->network_header = saveip; + skb->transport_header = savesctp; + if (!sk) { + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + return -ENOENT; + } -out_unlock: + sctp_v6_err_handle(transport, skb, type, code, ntohl(info)); sctp_err_finish(sk, transport); -out: - if (likely(idev != NULL)) - in6_dev_put(idev); - return ret; + return 0; +} + +int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb) +{ + struct net *net = dev_net(skb->dev); + struct sctp_association *asoc; + struct sctp_transport *t; + struct icmp6hdr *hdr; + __u32 info = 0; + + skb->transport_header += sizeof(struct udphdr); + sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &t); + if (!sk) { + __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + return -ENOENT; + } + + skb->transport_header -= sizeof(struct udphdr); + hdr = (struct icmp6hdr *)(skb_network_header(skb) - sizeof(struct icmp6hdr)); + if (hdr->icmp6_type == NDISC_REDIRECT) { + /* can't be handled without outer ip6hdr known, leave it to udpv6_err */ + sctp_err_finish(sk, t); + return 0; + } + if (hdr->icmp6_type == ICMPV6_PKT_TOOBIG) + info = ntohl(hdr->icmp6_mtu); + sctp_v6_err_handle(t, skb, hdr->icmp6_type, hdr->icmp6_code, info); + + sctp_err_finish(sk, t); + return 1; } static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) diff --git a/net/sctp/output.c b/net/sctp/output.c index a6aa17df09ef..9032ce60d50e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -103,7 +103,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, sctp_transport_route(tp, NULL, sp); if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); - } else if (!sctp_transport_pmtu_check(tp)) { + } else if (!sctp_transport_pl_enabled(tp) && + !sctp_transport_pmtu_check(tp)) { if (asoc->param_flags & SPP_PMTUD_ENABLE) sctp_assoc_sync_pmtu(asoc); } @@ -211,6 +212,30 @@ enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet, return retval; } +/* Try to bundle a pad chunk into a packet with a heartbeat chunk for PLPMTUD probe */ +static enum sctp_xmit sctp_packet_bundle_pad(struct sctp_packet *pkt, struct sctp_chunk *chunk) +{ + struct sctp_transport *t = pkt->transport; + struct sctp_chunk *pad; + int overhead = 0; + + if (!chunk->pmtu_probe) + return SCTP_XMIT_OK; + + /* calculate the Padding Data size for the pad chunk */ + overhead += sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); + overhead += sizeof(struct sctp_sender_hb_info) + sizeof(struct sctp_pad_chunk); + pad = sctp_make_pad(t->asoc, t->pl.probe_size - overhead); + if (!pad) + return SCTP_XMIT_DELAY; + + list_add_tail(&pad->list, &pkt->chunk_list); + pkt->size += SCTP_PAD4(ntohs(pad->chunk_hdr->length)); + chunk->transport = t; + + return SCTP_XMIT_OK; +} + /* Try to bundle an auth chunk into the packet.
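+ * (sctp_packet_bundle_pad() above follows the same bundling pattern for PAD chunks.)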
*/ static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt, struct sctp_chunk *chunk) @@ -382,6 +407,10 @@ enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet, goto finish; retval = __sctp_packet_append_chunk(packet, chunk); + if (retval != SCTP_XMIT_OK) + goto finish; + + retval = sctp_packet_bundle_pad(packet, chunk); finish: return retval; @@ -553,7 +582,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sk = chunk->skb->sk; /* check gso */ - if (packet->size > tp->pathmtu && !packet->ipfragok) { + if (packet->size > tp->pathmtu && !packet->ipfragok && !chunk->pmtu_probe) { if (!sk_can_gso(sk)) { pr_err_once("Trying to GSO but underlying device doesn't support it."); goto out; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 5cb1aa5f067b..ff47091c385e 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -769,7 +769,11 @@ static int sctp_packet_singleton(struct sctp_transport *transport, sctp_packet_init(&singleton, transport, sport, dport); sctp_packet_config(&singleton, vtag, 0); - sctp_packet_append_chunk(&singleton, chunk); + if (sctp_packet_append_chunk(&singleton, chunk) != SCTP_XMIT_OK) { + list_del_init(&chunk->list); + sctp_chunk_free(chunk); + return -ENOMEM; + } return sctp_packet_transmit(&singleton, gfp); } @@ -929,8 +933,13 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx) one_packet = 1; fallthrough; - case SCTP_CID_SACK: case SCTP_CID_HEARTBEAT: + if (chunk->pmtu_probe) { + sctp_packet_singleton(ctx->transport, chunk, ctx->gfp); + break; + } + fallthrough; + case SCTP_CID_SACK: case SCTP_CID_SHUTDOWN: case SCTP_CID_ECN_ECNE: case SCTP_CID_ASCONF: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index baa4e770e4ba..bc5db0b404ce 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -850,23 +850,6 @@ static int sctp_udp_rcv(struct sock *sk, struct sk_buff *skb) return 0; } -static int sctp_udp_err_lookup(struct sock *sk, struct sk_buff *skb) -{ - struct sctp_association *asoc; - struct sctp_transport *t; - int family; - - skb->transport_header += sizeof(struct udphdr); - family = (ip_hdr(skb)->version == 4) ? AF_INET : AF_INET6; - sk = sctp_err_lookup(dev_net(skb->dev), family, skb, sctp_hdr(skb), - &asoc, &t); - if (!sk) - return -ENOENT; - - sctp_err_finish(sk, t); - return 0; -} - int sctp_udp_sock_start(struct net *net) { struct udp_tunnel_sock_cfg tuncfg = {NULL}; @@ -885,7 +868,7 @@ int sctp_udp_sock_start(struct net *net) tuncfg.encap_type = 1; tuncfg.encap_rcv = sctp_udp_rcv; - tuncfg.encap_err_lookup = sctp_udp_err_lookup; + tuncfg.encap_err_lookup = sctp_udp_v4_err; setup_udp_tunnel_sock(net, sock, &tuncfg); net->sctp.udp4_sock = sock->sk; @@ -907,7 +890,7 @@ int sctp_udp_sock_start(struct net *net) tuncfg.encap_type = 1; tuncfg.encap_rcv = sctp_udp_rcv; - tuncfg.encap_err_lookup = sctp_udp_err_lookup; + tuncfg.encap_err_lookup = sctp_udp_v6_err; setup_udp_tunnel_sock(net, sock, &tuncfg); net->sctp.udp6_sock = sock->sk; #endif diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 5b44d228b6ca..b0eaa93a9cc6 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1160,7 +1160,8 @@ nodata: /* Make a HEARTBEAT chunk. 
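+ * A non-zero probe_size marks the chunk as a PLPMTUD probe.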
*/ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, - const struct sctp_transport *transport) + const struct sctp_transport *transport, + __u32 probe_size) { struct sctp_sender_hb_info hbinfo; struct sctp_chunk *retval; @@ -1176,6 +1177,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, hbinfo.daddr = transport->ipaddr; hbinfo.sent_at = jiffies; hbinfo.hb_nonce = transport->hb_nonce; + hbinfo.probe_size = probe_size; /* Cast away the 'const', as this is just telling the chunk * what transport it belongs to. @@ -1183,6 +1185,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, retval->transport = (struct sctp_transport *) transport; retval->subh.hbs_hdr = sctp_addto_chunk(retval, sizeof(hbinfo), &hbinfo); + retval->pmtu_probe = !!probe_size; nodata: return retval; @@ -1218,6 +1221,32 @@ nodata: return retval; } +/* RFC4820 3. Padding Chunk (PAD) + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 0x84 | Flags=0 | Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * \ Padding Data / + * / \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct sctp_chunk *sctp_make_pad(const struct sctp_association *asoc, int len) +{ + struct sctp_chunk *retval; + + retval = sctp_make_control(asoc, SCTP_CID_PAD, 0, len, GFP_ATOMIC); + if (!retval) + return NULL; + + skb_put_zero(retval->skb, len); + retval->chunk_hdr->length = htons(ntohs(retval->chunk_hdr->length) + len); + retval->chunk_end = skb_tail_pointer(retval->skb); + + return retval; +} + /* Create an Operation Error chunk with the specified space reserved. * This routine can be used for containing multiple causes in the chunk. */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index ce15d590a615..b3815b568e8e 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -471,6 +471,38 @@ out_unlock: sctp_transport_put(transport); } +/* Handle the timeout of the probe timer. */ +void sctp_generate_probe_event(struct timer_list *t) +{ + struct sctp_transport *transport = from_timer(transport, t, probe_timer); + struct sctp_association *asoc = transport->asoc; + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); + int error = 0; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + pr_debug("%s: sock is busy\n", __func__); + + /* Try again later. */ + if (!mod_timer(&transport->probe_timer, jiffies + (HZ / 20))) + sctp_transport_hold(transport); + goto out_unlock; + } + + error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT, + SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_PROBE), + asoc->state, asoc->ep, asoc, + transport, GFP_ATOMIC); + + if (error) + sk->sk_err = -error; + +out_unlock: + bh_unlock_sock(sk); + sctp_transport_put(transport); +} + /* Inject a SACK Timeout event into the state machine. 
*/ static void sctp_generate_sack_event(struct timer_list *t) { @@ -1641,6 +1673,11 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, sctp_cmd_hb_timers_stop(commands, asoc); break; + case SCTP_CMD_PROBE_TIMER_UPDATE: + t = cmd->obj.transport; + sctp_transport_reset_probe_timer(t); + break; + case SCTP_CMD_REPORT_ERROR: error = cmd->obj.error; break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 4f30388a0dd0..09a8f23ec709 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1004,7 +1004,7 @@ static enum sctp_disposition sctp_sf_heartbeat( struct sctp_chunk *reply; /* Send a heartbeat to our peer. */ - reply = sctp_make_heartbeat(asoc, transport); + reply = sctp_make_heartbeat(asoc, transport, 0); if (!reply) return SCTP_DISPOSITION_NOMEM; @@ -1095,6 +1095,32 @@ enum sctp_disposition sctp_sf_send_reconf(struct net *net, return SCTP_DISPOSITION_CONSUME; } +/* send hb chunk with padding for PLPMTUD. */ +enum sctp_disposition sctp_sf_send_probe(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) +{ + struct sctp_transport *transport = (struct sctp_transport *)arg; + struct sctp_chunk *reply; + + if (!sctp_transport_pl_enabled(transport)) + return SCTP_DISPOSITION_CONSUME; + + sctp_transport_pl_send(transport); + + reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); + if (!reply) + return SCTP_DISPOSITION_NOMEM; + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE, + SCTP_TRANSPORT(transport)); + + return SCTP_DISPOSITION_CONSUME; +} + /* * Process a heartbeat request. * @@ -1243,6 +1269,18 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net, if (hbinfo->hb_nonce != link->hb_nonce) return SCTP_DISPOSITION_DISCARD; + if (hbinfo->probe_size) { + if (hbinfo->probe_size != link->pl.probe_size || + !sctp_transport_pl_enabled(link)) + return SCTP_DISPOSITION_DISCARD; + + sctp_transport_pl_recv(link); + if (link->pl.state == SCTP_PL_COMPLETE) + return SCTP_DISPOSITION_CONSUME; + + return sctp_sf_send_probe(net, ep, asoc, type, link, commands); + } + max_interval = link->hbinterval + link->rto; /* Check if the timestamp looks valid.
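+ * (PLPMTUD probe heartbeats never reach this check; they are consumed above.)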
*/ diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 88ea87f4f0e7..1816a4410b2b 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -527,6 +527,26 @@ auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { }; /*state_fn_t auth_chunk_event_table[][] */ static const struct sctp_sm_table_entry +pad_chunk_event_table[SCTP_STATE_NUM_STATES] = { + /* SCTP_STATE_CLOSED */ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), + /* SCTP_STATE_COOKIE_WAIT */ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), + /* SCTP_STATE_COOKIE_ECHOED */ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), + /* SCTP_STATE_ESTABLISHED */ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), + /* SCTP_STATE_SHUTDOWN_PENDING */ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), + /* SCTP_STATE_SHUTDOWN_SENT */ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), + /* SCTP_STATE_SHUTDOWN_RECEIVED */ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), + /* SCTP_STATE_SHUTDOWN_ACK_SENT */ + TYPE_SCTP_FUNC(sctp_sf_discard_chunk), +}; /* chunk pad */ + +static const struct sctp_sm_table_entry chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = { /* SCTP_STATE_CLOSED */ TYPE_SCTP_FUNC(sctp_sf_ootb), @@ -947,6 +967,25 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } +#define TYPE_SCTP_EVENT_TIMEOUT_PROBE { \ + /* SCTP_STATE_CLOSED */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_COOKIE_WAIT */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_COOKIE_ECHOED */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_ESTABLISHED */ \ + TYPE_SCTP_FUNC(sctp_sf_send_probe), \ + /* SCTP_STATE_SHUTDOWN_PENDING */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_SHUTDOWN_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ +} + static const struct sctp_sm_table_entry timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_EVENT_TIMEOUT_NONE, @@ -958,6 +997,7 @@ timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD, TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT, TYPE_SCTP_EVENT_TIMEOUT_RECONF, + TYPE_SCTP_EVENT_TIMEOUT_PROBE, TYPE_SCTP_EVENT_TIMEOUT_SACK, TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE, }; @@ -992,6 +1032,9 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup( case SCTP_CID_AUTH: return &auth_chunk_event_table[0][state]; + + case SCTP_CID_PAD: + return &pad_chunk_event_table[state]; } return &chunk_event_table_unknown[state]; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a79d193ff872..e64e01f61b11 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2496,6 +2496,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, sctp_transport_pmtu(trans, sctp_opt2sk(sp)); sctp_assoc_sync_pmtu(asoc); } + sctp_transport_pl_reset(trans); } else if (asoc) { asoc->param_flags = (asoc->param_flags & ~SPP_PMTUD) | pmtud_change; @@ -4481,6 +4482,61 @@ static int sctp_setsockopt_encap_port(struct sock *sk, return 0; } +static int sctp_setsockopt_probe_interval(struct sock *sk, + struct sctp_probeinterval *params, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_transport *t; + __u32 probe_interval; + + if (optlen != sizeof(*params)) + return -EINVAL; + + probe_interval = params->spi_interval; + if (probe_interval && probe_interval < SCTP_PROBE_TIMER_MIN) + return 
-EINVAL; + + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. + */ + if (!sctp_is_any(sk, (union sctp_addr *)&params->spi_address)) { + t = sctp_addr_id2transport(sk, &params->spi_address, + params->spi_assoc_id); + if (!t) + return -EINVAL; + + t->probe_interval = msecs_to_jiffies(probe_interval); + sctp_transport_pl_reset(t); + return 0; + } + + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. + */ + asoc = sctp_id2assoc(sk, params->spi_assoc_id); + if (!asoc && params->spi_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + /* If changes are for association, also apply probe_interval to + * each transport. + */ + if (asoc) { + list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { + t->probe_interval = msecs_to_jiffies(probe_interval); + sctp_transport_pl_reset(t); + } + + asoc->probe_interval = msecs_to_jiffies(probe_interval); + return 0; + } + + sctp_sk(sk)->probe_interval = probe_interval; + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4703,6 +4759,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_REMOTE_UDP_ENCAPS_PORT: retval = sctp_setsockopt_encap_port(sk, kopt, optlen); break; + case SCTP_PLPMTUD_PROBE_INTERVAL: + retval = sctp_setsockopt_probe_interval(sk, kopt, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -4989,6 +5048,7 @@ static int sctp_init_sock(struct sock *sk) atomic_set(&sp->pd_mode, 0); skb_queue_head_init(&sp->pd_lobby); sp->frag_interleave = 0; + sp->probe_interval = net->sctp.probe_interval; /* Create a per socket endpoint structure. Even if we * change the data structure relationships, this may still @@ -7905,6 +7965,66 @@ out: return 0; } +static int sctp_getsockopt_probe_interval(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_probeinterval params; + struct sctp_association *asoc; + struct sctp_transport *t; + __u32 probe_interval; + + if (len < sizeof(params)) + return -EINVAL; + + len = sizeof(params); + if (copy_from_user(&params, optval, len)) + return -EFAULT; + + /* If an address other than INADDR_ANY is specified, and + * no transport is found, then the request is invalid. + */ + if (!sctp_is_any(sk, (union sctp_addr *)&params.spi_address)) { + t = sctp_addr_id2transport(sk, &params.spi_address, + params.spi_assoc_id); + if (!t) { + pr_debug("%s: failed no transport\n", __func__); + return -EINVAL; + } + + probe_interval = jiffies_to_msecs(t->probe_interval); + goto out; + } + + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid.
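+ * Otherwise SCTP_FUTURE_ASSOC falls through to the socket-level default below.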
+ */ + asoc = sctp_id2assoc(sk, params.spi_assoc_id); + if (!asoc && params.spi_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + pr_debug("%s: failed no association\n", __func__); + return -EINVAL; + } + + if (asoc) { + probe_interval = jiffies_to_msecs(asoc->probe_interval); + goto out; + } + + probe_interval = sctp_sk(sk)->probe_interval; + +out: + params.spi_interval = probe_interval; + if (copy_to_user(optval, &params, len)) + return -EFAULT; + + if (put_user(len, optlen)) + return -EFAULT; + + return 0; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -8128,6 +8248,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_REMOTE_UDP_ENCAPS_PORT: retval = sctp_getsockopt_encap_port(sk, len, optval, optlen); break; + case SCTP_PLPMTUD_PROBE_INTERVAL: + retval = sctp_getsockopt_probe_interval(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 55871b277f47..b46a416787ec 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -55,6 +55,8 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_auth(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table sctp_table[] = { { @@ -294,6 +296,13 @@ static struct ctl_table sctp_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "plpmtud_probe_interval", + .data = &init_net.sctp.probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_sctp_do_probe_interval, + }, + { .procname = "udp_port", .data = &init_net.sctp.udp_port, .maxlen = sizeof(int), @@ -539,6 +548,32 @@ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write, return ret; } +static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + struct ctl_table tbl; + int ret, new_value; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.probe_interval; + + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { + if (new_value && new_value < SCTP_PROBE_TIMER_MIN) + return -EINVAL; + + net->sctp.probe_interval = new_value; + } + + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index bf0ac467e757..5f23804f21c7 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -75,6 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net, timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0); timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0); timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0); + timer_setup(&peer->probe_timer, sctp_generate_probe_event, 0); timer_setup(&peer->proto_unreach_timer, sctp_generate_proto_unreach_event, 0); @@ -131,6 +132,9 @@ void sctp_transport_free(struct sctp_transport *transport) if (del_timer(&transport->reconf_timer)) sctp_transport_put(transport); + if (del_timer(&transport->probe_timer)) + sctp_transport_put(transport); + /* Delete the ICMP proto unreachable timer if it's active.
*/ if (del_timer(&transport->proto_unreach_timer)) sctp_transport_put(transport); @@ -207,6 +211,15 @@ void sctp_transport_reset_reconf_timer(struct sctp_transport *transport) sctp_transport_hold(transport); } +void sctp_transport_reset_probe_timer(struct sctp_transport *transport) +{ + if (timer_pending(&transport->probe_timer)) + return; + if (!mod_timer(&transport->probe_timer, + jiffies + transport->probe_interval)) + sctp_transport_hold(transport); +} + /* This transport has been assigned to an association. * Initialize fields from the association or from the sock itself. * Register the reference count in the association. @@ -241,12 +254,143 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = sctp_dst_mtu(transport->dst); else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; + + sctp_transport_pl_update(transport); +} + +void sctp_transport_pl_send(struct sctp_transport *t) +{ + pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", + __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + + if (t->pl.probe_count < SCTP_MAX_PROBES) { + t->pl.probe_count++; + return; + } + + if (t->pl.state == SCTP_PL_BASE) { + if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ + t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ + + t->pl.pmtu = SCTP_MIN_PLPMTU; + t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); + sctp_assoc_sync_pmtu(t->asoc); + } + } else if (t->pl.state == SCTP_PL_SEARCH) { + if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */ + t->pl.state = SCTP_PL_BASE; /* Search -> Base */ + t->pl.probe_size = SCTP_BASE_PLPMTU; + t->pl.probe_high = 0; + + t->pl.pmtu = SCTP_BASE_PLPMTU; + t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); + sctp_assoc_sync_pmtu(t->asoc); + } else { /* Normal probe failure. 
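+ * Record the failed size as the search ceiling (probe_high) and fall back to probing at the verified PMTU.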
*/ + t->pl.probe_high = t->pl.probe_size; + t->pl.probe_size = t->pl.pmtu; + } + } else if (t->pl.state == SCTP_PL_COMPLETE) { + if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */ + t->pl.state = SCTP_PL_BASE; /* Search Complete -> Base */ + t->pl.probe_size = SCTP_BASE_PLPMTU; + + t->pl.pmtu = SCTP_BASE_PLPMTU; + t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); + sctp_assoc_sync_pmtu(t->asoc); + } + } + t->pl.probe_count = 1; +} + +void sctp_transport_pl_recv(struct sctp_transport *t) +{ + pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", + __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + + t->pl.pmtu = t->pl.probe_size; + t->pl.probe_count = 0; + if (t->pl.state == SCTP_PL_BASE) { + t->pl.state = SCTP_PL_SEARCH; /* Base -> Search */ + t->pl.probe_size += SCTP_PL_BIG_STEP; + } else if (t->pl.state == SCTP_PL_ERROR) { + t->pl.state = SCTP_PL_SEARCH; /* Error -> Search */ + + t->pl.pmtu = t->pl.probe_size; + t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); + sctp_assoc_sync_pmtu(t->asoc); + t->pl.probe_size += SCTP_PL_BIG_STEP; + } else if (t->pl.state == SCTP_PL_SEARCH) { + if (!t->pl.probe_high) { + t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, + SCTP_MAX_PLPMTU); + return; + } + t->pl.probe_size += SCTP_PL_MIN_STEP; + if (t->pl.probe_size >= t->pl.probe_high) { + t->pl.probe_high = 0; + t->pl.raise_count = 0; + t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */ + + t->pl.probe_size = t->pl.pmtu; + t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); + sctp_assoc_sync_pmtu(t->asoc); + } + } else if (t->pl.state == SCTP_PL_COMPLETE && ++t->pl.raise_count == 30) { + /* Raise probe_size again after 30 * interval in Search Complete */ + t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ + t->pl.probe_size += SCTP_PL_MIN_STEP; + } +} + +static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu) +{ + pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n", + __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, pmtu); + + if (pmtu < SCTP_MIN_PLPMTU || pmtu >= t->pl.probe_size) + return false; + + if (t->pl.state == SCTP_PL_BASE) { + if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) { + t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ + + t->pl.pmtu = SCTP_MIN_PLPMTU; + t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); + } + } else if (t->pl.state == SCTP_PL_SEARCH) { + if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) { + t->pl.state = SCTP_PL_BASE; /* Search -> Base */ + t->pl.probe_size = SCTP_BASE_PLPMTU; + t->pl.probe_count = 0; + + t->pl.probe_high = 0; + t->pl.pmtu = SCTP_BASE_PLPMTU; + t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); + } else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) { + t->pl.probe_size = pmtu; + t->pl.probe_count = 0; + + return false; + } + } else if (t->pl.state == SCTP_PL_COMPLETE) { + if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) { + t->pl.state = SCTP_PL_BASE; /* Complete -> Base */ + t->pl.probe_size = SCTP_BASE_PLPMTU; + t->pl.probe_count = 0; + + t->pl.probe_high = 0; + t->pl.pmtu = SCTP_BASE_PLPMTU; + t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); + } + } + + return true; } bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { - struct dst_entry *dst = sctp_transport_dst_check(t); struct sock *sk = t->asoc->base.sk; + struct dst_entry *dst; bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { @@ -257,6 +401,10 @@ bool 
sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) } pmtu = SCTP_TRUNC4(pmtu); + if (sctp_transport_pl_enabled(t)) + return sctp_transport_pl_toobig(t, pmtu - sctp_transport_pl_hlen(t)); + + dst = sctp_transport_dst_check(t); if (dst) { struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family); union sctp_addr addr; diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c index 614013e3b574..e80e34f7ac15 100644 --- a/net/smc/smc_stats.c +++ b/net/smc/smc_stats.c @@ -393,17 +393,17 @@ int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb) continue; if (!skip_serv) { rc_srv = smc_nl_get_fback_details(skb, cb, k, is_srv); - if (rc_srv && rc_srv != ENODATA) + if (rc_srv && rc_srv != -ENODATA) break; } else { skip_serv = 0; } rc_clnt = smc_nl_get_fback_details(skb, cb, k, !is_srv); - if (rc_clnt && rc_clnt != ENODATA) { + if (rc_clnt && rc_clnt != -ENODATA) { skip_serv = 1; break; } - if (rc_clnt == ENODATA && rc_srv == ENODATA) + if (rc_clnt == -ENODATA && rc_srv == -ENODATA) break; } mutex_unlock(&net->smc.mutex_fback_rsn); diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 075c4f4b41cf..289025cd545a 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -154,6 +154,9 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) goto out_err; } + if (sk->sk_state == SMC_INIT) + return -ENOTCONN; + if (len > conn->sndbuf_desc->len) SMC_STAT_RMB_TX_SIZE_SMALL(smc, !conn->lnk); @@ -164,8 +167,6 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) SMC_STAT_INC(smc, urg_data_cnt); while (msg_data_left(msg)) { - if (sk->sk_state == SMC_INIT) - return -ENOTCONN; if (smc->sk.sk_shutdown & SEND_SHUTDOWN || (smc->sk.sk_err == ECONNABORTED) || conn->killed) diff --git a/net/socket.c b/net/socket.c index 27e3e7d53f8e..bd9233da2497 100644 --- a/net/socket.c +++ b/net/socket.c @@ -165,6 +165,54 @@ static const struct file_operations socket_file_ops = { .show_fdinfo = sock_show_fdinfo, }; +static const char * const pf_family_names[] = { + [PF_UNSPEC] = "PF_UNSPEC", + [PF_UNIX] = "PF_UNIX/PF_LOCAL", + [PF_INET] = "PF_INET", + [PF_AX25] = "PF_AX25", + [PF_IPX] = "PF_IPX", + [PF_APPLETALK] = "PF_APPLETALK", + [PF_NETROM] = "PF_NETROM", + [PF_BRIDGE] = "PF_BRIDGE", + [PF_ATMPVC] = "PF_ATMPVC", + [PF_X25] = "PF_X25", + [PF_INET6] = "PF_INET6", + [PF_ROSE] = "PF_ROSE", + [PF_DECnet] = "PF_DECnet", + [PF_NETBEUI] = "PF_NETBEUI", + [PF_SECURITY] = "PF_SECURITY", + [PF_KEY] = "PF_KEY", + [PF_NETLINK] = "PF_NETLINK/PF_ROUTE", + [PF_PACKET] = "PF_PACKET", + [PF_ASH] = "PF_ASH", + [PF_ECONET] = "PF_ECONET", + [PF_ATMSVC] = "PF_ATMSVC", + [PF_RDS] = "PF_RDS", + [PF_SNA] = "PF_SNA", + [PF_IRDA] = "PF_IRDA", + [PF_PPPOX] = "PF_PPPOX", + [PF_WANPIPE] = "PF_WANPIPE", + [PF_LLC] = "PF_LLC", + [PF_IB] = "PF_IB", + [PF_MPLS] = "PF_MPLS", + [PF_CAN] = "PF_CAN", + [PF_TIPC] = "PF_TIPC", + [PF_BLUETOOTH] = "PF_BLUETOOTH", + [PF_IUCV] = "PF_IUCV", + [PF_RXRPC] = "PF_RXRPC", + [PF_ISDN] = "PF_ISDN", + [PF_PHONET] = "PF_PHONET", + [PF_IEEE802154] = "PF_IEEE802154", + [PF_CAIF] = "PF_CAIF", + [PF_ALG] = "PF_ALG", + [PF_NFC] = "PF_NFC", + [PF_VSOCK] = "PF_VSOCK", + [PF_KCM] = "PF_KCM", + [PF_QIPCRTR] = "PF_QIPCRTR", + [PF_SMC] = "PF_SMC", + [PF_XDP] = "PF_XDP", +}; + /* * The protocol list. Each protocol is registered in here. */ @@ -1072,19 +1120,6 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. 
*/ -/** - * get_net_ns - increment the refcount of the network namespace - * @ns: common namespace (net) - * - * Returns the net's common namespace. - */ - -struct ns_common *get_net_ns(struct ns_common *ns) -{ - return &get_net(container_of(ns, struct net, ns))->ns; -} -EXPORT_SYMBOL_GPL(get_net_ns); - static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; @@ -2988,7 +3023,7 @@ int sock_register(const struct net_proto_family *ops) } spin_unlock(&net_family_lock); - pr_info("NET: Registered protocol family %d\n", ops->family); + pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]); return err; } EXPORT_SYMBOL(sock_register); @@ -3016,7 +3051,7 @@ void sock_unregister(int family) synchronize_rcu(); - pr_info("NET: Unregistered protocol family %d\n", family); + pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]); } EXPORT_SYMBOL(sock_unregister); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f555d335e910..42623d6b8f0e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1677,13 +1677,6 @@ call_reserveresult(struct rpc_task *task) return; } - /* - * Even though there was an error, we may have acquired - * a request slot somehow. Make sure not to leak it. - */ - if (task->tk_rqstp) - xprt_release(task); - switch (status) { case -ENOMEM: rpc_delay(task, HZ >> 2); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e5b5a960a69b..3509a7f139b9 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -70,6 +70,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); static __be32 xprt_alloc_xid(struct rpc_xprt *xprt); static void xprt_destroy(struct rpc_xprt *xprt); +static void xprt_request_init(struct rpc_task *task); static DEFINE_SPINLOCK(xprt_list_lock); static LIST_HEAD(xprt_list); @@ -1606,17 +1607,40 @@ xprt_transmit(struct rpc_task *task) spin_unlock(&xprt->queue_lock); } -static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) +static void xprt_complete_request_init(struct rpc_task *task) +{ + if (task->tk_rqstp) + xprt_request_init(task); +} + +void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) { set_bit(XPRT_CONGESTED, &xprt->state); - rpc_sleep_on(&xprt->backlog, task, NULL); + rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init); +} +EXPORT_SYMBOL_GPL(xprt_add_backlog); + +static bool __xprt_set_rq(struct rpc_task *task, void *data) +{ + struct rpc_rqst *req = data; + + if (task->tk_rqstp == NULL) { + memset(req, 0, sizeof(*req)); /* mark unused */ + task->tk_rqstp = req; + return true; + } + return false; } -static void xprt_wake_up_backlog(struct rpc_xprt *xprt) +bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req) { - if (rpc_wake_up_next(&xprt->backlog) == NULL) + if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) { clear_bit(XPRT_CONGESTED, &xprt->state); + return false; + } + return true; } +EXPORT_SYMBOL_GPL(xprt_wake_up_backlog); static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -1626,7 +1650,7 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task goto out; spin_lock(&xprt->reserve_lock); if (test_bit(XPRT_CONGESTED, &xprt->state)) { - rpc_sleep_on(&xprt->backlog, task, NULL); + xprt_add_backlog(xprt, task); ret = true; } spin_unlock(&xprt->reserve_lock); @@ -1703,11 +1727,11 @@ EXPORT_SYMBOL_GPL(xprt_alloc_slot); void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { 
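+ /* Hand the freed slot straight to a backlogged request when one is waiting. */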
spin_lock(&xprt->reserve_lock); - if (!xprt_dynamic_free_slot(xprt, req)) { + if (!xprt_wake_up_backlog(xprt, req) && + !xprt_dynamic_free_slot(xprt, req)) { memset(req, 0, sizeof(*req)); /* mark unused */ list_add(&req->rq_list, &xprt->free); } - xprt_wake_up_backlog(xprt); spin_unlock(&xprt->reserve_lock); } EXPORT_SYMBOL_GPL(xprt_free_slot); @@ -1894,10 +1918,10 @@ void xprt_release(struct rpc_task *task) xdr_free_bvec(&req->rq_snd_buf); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); - task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); + task->tk_rqstp = NULL; if (likely(!bc_prealloc(req))) xprt->ops->free_slot(xprt, req); else diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 649f7d8b9733..c335c1361564 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -628,8 +628,9 @@ out_mapping_err: return false; } -/* The tail iovec might not reside in the same page as the - * head iovec. +/* The tail iovec may include an XDR pad for the page list, + * as well as additional content, and may not reside in the + * same page as the head iovec. */ static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req, struct xdr_buf *xdr, @@ -747,19 +748,27 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, struct xdr_buf *xdr) { - struct kvec *tail = &xdr->tail[0]; - if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len)) return false; - /* If there is a Read chunk, the page list is handled + /* If there is a Read chunk, the page list is being handled * via explicit RDMA, and thus is skipped here. */ - if (tail->iov_len) { - if (!rpcrdma_prepare_tail_iov(req, xdr, - offset_in_page(tail->iov_base), - tail->iov_len)) + /* Do not include the tail if it is only an XDR pad */ + if (xdr->tail[0].iov_len > 3) { + unsigned int page_base, len; + + /* If the content in the page list is an odd length, + * xdr_write_pages() adds a pad at the beginning of + * the tail iovec. Force the tail's non-pad content to + * land at the next XDR position in the Send message. 
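+ * Since XDR items are 32-bit aligned, the pad length here is exactly (len & 3).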
+ */ + page_base = offset_in_page(xdr->tail[0].iov_base); + len = xdr->tail[0].iov_len; + page_base += len & 3; + len -= len & 3; + if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len)) return false; kref_get(&req->rl_kref); } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 09953597d055..19a49d26b1e4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -520,9 +520,8 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) return; out_sleep: - set_bit(XPRT_CONGESTED, &xprt->state); - rpc_sleep_on(&xprt->backlog, task, NULL); task->tk_status = -EAGAIN; + xprt_add_backlog(xprt, task); } /** @@ -537,10 +536,11 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst) struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, rx_xprt); - memset(rqst, 0, sizeof(*rqst)); - rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); - if (unlikely(!rpc_wake_up_next(&xprt->backlog))) - clear_bit(XPRT_CONGESTED, &xprt->state); + rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); + if (!xprt_wake_up_backlog(xprt, rqst)) { + memset(rqst, 0, sizeof(*rqst)); + rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst)); + } } static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1e965a380896..649c23518ec0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1201,6 +1201,20 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) } /** + * rpcrdma_reply_put - Put reply buffers back into pool + * @buffers: buffer pool + * @req: object to return + * + */ +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) +{ + if (req->rl_reply) { + rpcrdma_rep_put(buffers, req->rl_reply); + req->rl_reply = NULL; + } +} + +/** * rpcrdma_buffer_get - Get a request buffer * @buffers: Buffer pool from which to obtain a buffer * @@ -1228,9 +1242,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) */ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) { - if (req->rl_reply) - rpcrdma_rep_put(buffers, req->rl_reply); - req->rl_reply = NULL; + rpcrdma_reply_put(buffers, req); spin_lock(&buffers->rb_lock); list_add(&req->rl_list, &buffers->rb_send_bufs); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 436ad7312614..5d231d94e944 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -479,6 +479,7 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req); void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep); +void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req); bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 47aa47a2b07c..316d04945587 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1010,6 +1010,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req) kernel_sock_shutdown(transport->sock, SHUT_RDWR); return -ENOTCONN; } + if (!transport->inet) + return -ENOTCONN; xs_pktdump("packet data:", req->rq_svec->iov_base, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 89a36db47ab4..070698dd19bc 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -381,19 +381,20 @@ 
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); static int __switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), - int (*add_cb)(struct net_device *dev, + int (*add_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj, struct netlink_ext_ack *extack)) { + struct switchdev_notifier_info *info = &port_obj_info->info; struct netlink_ext_ack *extack; struct net_device *lower_dev; struct list_head *iter; int err = -EOPNOTSUPP; - extack = switchdev_notifier_info_to_extack(&port_obj_info->info); + extack = switchdev_notifier_info_to_extack(info); if (check_cb(dev)) { - err = add_cb(dev, port_obj_info->obj, extack); + err = add_cb(dev, info->ctx, port_obj_info->obj, extack); if (err != -EOPNOTSUPP) port_obj_info->handled = true; return err; @@ -422,7 +423,7 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, int switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), - int (*add_cb)(struct net_device *dev, + int (*add_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj, struct netlink_ext_ack *extack)) { @@ -439,15 +440,16 @@ EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add); static int __switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), - int (*del_cb)(struct net_device *dev, + int (*del_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj)) { + struct switchdev_notifier_info *info = &port_obj_info->info; struct net_device *lower_dev; struct list_head *iter; int err = -EOPNOTSUPP; if (check_cb(dev)) { - err = del_cb(dev, port_obj_info->obj); + err = del_cb(dev, info->ctx, port_obj_info->obj); if (err != -EOPNOTSUPP) port_obj_info->handled = true; return err; @@ -476,7 +478,7 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev, int switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), - int (*del_cb)(struct net_device *dev, + int (*del_cb)(struct net_device *dev, const void *ctx, const struct switchdev_obj *obj)) { int err; @@ -492,19 +494,20 @@ EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del); static int __switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), - int (*set_cb)(struct net_device *dev, + int (*set_cb)(struct net_device *dev, const void *ctx, const struct switchdev_attr *attr, struct netlink_ext_ack *extack)) { + struct switchdev_notifier_info *info = &port_attr_info->info; struct netlink_ext_ack *extack; struct net_device *lower_dev; struct list_head *iter; int err = -EOPNOTSUPP; - extack = switchdev_notifier_info_to_extack(&port_attr_info->info); + extack = switchdev_notifier_info_to_extack(info); if (check_cb(dev)) { - err = set_cb(dev, port_attr_info->attr, extack); + err = set_cb(dev, info->ctx, port_attr_info->attr, extack); if (err != -EOPNOTSUPP) port_attr_info->handled = true; return err; @@ -533,7 +536,7 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev, int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), - int 
(*set_cb)(struct net_device *dev, + int (*set_cb)(struct net_device *dev, const void *ctx, const struct switchdev_attr *attr, struct netlink_ext_ack *extack)) { diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d4beca895992..593846d25214 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -699,7 +699,7 @@ int tipc_bcast_init(struct net *net) spin_lock_init(&tipc_net(net)->bclock); if (!tipc_link_bc_create(net, 0, 0, NULL, - FB_MTU, + one_page_mtu, BCLINK_WIN_DEFAULT, BCLINK_WIN_DEFAULT, 0, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ce6ab54822d8..5c9fd4791c4b 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -41,19 +41,18 @@ #include "name_table.h" #include "crypto.h" +#define BUF_ALIGN(x) ALIGN(x, 4) #define MAX_FORWARD_SIZE 1024 #ifdef CONFIG_TIPC_CRYPTO #define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16) -#define BUF_TAILROOM (TIPC_AES_GCM_TAG_SIZE) +#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE) #else #define BUF_HEADROOM (LL_MAX_HEADER + 48) -#define BUF_TAILROOM 16 +#define BUF_OVERHEAD BUF_HEADROOM #endif -static unsigned int align(unsigned int i) -{ - return (i + 3) & ~3u; -} +const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /** * tipc_buf_acquire - creates a TIPC message buffer @@ -69,13 +68,8 @@ static unsigned int align(unsigned int i) struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) { struct sk_buff *skb; -#ifdef CONFIG_TIPC_CRYPTO - unsigned int buf_size = (BUF_HEADROOM + size + BUF_TAILROOM + 3) & ~3u; -#else - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; -#endif - skb = alloc_skb_fclone(buf_size, gfp); + skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp); if (skb) { skb_reserve(skb, BUF_HEADROOM); skb_put(skb, size); @@ -395,7 +389,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, if (unlikely(!skb)) { if (pktmax != MAX_MSG_SIZE) return -ENOMEM; - rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list); + rc = tipc_msg_build(mhdr, m, offset, dsz, + one_page_mtu, list); if (rc != dsz) return rc; if (tipc_msg_assemble(list)) @@ -490,7 +485,7 @@ static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg, msz = msg_size(msg); bsz = msg_size(bmsg); - offset = align(bsz); + offset = BUF_ALIGN(bsz); pad = offset - bsz; if (unlikely(skb_tailroom(bskb) < (pad + msz))) @@ -547,7 +542,7 @@ bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss, /* Make a new bundle of the two messages if possible */ tsz = msg_size(buf_msg(tskb)); - if (unlikely(mss < align(INT_H_SIZE + tsz) + msg_size(msg))) + if (unlikely(mss < BUF_ALIGN(INT_H_SIZE + tsz) + msg_size(msg))) return true; if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE, GFP_ATOMIC))) @@ -606,7 +601,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) if (unlikely(!tipc_msg_validate(iskb))) goto none; - *pos += align(imsz); + *pos += BUF_ALIGN(imsz); return true; none: kfree_skb(skb); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 5d64596ba987..64ae4c4c44f8 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -99,9 +99,10 @@ struct plist; #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) -#define FB_MTU 3744 #define TIPC_MEDIA_INFO_OFFSET 5 +extern const int one_page_mtu; + struct tipc_skb_cb { union { struct { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4d4f24cbd86b..58c2f318b0a8 100644 --- 
a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -262,6 +262,14 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) sk_add_node(sk, list); } +static void __unix_set_addr(struct sock *sk, struct unix_address *addr, + unsigned hash) +{ + __unix_remove_socket(sk); + smp_store_release(&unix_sk(sk)->addr, addr); + __unix_insert_socket(&unix_socket_table[hash], sk); +} + static inline void unix_remove_socket(struct sock *sk) { spin_lock(&unix_table_lock); @@ -278,11 +286,11 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) static struct sock *__unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, - int len, int type, unsigned int hash) + int len, unsigned int hash) { struct sock *s; - sk_for_each(s, &unix_socket_table[hash ^ type]) { + sk_for_each(s, &unix_socket_table[hash]) { struct unix_sock *u = unix_sk(s); if (!net_eq(sock_net(s), net)) @@ -297,13 +305,12 @@ static struct sock *__unix_find_socket_byname(struct net *net, static inline struct sock *unix_find_socket_byname(struct net *net, struct sockaddr_un *sunname, - int len, int type, - unsigned int hash) + int len, unsigned int hash) { struct sock *s; spin_lock(&unix_table_lock); - s = __unix_find_socket_byname(net, sunname, len, type, hash); + s = __unix_find_socket_byname(net, sunname, len, hash); if (s) sock_hold(s); spin_unlock(&unix_table_lock); @@ -535,12 +542,14 @@ static void unix_release_sock(struct sock *sk, int embrion) u->path.mnt = NULL; state = sk->sk_state; sk->sk_state = TCP_CLOSE; + + skpair = unix_peer(sk); + unix_peer(sk) = NULL; + unix_state_unlock(sk); wake_up_interruptible_all(&u->peer_wait); - skpair = unix_peer(sk); - if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { unix_state_lock(skpair); @@ -555,7 +564,6 @@ static void unix_release_sock(struct sock *sk, int embrion) unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ - unix_peer(sk) = NULL; } /* Try to flush out this socket. 
Throw out buffers at least */ @@ -890,12 +898,12 @@ static int unix_autobind(struct socket *sock) retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); + addr->hash ^= sk->sk_type; spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; - if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, - addr->hash)) { + if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) { spin_unlock(&unix_table_lock); /* * __unix_find_socket_byname() may take long time if many names @@ -910,11 +918,8 @@ retry: } goto retry; } - addr->hash ^= sk->sk_type; - __unix_remove_socket(sk); - smp_store_release(&u->addr, addr); - __unix_insert_socket(&unix_socket_table[addr->hash], sk); + __unix_set_addr(sk, addr, addr->hash); spin_unlock(&unix_table_lock); err = 0; @@ -959,7 +964,7 @@ static struct sock *unix_find_other(struct net *net, } } else { err = -ECONNREFUSED; - u = unix_find_socket_byname(net, sunname, len, type, hash); + u = unix_find_socket_byname(net, sunname, len, type ^ hash); if (u) { struct dentry *dentry; dentry = unix_sk(u)->path.dentry; @@ -977,125 +982,125 @@ fail: return NULL; } -static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) +static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) { + struct unix_sock *u = unix_sk(sk); + umode_t mode = S_IFSOCK | + (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); + struct user_namespace *ns; // barf... + struct path parent; struct dentry *dentry; - struct path path; - int err = 0; + unsigned int hash; + int err; + /* * Get the parent directory, calculate the hash for last * component. */ - dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); - err = PTR_ERR(dentry); + dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); if (IS_ERR(dentry)) - return err; + return PTR_ERR(dentry); + ns = mnt_user_ns(parent.mnt); /* * All right, let's create it. */ - err = security_path_mknod(&path, dentry, mode, 0); - if (!err) { - err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry), - dentry, mode, 0); - if (!err) { - res->mnt = mntget(path.mnt); - res->dentry = dget(dentry); - } - } - done_path_create(&path, dentry); + err = security_path_mknod(&parent, dentry, mode, 0); + if (!err) + err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); + if (err) + goto out; + err = mutex_lock_interruptible(&u->bindlock); + if (err) + goto out_unlink; + if (u->addr) + goto out_unlock; + + addr->hash = UNIX_HASH_SIZE; + hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); + spin_lock(&unix_table_lock); + u->path.mnt = mntget(parent.mnt); + u->path.dentry = dget(dentry); + __unix_set_addr(sk, addr, hash); + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + done_path_create(&parent, dentry); + return 0; + +out_unlock: + mutex_unlock(&u->bindlock); + err = -EINVAL; +out_unlink: + /* failed after successful mknod? unlink what we'd created... 
*/ + vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); +out: + done_path_create(&parent, dentry); return err; } +static int unix_bind_abstract(struct sock *sk, struct unix_address *addr) +{ + struct unix_sock *u = unix_sk(sk); + int err; + + err = mutex_lock_interruptible(&u->bindlock); + if (err) + return err; + + if (u->addr) { + mutex_unlock(&u->bindlock); + return -EINVAL; + } + + spin_lock(&unix_table_lock); + if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, + addr->hash)) { + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + return -EADDRINUSE; + } + __unix_set_addr(sk, addr, addr->hash); + spin_unlock(&unix_table_lock); + mutex_unlock(&u->bindlock); + return 0; +} + static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); - struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; char *sun_path = sunaddr->sun_path; int err; unsigned int hash; struct unix_address *addr; - struct hlist_head *list; - struct path path = { }; - err = -EINVAL; if (addr_len < offsetofend(struct sockaddr_un, sun_family) || sunaddr->sun_family != AF_UNIX) - goto out; + return -EINVAL; - if (addr_len == sizeof(short)) { - err = unix_autobind(sock); - goto out; - } + if (addr_len == sizeof(short)) + return unix_autobind(sock); err = unix_mkname(sunaddr, addr_len, &hash); if (err < 0) - goto out; + return err; addr_len = err; - - if (sun_path[0]) { - umode_t mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = unix_mknod(sun_path, mode, &path); - if (err) { - if (err == -EEXIST) - err = -EADDRINUSE; - goto out; - } - } - - err = mutex_lock_interruptible(&u->bindlock); - if (err) - goto out_put; - - err = -EINVAL; - if (u->addr) - goto out_up; - - err = -ENOMEM; addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); if (!addr) - goto out_up; + return -ENOMEM; memcpy(addr->name, sunaddr, addr_len); addr->len = addr_len; addr->hash = hash ^ sk->sk_type; refcount_set(&addr->refcnt, 1); - if (sun_path[0]) { - addr->hash = UNIX_HASH_SIZE; - hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1); - spin_lock(&unix_table_lock); - u->path = path; - list = &unix_socket_table[hash]; - } else { - spin_lock(&unix_table_lock); - err = -EADDRINUSE; - if (__unix_find_socket_byname(net, sunaddr, addr_len, - sk->sk_type, hash)) { - unix_release_addr(addr); - goto out_unlock; - } - - list = &unix_socket_table[addr->hash]; - } - - err = 0; - __unix_remove_socket(sk); - smp_store_release(&u->addr, addr); - __unix_insert_socket(list, sk); - -out_unlock: - spin_unlock(&unix_table_lock); -out_up: - mutex_unlock(&u->bindlock); -out_put: + if (sun_path[0]) + err = unix_bind_bsd(sk, addr); + else + err = unix_bind_abstract(sk, addr); if (err) - path_put(&path); -out: - return err; + unix_release_addr(addr); + return err == -EEXIST ? 
-EADDRINUSE : err; } static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 67954afef4e1..21ccf450e249 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -860,7 +860,7 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_stream_has_data); -static s64 vsock_has_data(struct vsock_sock *vsk) +static s64 vsock_connectible_has_data(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); @@ -1866,10 +1866,11 @@ out: return err; } -static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait, - long timeout, - struct vsock_transport_recv_notify_data *recv_data, - size_t target) +static int vsock_connectible_wait_data(struct sock *sk, + struct wait_queue_entry *wait, + long timeout, + struct vsock_transport_recv_notify_data *recv_data, + size_t target) { const struct vsock_transport *transport; struct vsock_sock *vsk; @@ -1880,7 +1881,7 @@ static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait, err = 0; transport = vsk->transport; - while ((data = vsock_has_data(vsk)) == 0) { + while ((data = vsock_connectible_has_data(vsk)) == 0) { prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE); if (sk->sk_err != 0 || @@ -1967,7 +1968,8 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg, while (1) { ssize_t read; - err = vsock_wait_data(sk, &wait, timeout, &recv_data, target); + err = vsock_connectible_wait_data(sk, &wait, timeout, + &recv_data, target); if (err <= 0) break; @@ -2022,7 +2024,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - err = vsock_wait_data(sk, &wait, timeout, NULL, 0); + err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0); if (err <= 0) goto out; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index e73ce652bf3c..ed1664e7bd88 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -498,9 +498,11 @@ static bool virtio_transport_seqpacket_allow(u32 remote_cid) struct virtio_vsock *vsock; bool seqpacket_allow; + seqpacket_allow = false; rcu_read_lock(); vsock = rcu_dereference(the_virtio_vsock); - seqpacket_allow = vsock->seqpacket_allow; + if (vsock) + seqpacket_allow = vsock->seqpacket_allow; rcu_read_unlock(); return seqpacket_allow; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 23704a6bc437..f014ccfdd9c2 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -413,7 +413,6 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, struct virtio_vsock_pkt *pkt; int dequeued_len = 0; size_t user_buf_len = msg_data_left(msg); - bool copy_failed = false; bool msg_ready = false; spin_lock_bh(&vvs->rx_lock); @@ -426,7 +425,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, while (!msg_ready) { pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list); - if (!copy_failed) { + if (dequeued_len >= 0) { size_t pkt_len; size_t bytes_to_copy; @@ -443,11 +442,9 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy); if (err) { - /* Copy of message failed, set flag to skip - * copy path for rest of fragments. Rest of + /* Copy of message failed. Rest of * fragments will be freed without copy. 
*/ - copy_failed = true; dequeued_len = err; } else { user_buf_len -= bytes_to_copy; diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 2eee93985ab0..af590ae606b6 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -28,7 +28,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) @$(kecho) " GEN $@" @(echo '#include "reg.h"'; \ echo 'const u8 shipped_regdb_certs[] = {'; \ - cat $^ ; \ + echo | cat - $^ ; \ echo '};'; \ echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ ) > $@ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 285b8076054b..869c43d4414c 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,7 +6,7 @@ * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2018-2020 Intel Corporation + * Copyright 2018-2021 Intel Corporation */ #include <linux/export.h> @@ -942,7 +942,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, struct ieee80211_sta_vht_cap *vht_cap; struct ieee80211_edmg *edmg_cap; u32 width, control_freq, cap; - bool support_80_80 = false; + bool ext_nss_cap, support_80_80 = false; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return false; @@ -950,6 +950,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap; vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap; edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap; + ext_nss_cap = __le16_to_cpu(vht_cap->vht_mcs.tx_highest) & + IEEE80211_VHT_EXT_NSS_BW_CAPABLE; if (edmg_cap->channels && !cfg80211_edmg_usable(wiphy, @@ -1015,7 +1017,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) || (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) || - u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1; + (ext_nss_cap && + u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1); if (chandef->chan->band != NL80211_BAND_6GHZ && !support_80_80) return false; fallthrough; @@ -1037,7 +1040,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ && - !(vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)) + !(ext_nss_cap && + (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK))) return false; break; default: @@ -1335,3 +1339,34 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, WARN_ON(1); } } + +bool cfg80211_any_usable_channels(struct wiphy *wiphy, + unsigned long sband_mask, + u32 prohibited_flags) +{ + int idx; + + prohibited_flags |= IEEE80211_CHAN_DISABLED; + + for_each_set_bit(idx, &sband_mask, NUM_NL80211_BANDS) { + struct ieee80211_supported_band *sband = wiphy->bands[idx]; + int chanidx; + + if (!sband) + continue; + + for (chanidx = 0; chanidx < sband->n_channels; chanidx++) { + struct ieee80211_channel *chan; + + chan = &sband->channels[chanidx]; + + if (chan->flags & prohibited_flags) + continue; + + return true; + } + } + + return false; +} +EXPORT_SYMBOL(cfg80211_any_usable_channels); diff --git a/net/wireless/core.c b/net/wireless/core.c index 6fbf7537faf5..03323121ca50 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 
2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -532,11 +532,11 @@ use_default_name: wiphy_net_set(&rdev->wiphy, &init_net); rdev->rfkill_ops.set_block = cfg80211_rfkill_set_block; - rdev->rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev), - &rdev->wiphy.dev, RFKILL_TYPE_WLAN, - &rdev->rfkill_ops, rdev); + rdev->wiphy.rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev), - &rdev->wiphy.dev, RFKILL_TYPE_WLAN, + &rdev->wiphy.dev, RFKILL_TYPE_WLAN, + &rdev->rfkill_ops, rdev); - if (!rdev->rfkill) { + if (!rdev->wiphy.rfkill) { wiphy_free(&rdev->wiphy); return NULL; } @@ -589,14 +589,6 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) if (WARN_ON(!c->num_different_channels)) return -EINVAL; - /* - * Put a sane limit on maximum number of different - * channels to simplify channel accounting code. - */ - if (WARN_ON(c->num_different_channels > - CFG80211_MAX_NUM_DIFFERENT_CHANNELS)) - return -EINVAL; - /* DFS only works on one channel. */ if (WARN_ON(c->radar_detect_widths && (c->num_different_channels > 1))) @@ -936,9 +928,6 @@ int wiphy_register(struct wiphy *wiphy) return res; } - /* set up regulatory info */ - wiphy_regulatory_register(wiphy); - list_add_rcu(&rdev->list, &cfg80211_rdev_list); cfg80211_rdev_list_generation++; @@ -949,6 +938,9 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_debugfs_rdev_add(rdev); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); + /* set up regulatory info */ + wiphy_regulatory_register(wiphy); + if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) { struct regulatory_request request; @@ -993,10 +985,10 @@ int wiphy_register(struct wiphy *wiphy) rdev->wiphy.registered = true; rtnl_unlock(); - res = rfkill_register(rdev->rfkill); + res = rfkill_register(rdev->wiphy.rfkill); if (res) { - rfkill_destroy(rdev->rfkill); - rdev->rfkill = NULL; + rfkill_destroy(rdev->wiphy.rfkill); + rdev->wiphy.rfkill = NULL; wiphy_unregister(&rdev->wiphy); return res; } @@ -1012,18 +1004,10 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy) if (!rdev->ops->rfkill_poll) return; rdev->rfkill_ops.poll = cfg80211_rfkill_poll; - rfkill_resume_polling(rdev->rfkill); + rfkill_resume_polling(wiphy->rfkill); } EXPORT_SYMBOL(wiphy_rfkill_start_polling); -void wiphy_rfkill_stop_polling(struct wiphy *wiphy) -{ - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - - rfkill_pause_polling(rdev->rfkill); -} -EXPORT_SYMBOL(wiphy_rfkill_stop_polling); - void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -1035,8 +1019,8 @@ void wiphy_unregister(struct wiphy *wiphy) wiphy_unlock(&rdev->wiphy); __count == 0; })); - if (rdev->rfkill) - rfkill_unregister(rdev->rfkill); + if (rdev->wiphy.rfkill) + rfkill_unregister(rdev->wiphy.rfkill); rtnl_lock(); wiphy_lock(&rdev->wiphy); @@ -1088,7 +1072,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) { struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; - rfkill_destroy(rdev->rfkill); + rfkill_destroy(rdev->wiphy.rfkill); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(&reg->list); kfree(reg); } @@ -1110,7 +1094,7 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked, { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - if (rfkill_set_hw_state_reason(rdev->rfkill, blocked, reason)) + if (rfkill_set_hw_state_reason(wiphy->rfkill, blocked, reason)) schedule_work(&rdev->rfkill_block); }
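The hunks above move the rfkill handle out of the private cfg80211_registered_device wrapper and into the public struct wiphy, so code that only holds a wiphy pointer no longer needs the wiphy_to_rdev() container_of() lookup (presumably also why wiphy_rfkill_stop_polling() can drop out of core.c and become a trivial wrapper elsewhere). A standalone sketch of the access-pattern change, using hypothetical struct and field names rather than the real cfg80211 types:

    #include <stddef.h>

    /* Toy stand-ins for struct wiphy / cfg80211_registered_device. */
    struct pub {
            void *rfkill;           /* new home: visible to all holders of pub */
    };

    struct wrapper {
            void *rfkill_old;       /* old home: private to the core */
            struct pub pub;         /* embedded public part */
    };

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    /* Before: recover the private wrapper first, then read the field. */
    static void *get_rfkill_before(struct pub *p)
    {
            struct wrapper *w = container_of(p, struct wrapper, pub);

            return w->rfkill_old;
    }

    /* After: the public struct carries the pointer directly. */
    static void *get_rfkill_after(struct pub *p)
    {
            return p->rfkill;
    }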
EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason); @@ -1340,6 +1324,11 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, rdev->devlist_generation++; wdev->registered = true; + if (wdev->netdev && + sysfs_create_link(&wdev->netdev->dev.kobj, &rdev->wiphy.dev.kobj, + "phy80211")) + pr_err("failed to add phy80211 symlink to netdev!\n"); + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } @@ -1365,14 +1354,6 @@ int cfg80211_register_netdevice(struct net_device *dev) if (ret) goto out; - if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, - "phy80211")) { - pr_err("failed to add phy80211 symlink to netdev!\n"); - unregister_netdevice(dev); - ret = -EINVAL; - goto out; - } - cfg80211_register_wdev(rdev, wdev); ret = 0; out: @@ -1506,7 +1487,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, wdev->use_4addr, 0)) return notifier_from_errno(-EOPNOTSUPP); - if (rfkill_blocked(rdev->rfkill)) + if (rfkill_blocked(rdev->wiphy.rfkill)) return notifier_from_errno(-ERFKILL); break; default: diff --git a/net/wireless/core.h b/net/wireless/core.h index a7d19b4b40ac..b35d0db12f1d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -3,7 +3,7 @@ * Wireless configuration interface internals. * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H @@ -27,7 +27,6 @@ struct cfg80211_registered_device { /* rfkill support */ struct rfkill_ops rfkill_ops; - struct rfkill *rfkill; struct work_struct rfkill_block; /* ISO / IEC 3166 alpha2 for which this device is receiving diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fc9286afe3c9..50eb405b0690 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -330,7 +330,7 @@ nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = { }; static const struct nla_policy -nl80211_psmr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = { +nl80211_pmsr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = { [NL80211_PMSR_PEER_ATTR_ADDR] = NLA_POLICY_ETH_ADDR, [NL80211_PMSR_PEER_ATTR_CHAN] = NLA_POLICY_NESTED(nl80211_policy), [NL80211_PMSR_PEER_ATTR_REQ] = @@ -345,7 +345,7 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = { [NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_TYPE_CAPA] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_PEERS] = - NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy), + NLA_POLICY_NESTED_ARRAY(nl80211_pmsr_peer_attr_policy), }; static const struct nla_policy @@ -1731,6 +1731,11 @@ nl80211_send_iftype_data(struct sk_buff *msg, &iftdata->he_6ghz_capa)) return -ENOBUFS; + if (iftdata->vendor_elems.data && iftdata->vendor_elems.len && + nla_put(msg, NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS, + iftdata->vendor_elems.len, iftdata->vendor_elems.data)) + return -ENOBUFS; + return 0; } @@ -4781,11 +4786,10 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, sband->ht_cap.mcs.rx_mask, sizeof(mask->control[i].ht_mcs)); - if (!sband->vht_cap.vht_supported) - continue; - - vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); - vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs); + if (sband->vht_cap.vht_supported) { + vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs); + } he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype); if (!he_cap) @@ -13042,7 
+13046,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev_running(wdev)) return 0; - if (rfkill_blocked(rdev->rfkill)) + if (rfkill_blocked(rdev->wiphy.rfkill)) return -ERFKILL; err = rdev_start_p2p_device(rdev, wdev); @@ -13084,7 +13088,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) if (wdev_running(wdev)) return -EEXIST; - if (rfkill_blocked(rdev->rfkill)) + if (rfkill_blocked(rdev->wiphy.rfkill)) return -ERFKILL; if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 6bdd96408022..328cf54bda82 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -168,6 +168,18 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, return -EINVAL; } + if (tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR]) { + if (!out->ftm.non_trigger_based && !out->ftm.trigger_based) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR], + "FTM: BSS color set for EDCA based ranging"); + return -EINVAL; + } + + out->ftm.bss_color = + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR]); + } + return 0; } @@ -334,6 +346,7 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev, gfp_t gfp) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct cfg80211_pmsr_request *tmp, *prev, *to_free = NULL; struct sk_buff *msg; void *hdr; @@ -364,9 +377,20 @@ free_msg: nlmsg_free(msg); free_request: spin_lock_bh(&wdev->pmsr_lock); - list_del(&req->list); + /* + * cfg80211_pmsr_process_abort() may have already moved this request + * to the free list, and will free it later. In this case, don't free + * it here. + */ + list_for_each_entry_safe(tmp, prev, &wdev->pmsr_list, list) { + if (tmp == req) { + list_del(&req->list); + to_free = req; + break; + } + } spin_unlock_bh(&wdev->pmsr_lock); - kfree(req); + kfree(to_free); } EXPORT_SYMBOL_GPL(cfg80211_pmsr_complete); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 8b1358d04ca2..b1d37f582dc6 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -464,8 +464,18 @@ static inline int rdev_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_assoc_request *req) { + const struct cfg80211_bss_ies *bss_ies; int ret; - trace_rdev_assoc(&rdev->wiphy, dev, req); + + /* + * Note: we might trace not exactly the data that's processed, + * due to races and the driver/mac80211 getting a newer copy. 
+ */ + rcu_read_lock(); + bss_ies = rcu_dereference(req->bss->ies); + trace_rdev_assoc(&rdev->wiphy, dev, req, bss_ies); + rcu_read_unlock(); + ret = rdev->ops->assoc(&rdev->wiphy, dev, req); trace_rdev_return_int(&rdev->wiphy, ret); return ret; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0406ce7334fa..c2d0ff7f089f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3975,7 +3975,9 @@ static int __regulatory_set_wiphy_regd(struct wiphy *wiphy, "wiphy should have REGULATORY_WIPHY_SELF_MANAGED\n")) return -EPERM; - if (WARN(!is_valid_rd(rd), "Invalid regulatory domain detected\n")) { + if (WARN(!is_valid_rd(rd), + "Invalid regulatory domain detected: %c%c\n", + rd->alpha2[0], rd->alpha2[1])) { print_regdomain_info(rd); return -EINVAL; } @@ -4049,6 +4051,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy) wiphy_update_regulatory(wiphy, lr->initiator); wiphy_all_share_dfs_chan_state(wiphy); + reg_process_self_managed_hints(); } void wiphy_regulatory_deregister(struct wiphy *wiphy) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4f06c1825029..f03c7ac8e184 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -5,7 +5,7 @@ * Copyright 2008 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include <linux/kernel.h> #include <linux/slab.h> @@ -618,7 +618,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, freq = ieee80211_channel_to_frequency(ap_info->channel, band); - if (end - pos < count * ap_info->tbtt_info_len) + if (end - pos < count * length) break; /* @@ -630,7 +630,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, if (band != NL80211_BAND_6GHZ || (length != IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM && length < IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM)) { - pos += count * ap_info->tbtt_info_len; + pos += count * length; continue; } @@ -653,7 +653,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies, kfree(entry); } - pos += ap_info->tbtt_info_len; + pos += length; } } @@ -757,7 +757,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) } request = kzalloc(struct_size(request, channels, n_channels) + - sizeof(*request->scan_6ghz_params) * count, + sizeof(*request->scan_6ghz_params) * count + + sizeof(*request->ssids) * rdev_req->n_ssids, GFP_KERNEL); if (!request) { cfg80211_free_coloc_ap_list(&coloc_ap_list); @@ -848,10 +849,19 @@ skip: if (request->n_channels) { struct cfg80211_scan_request *old = rdev->int_scan_req; - rdev->int_scan_req = request; /* + * Add the ssids from the parent scan request to the new scan + * request, so the driver would be able to use them in its + * probe requests to discover hidden APs on PSC channels. 
+ */ + request->ssids = (void *)&request->channels[request->n_channels]; + request->n_ssids = rdev_req->n_ssids; + memcpy(request->ssids, rdev_req->ssids, sizeof(*request->ssids) * + request->n_ssids); + + /* * If this scan follows a previous scan, save the scan start * info from the first part of the scan */ diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 9b959e3b09c6..0c3f05c9be27 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -133,6 +133,10 @@ static int wiphy_resume(struct device *dev) if (rdev->wiphy.registered && rdev->ops->resume) ret = rdev_resume(rdev); wiphy_unlock(&rdev->wiphy); + + if (ret) + cfg80211_shutdown_all_interfaces(&rdev->wiphy); + rtnl_unlock(); return ret; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 76b777d5903f..440bce5f0274 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1195,8 +1195,9 @@ TRACE_EVENT(rdev_auth, TRACE_EVENT(rdev_assoc, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_assoc_request *req), - TP_ARGS(wiphy, netdev, req), + struct cfg80211_assoc_request *req, + const struct cfg80211_bss_ies *bss_ies), + TP_ARGS(wiphy, netdev, req, bss_ies), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY @@ -1204,6 +1205,17 @@ TRACE_EVENT(rdev_assoc, MAC_ENTRY(prev_bssid) __field(bool, use_mfp) __field(u32, flags) + __dynamic_array(u8, bss_elements, bss_ies->len) + __field(bool, bss_elements_bcon) + __field(u64, bss_elements_tsf) + __dynamic_array(u8, elements, req->ie_len) + __array(u8, ht_capa, sizeof(struct ieee80211_ht_cap)) + __array(u8, ht_capa_mask, sizeof(struct ieee80211_ht_cap)) + __array(u8, vht_capa, sizeof(struct ieee80211_vht_cap)) + __array(u8, vht_capa_mask, sizeof(struct ieee80211_vht_cap)) + __dynamic_array(u8, fils_kek, req->fils_kek_len) + __dynamic_array(u8, fils_nonces, + req->fils_nonces ? 2 * FILS_NONCE_LEN : 0) ), TP_fast_assign( WIPHY_ASSIGN; @@ -1215,6 +1227,26 @@ TRACE_EVENT(rdev_assoc, MAC_ASSIGN(prev_bssid, req->prev_bssid); __entry->use_mfp = req->use_mfp; __entry->flags = req->flags; + if (bss_ies->len) + memcpy(__get_dynamic_array(bss_elements), + bss_ies->data, bss_ies->len); + __entry->bss_elements_bcon = bss_ies->from_beacon; + __entry->bss_elements_tsf = bss_ies->tsf; + if (req->ie) + memcpy(__get_dynamic_array(elements), + req->ie, req->ie_len); + memcpy(__entry->ht_capa, &req->ht_capa, sizeof(req->ht_capa)); + memcpy(__entry->ht_capa_mask, &req->ht_capa_mask, + sizeof(req->ht_capa_mask)); + memcpy(__entry->vht_capa, &req->vht_capa, sizeof(req->vht_capa)); + memcpy(__entry->vht_capa_mask, &req->vht_capa_mask, + sizeof(req->vht_capa_mask)); + if (req->fils_kek) + memcpy(__get_dynamic_array(fils_kek), + req->fils_kek, req->fils_kek_len); + if (req->fils_nonces) + memcpy(__get_dynamic_array(fils_nonces), + req->fils_nonces, 2 * FILS_NONCE_LEN); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT ", previous bssid: " MAC_PR_FMT ", use mfp: %s, flags: %u", diff --git a/net/wireless/util.c b/net/wireless/util.c index 7ec021a610ae..18dba3d7c638 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? 
*/ break; + case NL80211_IFTYPE_OCB: + cfg80211_leave_ocb(rdev, dev); + break; default: break; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index a8320dc59af7..a32065d600a1 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -902,7 +902,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, /* only change when not disabling */ if (!data->txpower.disabled) { - rfkill_set_sw_state(rdev->rfkill, false); + rfkill_set_sw_state(rdev->wiphy.rfkill, false); if (data->txpower.fixed) { /* @@ -927,7 +927,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, } } } else { - if (rfkill_set_sw_state(rdev->rfkill, true)) + if (rfkill_set_sw_state(rdev->wiphy.rfkill, true)) schedule_work(&rdev->rfkill_block); return 0; } @@ -963,7 +963,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, /* well... oh well */ data->txpower.fixed = 1; - data->txpower.disabled = rfkill_blocked(rdev->rfkill); + data->txpower.disabled = rfkill_blocked(rdev->wiphy.rfkill); data->txpower.value = val; data->txpower.flags = IW_TXPOW_DBM; @@ -1167,7 +1167,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - bool ps = wdev->ps; + bool ps; int timeout = wdev->ps_timeout; int err; diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c index 33bef22e44e9..b379a0371653 100644 --- a/net/wireless/wext-spy.c +++ b/net/wireless/wext-spy.c @@ -120,8 +120,8 @@ int iw_handler_set_thrspy(struct net_device * dev, return -EOPNOTSUPP; /* Just do it */ - memcpy(&(spydata->spy_thr_low), &(threshold->low), - 2 * sizeof(struct iw_quality)); + spydata->spy_thr_low = threshold->low; + spydata->spy_thr_high = threshold->high; /* Clear flag */ memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); @@ -147,8 +147,8 @@ int iw_handler_get_thrspy(struct net_device * dev, return -EOPNOTSUPP; /* Just do it */ - memcpy(&(threshold->low), &(spydata->spy_thr_low), - 2 * sizeof(struct iw_quality)); + threshold->low = spydata->spy_thr_low; + threshold->high = spydata->spy_thr_high; return 0; } @@ -173,10 +173,10 @@ static void iw_send_thrspy_event(struct net_device * dev, memcpy(threshold.addr.sa_data, address, ETH_ALEN); threshold.addr.sa_family = ARPHRD_ETHER; /* Copy stats */ - memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality)); + threshold.qual = *wstats; /* Copy also thresholds */ - memcpy(&(threshold.low), &(spydata->spy_thr_low), - 2 * sizeof(struct iw_quality)); + threshold.low = spydata->spy_thr_low; + threshold.high = spydata->spy_thr_high; /* Send event to user space */ wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold); diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index ce66323102f9..d12bb906c9c9 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -131,6 +131,13 @@ __xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto, return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } +static inline unsigned int +__xfrm_seq_hash(u32 seq, unsigned int hmask) +{ + unsigned int h = seq; + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; +} + static inline unsigned int __idx_hash(u32 index, unsigned int hmask) { return (index ^ (index >> 8)) & hmask; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 1158cd0311d7..3df0861d4390 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -612,7 +612,7 @@ lock: goto drop_unlock; } - if (x->repl->check(x, skb, seq)) { + if 
(xfrm_replay_check(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } @@ -660,12 +660,12 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (x->repl->recheck(x, skb, seq)) { + if (xfrm_replay_recheck(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } - x->repl->advance(x, seq); + xfrm_replay_advance(x, seq); x->curlft.bytes += skb->len; x->curlft.packets++; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index e4cb0ff4dcf4..ab2fbe432261 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -77,6 +77,83 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } +#if IS_ENABLED(CONFIG_IPV6_MIP6) +static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type) +{ + const unsigned char *nh = skb_network_header(skb); + unsigned int offset = sizeof(struct ipv6hdr); + unsigned int packet_len; + int found_rhdr = 0; + + packet_len = skb_tail_pointer(skb) - nh; + *nexthdr = &ipv6_hdr(skb)->nexthdr; + + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; + + switch (**nexthdr) { + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) { + struct ipv6_rt_hdr *rt; + + rt = (struct ipv6_rt_hdr *)(nh + offset); + if (rt->type != 0) + return offset; + } + found_rhdr = 1; + break; + case NEXTHDR_DEST: + /* HAO MUST NOT appear more than once. + * XXX: It is better to try to find by the end of + * XXX: packet if HAO exists. + */ + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { + net_dbg_ratelimited("mip6: hao exists already, override\n"); + return offset; + } + + if (found_rhdr) + return offset; + + break; + default: + return offset; + } + + if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) + return -EINVAL; + + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); + offset += ipv6_optlen(exthdr); + if (offset > IPV6_MAXPLEN) + return -EINVAL; + *nexthdr = &exthdr->nexthdr; + } + + return -EINVAL; +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr) +{ + switch (x->type->proto) { +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + return mip6_rthdr_offset(skb, prevhdr, x->type->proto); +#endif + default: + break; + } + + return ip6_find_1stfragopt(skb, prevhdr); +} +#endif + /* Add encapsulation header. 
* * The IP header and mutable extension headers will be moved forward to make @@ -92,7 +169,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) iph = ipv6_hdr(skb); skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) return hdr_len; skb_set_mac_header(skb, @@ -122,7 +199,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) iph = ipv6_hdr(skb); - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) return hdr_len; skb_set_mac_header(skb, @@ -448,7 +525,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto error; } - err = x->repl->overflow(x, skb); + err = xfrm_replay_overflow(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); goto error; @@ -565,6 +642,42 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb return 0; } +/* For partial checksum offload, the outer header checksum is calculated + * by software and the inner header checksum is calculated by hardware. + * This requires hardware to know the inner packet type to calculate + * the inner header checksum. Save inner ip protocol here to avoid + * traversing the packet in the vendor's xmit code. + * If the encap type is IPIP, just save skb->inner_ipproto. Otherwise, + * get the ip protocol from the IP header. + */ +static void xfrm_get_inner_ipproto(struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + const struct ethhdr *eth; + + if (!xo) + return; + + if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) { + xo->inner_ipproto = skb->inner_ipproto; + return; + } + + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER) + return; + + eth = (struct ethhdr *)skb_inner_mac_header(skb); + + switch (ntohs(eth->h_proto)) { + case ETH_P_IPV6: + xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr; + break; + case ETH_P_IP: + xo->inner_ipproto = inner_ip_hdr(skb)->protocol; + break; + } +} + int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -594,12 +707,15 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return -ENOMEM; } - skb->encapsulation = 1; sp->olen++; sp->xvec[sp->len++] = x; xfrm_state_hold(x); + if (skb->encapsulation) + xfrm_get_inner_ipproto(skb); + skb->encapsulation = 1; + if (skb_is_gso(skb)) { if (skb->inner_protocol) return xfrm_output_gso(net, sk, skb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ce500f847b99..1e24b21457f7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3247,7 +3247,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, /* * 0 or more than 0 is returned when validation is succeeded (either bypass - * because of optional transport mode, or next index of the mathced secpath + * because of optional transport mode, or next index of the matched secpath * state with the template. * -1 is returned when no matching template is found. * Otherwise "-2 - errored_index" is returned. 
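The xfrm_get_inner_ipproto() helper added above keys off the inner EtherType to pull the L4 protocol number out of the inner IP header, so the vendor xmit path never has to re-parse the packet for partial checksum offload. A minimal userspace sketch of that header walk, assuming raw byte offsets into the fixed IPv4/IPv6 headers in place of the kernel's inner_ip_hdr()/inner_ipv6_hdr() accessors:

    #include <stdint.h>
    #include <arpa/inet.h>          /* ntohs() */

    #define ETH_P_IP   0x0800
    #define ETH_P_IPV6 0x86DD

    /*
     * Return the inner L4 protocol given the inner EtherType (network
     * byte order) and the start of the inner IP header; 0 if unknown.
     * IPv4 keeps "protocol" at byte 9 of its fixed header, IPv6 keeps
     * "next header" at byte 6.
     */
    static uint8_t inner_l4_proto(uint16_t h_proto, const uint8_t *ip_hdr)
    {
            switch (ntohs(h_proto)) {
            case ETH_P_IP:
                    return ip_hdr[9];       /* struct iphdr::protocol */
            case ETH_P_IPV6:
                    return ip_hdr[6];       /* struct ipv6hdr::nexthdr */
            default:
                    return 0;
            }
    }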
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index c6a4338a0d08..9277d81b344c 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -34,8 +34,11 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) return seq_hi; } EXPORT_SYMBOL(xfrm_replay_seqhi); -; -static void xfrm_replay_notify(struct xfrm_state *x, int event) + +static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event); +static void xfrm_replay_notify_esn(struct xfrm_state *x, int event); + +void xfrm_replay_notify(struct xfrm_state *x, int event) { struct km_event c; /* we send notify messages in case @@ -48,6 +51,17 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event) * The state structure must be locked! */ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + xfrm_replay_notify_bmp(x, event); + return; + case XFRM_REPLAY_MODE_ESN: + xfrm_replay_notify_esn(x, event); + return; + } + switch (event) { case XFRM_REPLAY_UPDATE: if (!x->replay_maxdiff || @@ -81,7 +95,7 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event) x->xflags &= ~XFRM_TIME_DEFER; } -static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +static int __xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) { int err = 0; struct net *net = xs_net(x); @@ -98,14 +112,14 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) return err; } if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } -static int xfrm_replay_check(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq) +static int xfrm_replay_check_legacy(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) { u32 diff; u32 seq = ntohl(net_seq); @@ -136,14 +150,26 @@ err: return -EINVAL; } -static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) +static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq); +static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq); + +void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) { - u32 diff; - u32 seq = ntohl(net_seq); + u32 diff, seq; + + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + return xfrm_replay_advance_bmp(x, net_seq); + case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_advance_esn(x, net_seq); + } if (!x->props.replay_window) return; + seq = ntohl(net_seq); if (seq > x->replay.seq) { diff = seq - x->replay.seq; if (diff < x->props.replay_window) @@ -157,7 +183,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) } if (xfrm_aevent_is_on(xs_net(x))) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) @@ -178,7 +204,7 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) return err; } if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; @@ -273,7 +299,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) @@ -416,7 +442,7 @@ static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) } } if 
(xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; @@ -481,6 +507,21 @@ err: return -EINVAL; } +int xfrm_replay_check(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + return xfrm_replay_check_bmp(x, skb, net_seq); + case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_check_esn(x, skb, net_seq); + } + + return xfrm_replay_check_legacy(x, skb, net_seq); +} + static int xfrm_replay_recheck_esn(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { @@ -493,6 +534,22 @@ static int xfrm_replay_recheck_esn(struct xfrm_state *x, return xfrm_replay_check_esn(x, skb, net_seq); } +int xfrm_replay_recheck(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + /* no special recheck treatment */ + return xfrm_replay_check_bmp(x, skb, net_seq); + case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_recheck_esn(x, skb, net_seq); + } + + return xfrm_replay_check_legacy(x, skb, net_seq); +} + static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) { unsigned int bitnr, nr, i; @@ -548,7 +605,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) replay_esn->bmp[nr] |= (1U << bitnr); if (xfrm_aevent_is_on(xs_net(x))) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } #ifdef CONFIG_XFRM_OFFLOAD @@ -560,7 +617,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk __u32 oseq = x->replay.oseq; if (!xo) - return xfrm_replay_overflow(x, skb); + return __xfrm_replay_overflow(x, skb); if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { if (!skb_is_gso(skb)) { @@ -585,7 +642,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk x->replay.oseq = oseq; if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; @@ -625,7 +682,7 @@ static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff } if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; @@ -674,59 +731,39 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff replay_esn->oseq = oseq; if (xfrm_aevent_is_on(net)) - x->repl->notify(x, XFRM_REPLAY_UPDATE); + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } return err; } -static const struct xfrm_replay xfrm_replay_legacy = { - .advance = xfrm_replay_advance, - .check = xfrm_replay_check, - .recheck = xfrm_replay_check, - .notify = xfrm_replay_notify, - .overflow = xfrm_replay_overflow_offload, -}; - -static const struct xfrm_replay xfrm_replay_bmp = { - .advance = xfrm_replay_advance_bmp, - .check = xfrm_replay_check_bmp, - .recheck = xfrm_replay_check_bmp, - .notify = xfrm_replay_notify_bmp, - .overflow = xfrm_replay_overflow_offload_bmp, -}; - -static const struct xfrm_replay xfrm_replay_esn = { - .advance = xfrm_replay_advance_esn, - .check = xfrm_replay_check_esn, - .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_esn, - .overflow = xfrm_replay_overflow_offload_esn, -}; +int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + return xfrm_replay_overflow_offload_bmp(x, skb); + 
case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_overflow_offload_esn(x, skb); + } + + return xfrm_replay_overflow_offload(x, skb); +} #else -static const struct xfrm_replay xfrm_replay_legacy = { - .advance = xfrm_replay_advance, - .check = xfrm_replay_check, - .recheck = xfrm_replay_check, - .notify = xfrm_replay_notify, - .overflow = xfrm_replay_overflow, -}; - -static const struct xfrm_replay xfrm_replay_bmp = { - .advance = xfrm_replay_advance_bmp, - .check = xfrm_replay_check_bmp, - .recheck = xfrm_replay_check_bmp, - .notify = xfrm_replay_notify_bmp, - .overflow = xfrm_replay_overflow_bmp, -}; - -static const struct xfrm_replay xfrm_replay_esn = { - .advance = xfrm_replay_advance_esn, - .check = xfrm_replay_check_esn, - .recheck = xfrm_replay_recheck_esn, - .notify = xfrm_replay_notify_esn, - .overflow = xfrm_replay_overflow_esn, -}; +int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ + switch (x->repl_mode) { + case XFRM_REPLAY_MODE_LEGACY: + break; + case XFRM_REPLAY_MODE_BMP: + return xfrm_replay_overflow_bmp(x, skb); + case XFRM_REPLAY_MODE_ESN: + return xfrm_replay_overflow_esn(x, skb); + } + + return __xfrm_replay_overflow(x, skb); +} #endif int xfrm_init_replay(struct xfrm_state *x) @@ -741,12 +778,12 @@ int xfrm_init_replay(struct xfrm_state *x) if (x->props.flags & XFRM_STATE_ESN) { if (replay_esn->replay_window == 0) return -EINVAL; - x->repl = &xfrm_replay_esn; + x->repl_mode = XFRM_REPLAY_MODE_ESN; } else { - x->repl = &xfrm_replay_bmp; + x->repl_mode = XFRM_REPLAY_MODE_BMP; } } else { - x->repl = &xfrm_replay_legacy; + x->repl_mode = XFRM_REPLAY_MODE_LEGACY; } return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4496f7efa220..c2ce1e6f4760 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -78,10 +78,16 @@ xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } +static unsigned int xfrm_seq_hash(struct net *net, u32 seq) +{ + return __xfrm_seq_hash(seq, net->xfrm.state_hmask); +} + static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, struct hlist_head *nsrctable, struct hlist_head *nspitable, + struct hlist_head *nseqtable, unsigned int nhashmask) { struct hlist_node *tmp; @@ -106,6 +112,11 @@ static void xfrm_hash_transfer(struct hlist_head *list, nhashmask); hlist_add_head_rcu(&x->byspi, nspitable + h); } + + if (x->km.seq) { + h = __xfrm_seq_hash(x->km.seq, nhashmask); + hlist_add_head_rcu(&x->byseq, nseqtable + h); + } } } @@ -117,7 +128,7 @@ static unsigned long xfrm_hash_new_size(unsigned int state_hmask) static void xfrm_hash_resize(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.state_hash_work); - struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; + struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq; unsigned long nsize, osize; unsigned int nhashmask, ohashmask; int i; @@ -137,6 +148,13 @@ static void xfrm_hash_resize(struct work_struct *work) xfrm_hash_free(nsrc, nsize); return; } + nseq = xfrm_hash_alloc(nsize); + if (!nseq) { + xfrm_hash_free(ndst, nsize); + xfrm_hash_free(nsrc, nsize); + xfrm_hash_free(nspi, nsize); + return; + } spin_lock_bh(&net->xfrm.xfrm_state_lock); write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); @@ -144,15 +162,17 @@ static void xfrm_hash_resize(struct work_struct *work) nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net); for (i = 
 	for (i = net->xfrm.state_hmask; i >= 0; i--)
-		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
+		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);
 
 	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
 	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
+	oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
 	ohashmask = net->xfrm.state_hmask;
 
 	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
 	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
 	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
+	rcu_assign_pointer(net->xfrm.state_byseq, nseq);
 	net->xfrm.state_hmask = nhashmask;
 
 	write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
@@ -165,6 +185,7 @@ static void xfrm_hash_resize(struct work_struct *work)
 	xfrm_hash_free(odst, osize);
 	xfrm_hash_free(osrc, osize);
 	xfrm_hash_free(ospi, osize);
+	xfrm_hash_free(oseq, osize);
 }
 
 static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
@@ -621,6 +642,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 	INIT_HLIST_NODE(&x->bydst);
 	INIT_HLIST_NODE(&x->bysrc);
 	INIT_HLIST_NODE(&x->byspi);
+	INIT_HLIST_NODE(&x->byseq);
 	hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT);
 	x->mtimer.function = xfrm_timer_handler;
 	timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
@@ -664,6 +686,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
 		list_del(&x->km.all);
 		hlist_del_rcu(&x->bydst);
 		hlist_del_rcu(&x->bysrc);
+		if (x->km.seq)
+			hlist_del_rcu(&x->byseq);
 		if (x->id.spi)
 			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
@@ -1148,6 +1172,10 @@ found:
 			h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
 			hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 		}
+		if (x->km.seq) {
+			h = xfrm_seq_hash(net, x->km.seq);
+			hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+		}
 		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
 		hrtimer_start(&x->mtimer,
 			      ktime_set(net->xfrm.sysctl_acq_expires, 0),
@@ -1263,6 +1291,12 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 	}
 
+	if (x->km.seq) {
+		h = xfrm_seq_hash(net, x->km.seq);
+
+		hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+	}
+
 	hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
 	if (x->replay_maxage)
 		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
@@ -1932,20 +1966,18 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark,
 						u32 seq)
 {
-	int i;
-
-	for (i = 0; i <= net->xfrm.state_hmask; i++) {
-		struct xfrm_state *x;
+	unsigned int h = xfrm_seq_hash(net, seq);
+	struct xfrm_state *x;
 
-		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
-			if (x->km.seq == seq &&
-			    (mark & x->mark.m) == x->mark.v &&
-			    x->km.state == XFRM_STATE_ACQ) {
-				xfrm_state_hold(x);
-				return x;
-			}
+	hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
+		if (x->km.seq == seq &&
+		    (mark & x->mark.m) == x->mark.v &&
+		    x->km.state == XFRM_STATE_ACQ) {
+			xfrm_state_hold(x);
+			return x;
 		}
 	}
+
 	return NULL;
 }
@@ -2145,7 +2177,7 @@ static void xfrm_replay_timer_handler(struct timer_list *t)
 
 	if (x->km.state == XFRM_STATE_VALID) {
 		if (xfrm_aevent_is_on(xs_net(x)))
-			x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
+			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
 		else
 			x->xflags |= XFRM_TIME_DEFER;
 	}
@@ -2660,6 +2692,9 @@ int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_byspi = xfrm_hash_alloc(sz);
 	if (!net->xfrm.state_byspi)
 		goto out_byspi;
+	net->xfrm.state_byseq = xfrm_hash_alloc(sz);
+	if (!net->xfrm.state_byseq)
+		goto out_byseq;
 	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	net->xfrm.state_num = 0;
@@ -2669,6 +2704,8 @@ int __net_init xfrm_state_init(struct net *net)
 			       &net->xfrm.xfrm_state_lock);
 	return 0;
 
+out_byseq:
+	xfrm_hash_free(net->xfrm.state_byspi, sz);
 out_byspi:
 	xfrm_hash_free(net->xfrm.state_bysrc, sz);
 out_bysrc:
@@ -2688,6 +2725,8 @@ void xfrm_state_fini(struct net *net)
 	WARN_ON(!list_empty(&net->xfrm.state_all));
 
 	sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
+	WARN_ON(!hlist_empty(net->xfrm.state_byseq));
+	xfrm_hash_free(net->xfrm.state_byseq, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_byspi));
 	xfrm_hash_free(net->xfrm.state_byspi, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
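
The xfrm_replay.c hunks above swap the per-state struct xfrm_replay vtable (x->repl->check, ->notify, ->overflow) for an enum tag plus direct switch dispatch. A minimal user-space sketch of that pattern, with toy names standing in for the kernel types (not the kernel's own code):

#include <stdio.h>

enum replay_mode {
	REPLAY_MODE_LEGACY,
	REPLAY_MODE_BMP,
	REPLAY_MODE_ESN,
};

struct state {
	enum replay_mode repl_mode;	/* replaces the x->repl ops pointer */
};

static int check_legacy(struct state *x) { (void)x; return puts("legacy"); }
static int check_bmp(struct state *x)    { (void)x; return puts("bmp"); }
static int check_esn(struct state *x)    { (void)x; return puts("esn"); }

/* Direct dispatch: the LEGACY case breaks out of the switch and falls
 * through to the legacy helper, so an enum value added later without a
 * case here draws a compiler switch warning instead of going through a
 * stale function pointer. */
static int replay_check(struct state *x)
{
	switch (x->repl_mode) {
	case REPLAY_MODE_LEGACY:
		break;
	case REPLAY_MODE_BMP:
		return check_bmp(x);
	case REPLAY_MODE_ESN:
		return check_esn(x);
	}

	return check_legacy(x);
}

int main(void)
{
	struct state x = { .repl_mode = REPLAY_MODE_ESN };

	return replay_check(&x) < 0;
}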
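
On the xfrm_state.c side, a fourth per-netns hash table keyed by km.seq lets the rewritten __xfrm_find_acq_byseq() walk one bucket instead of scanning every bydst chain. A toy single-threaded model of that lookup; the real __xfrm_seq_hash() lives in a header outside this diff, so the hash mixing shown is only an assumption, and the RCU hlist primitives are not modeled:

#include <stdio.h>

#define HBITS 4
#define HMASK ((1u << HBITS) - 1)

struct state {
	unsigned int seq;	/* stands in for x->km.seq */
	struct state *next;	/* stands in for the byseq hlist node */
};

static struct state *byseq[HMASK + 1];

/* Stand-in for __xfrm_seq_hash(); the mix-and-mask is an assumption. */
static unsigned int seq_hash(unsigned int seq)
{
	return (seq ^ (seq >> 16)) & HMASK;
}

static void state_link(struct state *x)
{
	if (!x->seq)	/* mirrors the "if (x->km.seq)" guards above */
		return;

	unsigned int h = seq_hash(x->seq);

	x->next = byseq[h];
	byseq[h] = x;
}

/* One short bucket walk, as in the rewritten __xfrm_find_acq_byseq(). */
static struct state *find_acq_byseq(unsigned int seq)
{
	for (struct state *x = byseq[seq_hash(seq)]; x; x = x->next)
		if (x->seq == seq)
			return x;
	return NULL;
}

int main(void)
{
	struct state acq = { .seq = 42 };

	state_link(&acq);
	printf("seq 42 %s\n", find_acq_byseq(42) ? "found" : "missing");
	return 0;
}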
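
xfrm_hash_transfer() is extended so a resize also re-links byseq entries into the new table. The sketch below shows just the re-hash-under-a-new-mask step; the state lock, seqcount and rcu_assign_pointer() publication done by xfrm_hash_resize() are deliberately left out:

#include <stdio.h>

struct node {
	unsigned int key;
	struct node *next;
};

/* Move every node from the old buckets into the larger table, the way
 * xfrm_hash_transfer() walks each old chain and relinks entries
 * (including, after this diff, the byseq entries) under the new mask. */
static void transfer(struct node **old, unsigned int omask,
		     struct node **tbl, unsigned int nmask)
{
	for (unsigned int i = 0; i <= omask; i++) {
		struct node *n = old[i];

		while (n) {
			struct node *next = n->next;
			unsigned int h = n->key & nmask;

			n->next = tbl[h];
			tbl[h] = n;
			n = next;
		}
		old[i] = NULL;
	}
}

int main(void)
{
	struct node a = { .key = 1 }, b = { .key = 5 }, c = { .key = 3 };
	struct node *old[2] = { 0 }, *tbl[4] = { 0 };

	/* old mask 1: keys 1, 5 and 3 all collide in bucket 1 */
	a.next = &b;
	b.next = &c;
	old[1] = &a;

	transfer(old, 1, tbl, 3);

	for (unsigned int i = 0; i < 4; i++)
		for (struct node *n = tbl[i]; n; n = n->next)
			printf("bucket %u: key %u\n", i, n->key);
	return 0;
}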
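
Finally, the error unwinding in xfrm_state_init() grows an out_byseq label that frees the previously allocated tables in reverse order. The same cascade reduced to plain malloc/free, with a hypothetical tables_init() helper standing in for the kernel's xfrm_hash_alloc()/xfrm_hash_free():

#include <stdlib.h>

struct tables {
	void *bydst, *bysrc, *byspi, *byseq;
};

/* Each label frees the allocation made just before the one that
 * failed, then falls through the older labels, so the new out_byseq
 * label releases byspi, bysrc and bydst in reverse order. */
static int tables_init(struct tables *t, size_t sz)
{
	t->bydst = malloc(sz);
	if (!t->bydst)
		goto out_bydst;
	t->bysrc = malloc(sz);
	if (!t->bysrc)
		goto out_bysrc;
	t->byspi = malloc(sz);
	if (!t->byspi)
		goto out_byspi;
	t->byseq = malloc(sz);	/* the table this diff adds */
	if (!t->byseq)
		goto out_byseq;
	return 0;

out_byseq:
	free(t->byspi);
out_byspi:
	free(t->bysrc);
out_bysrc:
	free(t->bydst);
out_bydst:
	return -1;
}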