diff options
Diffstat (limited to 'net')
133 files changed, 1842 insertions, 1012 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 41be38264493..49a6d49c23dc 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -358,6 +358,35 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event) return err; } +static void vlan_vid0_add(struct net_device *dev) +{ + struct vlan_info *vlan_info; + int err; + + if (!(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + return; + + pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); + + err = vlan_vid_add(dev, htons(ETH_P_8021Q), 0); + if (err) + return; + + vlan_info = rtnl_dereference(dev->vlan_info); + vlan_info->auto_vid0 = true; +} + +static void vlan_vid0_del(struct net_device *dev) +{ + struct vlan_info *vlan_info = rtnl_dereference(dev->vlan_info); + + if (!vlan_info || !vlan_info->auto_vid0) + return; + + vlan_info->auto_vid0 = false; + vlan_vid_del(dev, htons(ETH_P_8021Q), 0); +} + static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -379,15 +408,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, return notifier_from_errno(err); } - if ((event == NETDEV_UP) && - (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { - pr_info("adding VLAN 0 to HW filter on device %s\n", - dev->name); - vlan_vid_add(dev, htons(ETH_P_8021Q), 0); - } - if (event == NETDEV_DOWN && - (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) - vlan_vid_del(dev, htons(ETH_P_8021Q), 0); + if (event == NETDEV_UP) + vlan_vid0_add(dev); + else if (event == NETDEV_DOWN) + vlan_vid0_del(dev); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5eaf38875554..c7ffe591d593 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -33,6 +33,7 @@ struct vlan_info { struct vlan_group grp; struct list_head vid_list; unsigned int nr_vids; + bool auto_vid0; struct rcu_head rcu; }; diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 9fa0b246902b..70c5a508ffac 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -35,6 +35,7 @@ #include <linux/seq_file.h> #include <linux/export.h> #include <linux/etherdevice.h> +#include <linux/refcount.h> int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME; int sysctl_aarp_tick_time = AARP_TICK_TIME; @@ -44,6 +45,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME; /* Lists of aarp entries */ /** * struct aarp_entry - AARP entry + * @refcnt: Reference count * @last_sent: Last time we xmitted the aarp request * @packet_queue: Queue of frames wait for resolution * @status: Used for proxy AARP @@ -55,6 +57,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME; * @next: Next entry in chain */ struct aarp_entry { + refcount_t refcnt; /* These first two are only used for unresolved entries */ unsigned long last_sent; struct sk_buff_head packet_queue; @@ -79,6 +82,17 @@ static DEFINE_RWLOCK(aarp_lock); /* Used to walk the list and purge/kick entries. */ static struct timer_list aarp_timer; +static inline void aarp_entry_get(struct aarp_entry *a) +{ + refcount_inc(&a->refcnt); +} + +static inline void aarp_entry_put(struct aarp_entry *a) +{ + if (refcount_dec_and_test(&a->refcnt)) + kfree(a); +} + /* * Delete an aarp queue * @@ -87,7 +101,7 @@ static struct timer_list aarp_timer; static void __aarp_expire(struct aarp_entry *a) { skb_queue_purge(&a->packet_queue); - kfree(a); + aarp_entry_put(a); } /* @@ -380,9 +394,11 @@ static void aarp_purge(void) static struct aarp_entry *aarp_alloc(void) { struct aarp_entry *a = kmalloc(sizeof(*a), GFP_ATOMIC); + if (!a) + return NULL; - if (a) - skb_queue_head_init(&a->packet_queue); + refcount_set(&a->refcnt, 1); + skb_queue_head_init(&a->packet_queue); return a; } @@ -508,6 +524,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa) entry->dev = atif->dev; write_lock_bh(&aarp_lock); + aarp_entry_get(entry); hash = sa->s_node % (AARP_HASH_SIZE - 1); entry->next = proxies[hash]; @@ -533,6 +550,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa) retval = 1; } + aarp_entry_put(entry); write_unlock_bh(&aarp_lock); out: return retval; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b068651984fe..fa7f002b14fa 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -576,6 +576,7 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint) /* Fill in the routing entry */ rt->target = ta->sat_addr; + dev_put(rt->dev); /* Release old device */ dev_hold(devhint); rt->dev = devhint; rt->flags = r->rt_flags; diff --git a/net/atm/clip.c b/net/atm/clip.c index 42b910cb4e8e..ebba0d6ae324 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -45,7 +45,8 @@ #include <net/atmclip.h> static struct net_device *clip_devs; -static struct atm_vcc *atmarpd; +static struct atm_vcc __rcu *atmarpd; +static DEFINE_MUTEX(atmarpd_lock); static struct timer_list idle_timer; static const struct neigh_ops clip_neigh_ops; @@ -53,24 +54,35 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) { struct sock *sk; struct atmarp_ctrl *ctrl; + struct atm_vcc *vcc; struct sk_buff *skb; + int err = 0; pr_debug("(%d)\n", type); - if (!atmarpd) - return -EUNATCH; + + rcu_read_lock(); + vcc = rcu_dereference(atmarpd); + if (!vcc) { + err = -EUNATCH; + goto unlock; + } skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC); - if (!skb) - return -ENOMEM; + if (!skb) { + err = -ENOMEM; + goto unlock; + } ctrl = skb_put(skb, sizeof(struct atmarp_ctrl)); ctrl->type = type; ctrl->itf_num = itf; ctrl->ip = ip; - atm_force_charge(atmarpd, skb->truesize); + atm_force_charge(vcc, skb->truesize); - sk = sk_atm(atmarpd); + sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); - return 0; +unlock: + rcu_read_unlock(); + return err; } static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry) @@ -193,12 +205,6 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("\n"); - if (!clip_devs) { - atm_return(vcc, skb->truesize); - kfree_skb(skb); - return; - } - if (!skb) { pr_debug("removing VCC %p\n", clip_vcc); if (clip_vcc->entry) @@ -208,6 +214,11 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) return; } atm_return(vcc, skb->truesize); + if (!clip_devs) { + kfree_skb(skb); + return; + } + skb->dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : clip_devs; /* clip_vcc->entry == NULL if we don't have an IP address yet */ if (!skb->dev) { @@ -418,6 +429,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) if (!vcc->push) return -EBADFD; + if (vcc->user_back) + return -EINVAL; clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL); if (!clip_vcc) return -ENOMEM; @@ -608,17 +621,27 @@ static void atmarpd_close(struct atm_vcc *vcc) { pr_debug("\n"); - rtnl_lock(); - atmarpd = NULL; + mutex_lock(&atmarpd_lock); + RCU_INIT_POINTER(atmarpd, NULL); + mutex_unlock(&atmarpd_lock); + + synchronize_rcu(); skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); - rtnl_unlock(); pr_debug("(done)\n"); module_put(THIS_MODULE); } +static int atmarpd_send(struct atm_vcc *vcc, struct sk_buff *skb) +{ + atm_return_tx(vcc, skb); + dev_kfree_skb_any(skb); + return 0; +} + static const struct atmdev_ops atmarpd_dev_ops = { - .close = atmarpd_close + .close = atmarpd_close, + .send = atmarpd_send }; @@ -632,15 +655,18 @@ static struct atm_dev atmarpd_dev = { static int atm_init_atmarp(struct atm_vcc *vcc) { - rtnl_lock(); + if (vcc->push == clip_push) + return -EINVAL; + + mutex_lock(&atmarpd_lock); if (atmarpd) { - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return -EADDRINUSE; } mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); - atmarpd = vcc; + rcu_assign_pointer(atmarpd, vcc); set_bit(ATM_VF_META, &vcc->flags); set_bit(ATM_VF_READY, &vcc->flags); /* allow replies and avoid getting closed if signaling dies */ @@ -649,13 +675,14 @@ static int atm_init_atmarp(struct atm_vcc *vcc) vcc->push = NULL; vcc->pop = NULL; /* crash */ vcc->push_oam = NULL; /* crash */ - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return 0; } static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct atm_vcc *vcc = ATM_SD(sock); + struct sock *sk = sock->sk; int err = 0; switch (cmd) { @@ -676,14 +703,18 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = clip_create(arg); break; case ATMARPD_CTRL: + lock_sock(sk); err = atm_init_atmarp(vcc); if (!err) { sock->state = SS_CONNECTED; __module_get(THIS_MODULE); } + release_sock(sk); break; case ATMARP_MKIP: + lock_sock(sk); err = clip_mkip(vcc, arg); + release_sock(sk); break; case ATMARP_SETENTRY: err = clip_setentry(vcc, (__force __be32)arg); diff --git a/net/atm/common.c b/net/atm/common.c index 9b75699992ff..d7f7976ea13a 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -635,6 +635,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) skb->dev = NULL; /* for paths shared with net_device interfaces */ if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { + atm_return_tx(vcc, skb); kfree_skb(skb); error = -EFAULT; goto out; diff --git a/net/atm/lec.c b/net/atm/lec.c index a948dd47c3f3..42e8047c6510 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -124,6 +124,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; +static DEFINE_MUTEX(lec_mutex); #if IS_ENABLED(CONFIG_BRIDGE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) @@ -685,6 +686,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) int bytes_left; struct atmlec_ioc ioc_data; + lockdep_assert_held(&lec_mutex); /* Lecd must be up in this case */ bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); if (bytes_left != 0) @@ -710,6 +712,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { + lockdep_assert_held(&lec_mutex); if (arg < 0 || arg >= MAX_LEC_ITF) return -EINVAL; arg = array_index_nospec(arg, MAX_LEC_ITF); @@ -725,6 +728,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) int i; struct lec_priv *priv; + lockdep_assert_held(&lec_mutex); if (arg < 0) arg = 0; if (arg >= MAX_LEC_ITF) @@ -742,6 +746,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); if (register_netdev(dev_lec[i])) { free_netdev(dev_lec[i]); + dev_lec[i] = NULL; return -EINVAL; } @@ -904,7 +909,6 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l) v = (dev && netdev_priv(dev)) ? lec_priv_walk(state, l, netdev_priv(dev)) : NULL; if (!v && dev) { - dev_put(dev); /* Partial state reset for the next time we get called */ dev = NULL; } @@ -928,6 +932,7 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos) { struct lec_state *state = seq->private; + mutex_lock(&lec_mutex); state->itf = 0; state->dev = NULL; state->locked = NULL; @@ -945,8 +950,9 @@ static void lec_seq_stop(struct seq_file *seq, void *v) if (state->dev) { spin_unlock_irqrestore(&state->locked->lec_arp_lock, state->flags); - dev_put(state->dev); + state->dev = NULL; } + mutex_unlock(&lec_mutex); } static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -1003,6 +1009,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } + mutex_lock(&lec_mutex); switch (cmd) { case ATMLEC_CTRL: err = lecd_attach(vcc, (int)arg); @@ -1017,6 +1024,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } + mutex_unlock(&lec_mutex); return err; } diff --git a/net/atm/raw.c b/net/atm/raw.c index 2b5f78a7ec3e..1e6511ec842c 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -36,7 +36,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("(%d) %d -= %d\n", vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize); - WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc)); + atm_return_tx(vcc, skb); dev_kfree_skb_any(skb); sk->sk_write_space(sk); } diff --git a/net/atm/resources.c b/net/atm/resources.c index 995d29e7fb13..b19d851e1f44 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -146,11 +146,10 @@ void atm_dev_deregister(struct atm_dev *dev) */ mutex_lock(&atm_dev_mutex); list_del(&dev->dev_list); - mutex_unlock(&atm_dev_mutex); - atm_dev_release_vccs(dev); atm_unregister_sysfs(dev); atm_proc_dev_deregister(dev); + mutex_unlock(&atm_dev_mutex); atm_dev_put(dev); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0d3816c80758..b74ada809237 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -64,7 +64,7 @@ static DEFINE_IDA(hci_index_ida); /* Get HCI device by index. * Device is held on return. */ -struct hci_dev *hci_dev_get(int index) +static struct hci_dev *__hci_dev_get(int index, int *srcu_index) { struct hci_dev *hdev = NULL, *d; @@ -77,6 +77,8 @@ struct hci_dev *hci_dev_get(int index) list_for_each_entry(d, &hci_dev_list, list) { if (d->id == index) { hdev = hci_dev_hold(d); + if (srcu_index) + *srcu_index = srcu_read_lock(&d->srcu); break; } } @@ -84,6 +86,22 @@ struct hci_dev *hci_dev_get(int index) return hdev; } +struct hci_dev *hci_dev_get(int index) +{ + return __hci_dev_get(index, NULL); +} + +static struct hci_dev *hci_dev_get_srcu(int index, int *srcu_index) +{ + return __hci_dev_get(index, srcu_index); +} + +static void hci_dev_put_srcu(struct hci_dev *hdev, int srcu_index) +{ + srcu_read_unlock(&hdev->srcu, srcu_index); + hci_dev_put(hdev); +} + /* ---- Inquiry support ---- */ bool hci_discovery_active(struct hci_dev *hdev) @@ -568,9 +586,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev) int hci_dev_reset(__u16 dev) { struct hci_dev *hdev; - int err; + int err, srcu_index; - hdev = hci_dev_get(dev); + hdev = hci_dev_get_srcu(dev, &srcu_index); if (!hdev) return -ENODEV; @@ -592,7 +610,7 @@ int hci_dev_reset(__u16 dev) err = hci_dev_do_reset(hdev); done: - hci_dev_put(hdev); + hci_dev_put_srcu(hdev, srcu_index); return err; } @@ -2439,6 +2457,11 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) if (!hdev) return NULL; + if (init_srcu_struct(&hdev->srcu)) { + kfree(hdev); + return NULL; + } + hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); @@ -2684,6 +2707,9 @@ void hci_unregister_dev(struct hci_dev *hdev) list_del(&hdev->list); write_unlock(&hci_dev_list_lock); + synchronize_srcu(&hdev->srcu); + cleanup_srcu_struct(&hdev->srcu); + disable_work_sync(&hdev->rx_work); disable_work_sync(&hdev->cmd_work); disable_work_sync(&hdev->tx_work); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 5c4c3d04d8b9..38643ffa65a9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2141,40 +2141,6 @@ static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_le_set_ext_adv_params *rp = data; - struct hci_cp_le_set_ext_adv_params *cp; - struct adv_info *adv_instance; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS); - if (!cp) - return rp->status; - - hci_dev_lock(hdev); - hdev->adv_addr_type = cp->own_addr_type; - if (!cp->handle) { - /* Store in hdev for instance 0 */ - hdev->adv_tx_power = rp->tx_power; - } else { - adv_instance = hci_find_adv_instance(hdev, cp->handle); - if (adv_instance) - adv_instance->tx_power = rp->tx_power; - } - /* Update adv data as tx power is known now */ - hci_update_adv_data(hdev, cp->handle); - - hci_dev_unlock(hdev); - - return rp->status; -} - static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -4155,8 +4121,6 @@ static const struct hci_cc { HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS, hci_cc_le_read_num_adv_sets, sizeof(struct hci_rp_le_read_num_supported_adv_sets)), - HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param, - sizeof(struct hci_rp_le_set_ext_adv_params)), HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE, hci_cc_le_set_ext_adv_enable), HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR, @@ -6257,6 +6221,11 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, static u8 ext_evt_type_to_legacy(struct hci_dev *hdev, u16 evt_type) { + u16 pdu_type = evt_type & ~LE_EXT_ADV_DATA_STATUS_MASK; + + if (!pdu_type) + return LE_ADV_NONCONN_IND; + if (evt_type & LE_EXT_ADV_LEGACY_PDU) { switch (evt_type) { case LE_LEGACY_ADV_IND: @@ -6288,8 +6257,7 @@ static u8 ext_evt_type_to_legacy(struct hci_dev *hdev, u16 evt_type) if (evt_type & LE_EXT_ADV_SCAN_IND) return LE_ADV_SCAN_IND; - if (evt_type == LE_EXT_ADV_NON_CONN_IND || - evt_type & LE_EXT_ADV_DIRECT_IND) + if (evt_type & LE_EXT_ADV_DIRECT_IND) return LE_ADV_NONCONN_IND; invalid: @@ -6981,7 +6949,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu); if (!ev->status) { + bis->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_SYNC, &bis->flags); + hci_debugfs_create_conn(bis); + hci_conn_add_sysfs(bis); hci_iso_setup_path(bis); } } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 022b86797acd..4ad5296d7934 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -118,7 +118,7 @@ static void hci_sock_free_cookie(struct sock *sk) int id = hci_pi(sk)->cookie; if (id) { - hci_pi(sk)->cookie = 0xffffffff; + hci_pi(sk)->cookie = 0; ida_free(&sock_cookie_ida, id); } } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a00316d79dbf..bbd809414b2f 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1205,9 +1205,126 @@ static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } +static int +hci_set_ext_adv_params_sync(struct hci_dev *hdev, struct adv_info *adv, + const struct hci_cp_le_set_ext_adv_params *cp, + struct hci_rp_le_set_ext_adv_params *rp) +{ + struct sk_buff *skb; + + skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(*cp), + cp, HCI_CMD_TIMEOUT); + + /* If command return a status event, skb will be set to -ENODATA */ + if (skb == ERR_PTR(-ENODATA)) + return 0; + + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", + HCI_OP_LE_SET_EXT_ADV_PARAMS, PTR_ERR(skb)); + return PTR_ERR(skb); + } + + if (skb->len != sizeof(*rp)) { + bt_dev_err(hdev, "Invalid response length for 0x%4.4x: %u", + HCI_OP_LE_SET_EXT_ADV_PARAMS, skb->len); + kfree_skb(skb); + return -EIO; + } + + memcpy(rp, skb->data, sizeof(*rp)); + kfree_skb(skb); + + if (!rp->status) { + hdev->adv_addr_type = cp->own_addr_type; + if (!cp->handle) { + /* Store in hdev for instance 0 */ + hdev->adv_tx_power = rp->tx_power; + } else if (adv) { + adv->tx_power = rp->tx_power; + } + } + + return rp->status; +} + +static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, + HCI_MAX_EXT_AD_LENGTH); + u8 len; + struct adv_info *adv = NULL; + int err; + + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv || !adv->adv_data_changed) + return 0; + } + + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); + + pdu->length = len; + pdu->handle = adv ? adv->handle : instance; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; + + err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, + struct_size(pdu, data, len), pdu, + HCI_CMD_TIMEOUT); + if (err) + return err; + + /* Update data if the command succeed */ + if (adv) { + adv->adv_data_changed = false; + } else { + memcpy(hdev->adv_data, pdu->data, len); + hdev->adv_data_len = len; + } + + return 0; +} + +static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + struct hci_cp_le_set_adv_data cp; + u8 len; + + memset(&cp, 0, sizeof(cp)); + + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); + + /* There's nothing to do if the data hasn't changed */ + if (hdev->adv_data_len == len && + memcmp(cp.data, hdev->adv_data, len) == 0) + return 0; + + memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); + hdev->adv_data_len = len; + + cp.length = len; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + +int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return 0; + + if (ext_adv_capable(hdev)) + return hci_set_ext_adv_data_sync(hdev, instance); + + return hci_set_adv_data_sync(hdev, instance); +} + int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; bool connectable; u32 flags; bdaddr_t random_addr; @@ -1228,7 +1345,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) * Command Disallowed error, so we must first disable the * instance if it is active. */ - if (adv && !adv->pending) { + if (adv) { err = hci_disable_ext_adv_instance_sync(hdev, instance); if (err) return err; @@ -1314,8 +1431,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp.secondary_phy = HCI_ADV_PHY_1M; } - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, adv, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; @@ -1832,79 +1953,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) sizeof(cp), &cp, HCI_CMD_TIMEOUT); } -static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, - HCI_MAX_EXT_AD_LENGTH); - u8 len; - struct adv_info *adv = NULL; - int err; - - if (instance) { - adv = hci_find_adv_instance(hdev, instance); - if (!adv || !adv->adv_data_changed) - return 0; - } - - len = eir_create_adv_data(hdev, instance, pdu->data, - HCI_MAX_EXT_AD_LENGTH); - - pdu->length = len; - pdu->handle = adv ? adv->handle : instance; - pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; - - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, - struct_size(pdu, data, len), pdu, - HCI_CMD_TIMEOUT); - if (err) - return err; - - /* Update data if the command succeed */ - if (adv) { - adv->adv_data_changed = false; - } else { - memcpy(hdev->adv_data, pdu->data, len); - hdev->adv_data_len = len; - } - - return 0; -} - -static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - struct hci_cp_le_set_adv_data cp; - u8 len; - - memset(&cp, 0, sizeof(cp)); - - len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return 0; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); -} - -int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return 0; - - if (ext_adv_capable(hdev)) - return hci_set_ext_adv_data_sync(hdev, instance); - - return hci_set_adv_data_sync(hdev, instance); -} - int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force) { @@ -1980,13 +2028,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk) static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) { struct adv_info *adv, *n; - int err = 0; if (ext_adv_capable(hdev)) /* Remove all existing sets */ - err = hci_clear_adv_sets_sync(hdev, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_clear_adv_sets_sync(hdev, sk); /* This is safe as long as there is no command send while the lock is * held. @@ -2014,13 +2059,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { - int err = 0; + int err; /* If we use extended advertising, instance has to be removed first. */ if (ext_adv_capable(hdev)) - err = hci_remove_ext_adv_instance_sync(hdev, instance, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_remove_ext_adv_instance_sync(hdev, instance, sk); /* This is safe as long as there is no command send while the lock is * held. @@ -2119,16 +2162,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type) int hci_disable_advertising_sync(struct hci_dev *hdev) { u8 enable = 0x00; - int err = 0; /* If controller is not advertising we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; if (ext_adv_capable(hdev)) - err = hci_disable_ext_adv_instance_sync(hdev, 0x00); - if (ext_adv_capable(hdev)) - return err; + return hci_disable_ext_adv_instance_sync(hdev, 0x00); return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); @@ -2491,6 +2531,10 @@ static int hci_pause_advertising_sync(struct hci_dev *hdev) int err; int old_state; + /* If controller is not advertising we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) + return 0; + /* If already been paused there is nothing to do. */ if (hdev->advertising_paused) return 0; @@ -6251,6 +6295,7 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; int err; bdaddr_t random_addr; u8 own_addr_type; @@ -6292,8 +6337,12 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, if (err) return err; - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, NULL, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; @@ -6740,8 +6789,8 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, return 0; } - /* No privacy so use a public address. */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; + /* No privacy, use the current address */ + hci_copy_identity_address(hdev, rand_addr, own_addr_type); return 0; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a40534bf9084..7dafc3e0a15a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3380,7 +3380,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; struct l2cap_conf_efs efs; u8 remote_efs = 0; - u16 mtu = L2CAP_DEFAULT_MTU; + u16 mtu = 0; u16 result = L2CAP_CONF_SUCCESS; u16 size; @@ -3485,6 +3485,29 @@ done: /* Configure output options and let the other side know * which ones we don't like. */ + /* If MTU is not provided in configure request, try adjusting it + * to the current output MTU if it has been set + * + * Bluetooth Core 6.1, Vol 3, Part A, Section 4.5 + * + * Each configuration parameter value (if any is present) in an + * L2CAP_CONFIGURATION_RSP packet reflects an ‘adjustment’ to a + * configuration parameter value that has been sent (or, in case + * of default values, implied) in the corresponding + * L2CAP_CONFIGURATION_REQ packet. + */ + if (!mtu) { + /* Only adjust for ERTM channels as for older modes the + * remote stack may not be able to detect that the + * adjustment causing it to silently drop packets. + */ + if (chan->mode == L2CAP_MODE_ERTM && + chan->omtu && chan->omtu != L2CAP_DEFAULT_MTU) + mtu = chan->omtu; + else + mtu = L2CAP_DEFAULT_MTU; + } + if (mtu < L2CAP_DEFAULT_MIN_MTU) result = L2CAP_CONF_UNACCEPT; else { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index acd11b268b98..615c18e290ab 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1690,6 +1690,9 @@ static void l2cap_sock_resume_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return; + if (test_and_clear_bit(FLAG_PENDING_SECURITY, &chan->flags)) { sk->sk_state = BT_CONNECTED; chan->state = BT_CONNECTED; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7664e7ba372c..ade93532db34 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1073,7 +1073,8 @@ static int mesh_send_done_sync(struct hci_dev *hdev, void *data) struct mgmt_mesh_tx *mesh_tx; hci_dev_clear_flag(hdev, HCI_MESH_SENDING); - hci_disable_advertising_sync(hdev); + if (list_empty(&hdev->adv_instances)) + hci_disable_advertising_sync(hdev); mesh_tx = mgmt_mesh_next(hdev, NULL); if (mesh_tx) @@ -2146,6 +2147,9 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data) else hci_dev_clear_flag(hdev, HCI_MESH); + hdev->le_scan_interval = __le16_to_cpu(cp->period); + hdev->le_scan_window = __le16_to_cpu(cp->window); + len -= sizeof(*cp); /* If filters don't fit, forward all adv pkts */ @@ -2160,6 +2164,7 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_mesh *cp = data; struct mgmt_pending_cmd *cmd; + __u16 period, window; int err = 0; bt_dev_dbg(hdev, "sock %p", sk); @@ -2173,6 +2178,23 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, MGMT_STATUS_INVALID_PARAMS); + /* Keep allowed ranges in sync with set_scan_params() */ + period = __le16_to_cpu(cp->period); + + if (period < 0x0004 || period > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + window = __le16_to_cpu(cp->window); + + if (window < 0x0004 || window > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + if (window > period) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len); @@ -6536,6 +6558,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, MGMT_STATUS_NOT_SUPPORTED); + /* Keep allowed ranges in sync with set_mesh() */ interval = __le16_to_cpu(cp->interval); if (interval < 0x0004 || interval > 0x4000) diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 8b9724fd752a..a31971fe2fd7 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1379,7 +1379,7 @@ static void smp_timeout(struct work_struct *work) bt_dev_dbg(conn->hcon->hdev, "conn %p", conn); - hci_disconnect(conn->hcon, HCI_ERROR_REMOTE_USER_TERM); + hci_disconnect(conn->hcon, HCI_ERROR_AUTH_FAILURE); } static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) @@ -2977,8 +2977,25 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb) if (code > SMP_CMD_MAX) goto drop; - if (smp && !test_and_clear_bit(code, &smp->allow_cmd)) + if (smp && !test_and_clear_bit(code, &smp->allow_cmd)) { + /* If there is a context and the command is not allowed consider + * it a failure so the session is cleanup properly. + */ + switch (code) { + case SMP_CMD_IDENT_INFO: + case SMP_CMD_IDENT_ADDR_INFO: + case SMP_CMD_SIGN_INFO: + /* 3.6.1. Key distribution and generation + * + * A device may reject a distributed key by sending the + * Pairing Failed command with the reason set to + * "Key Rejected". + */ + smp_failure(conn, SMP_KEY_REJECTED); + break; + } goto drop; + } /* If we don't have a context the only allowed commands are * pairing request and security request. diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 87a59ec2c9f0..c5da53dfab04 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -138,6 +138,7 @@ struct smp_cmd_keypress_notify { #define SMP_NUMERIC_COMP_FAILED 0x0c #define SMP_BREDR_PAIRING_IN_PROGRESS 0x0d #define SMP_CROSS_TRANSP_NOT_ALLOWED 0x0e +#define SMP_KEY_REJECTED 0x0f #define SMP_MIN_ENC_KEY_SIZE 7 #define SMP_MAX_ENC_KEY_SIZE 16 diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 1820f09ff59c..3f24b4ee49c2 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -80,10 +80,10 @@ static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg, if (br_vlan_get_state(v) == state) return; - br_vlan_set_state(v, state); - if (v->vid == vg->pvid) br_vlan_set_pvid_state(vg, state); + + br_vlan_set_state(v, state); } int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b2ae0d2434d2..733ff6b758f6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2105,12 +2105,17 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) } } -void br_multicast_enable_port(struct net_bridge_port *port) +static void br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx) { - struct net_bridge *br = port->br; + struct net_bridge *br = pmctx->port->br; spin_lock_bh(&br->multicast_lock); - __br_multicast_enable_port_ctx(&port->multicast_ctx); + if (br_multicast_port_ctx_is_vlan(pmctx) && + !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) { + spin_unlock_bh(&br->multicast_lock); + return; + } + __br_multicast_enable_port_ctx(pmctx); spin_unlock_bh(&br->multicast_lock); } @@ -2137,11 +2142,67 @@ static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) br_multicast_rport_del_notify(pmctx, del); } +static void br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx) +{ + struct net_bridge *br = pmctx->port->br; + + spin_lock_bh(&br->multicast_lock); + if (br_multicast_port_ctx_is_vlan(pmctx) && + !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED)) { + spin_unlock_bh(&br->multicast_lock); + return; + } + + __br_multicast_disable_port_ctx(pmctx); + spin_unlock_bh(&br->multicast_lock); +} + +static void br_multicast_toggle_port(struct net_bridge_port *port, bool on) +{ +#if IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) + if (br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) { + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + rcu_read_lock(); + vg = nbp_vlan_group_rcu(port); + if (!vg) { + rcu_read_unlock(); + return; + } + + /* iterate each vlan, toggle vlan multicast context */ + list_for_each_entry_rcu(vlan, &vg->vlan_list, vlist) { + struct net_bridge_mcast_port *pmctx = + &vlan->port_mcast_ctx; + u8 state = br_vlan_get_state(vlan); + /* enable vlan multicast context when state is + * LEARNING or FORWARDING + */ + if (on && br_vlan_state_allowed(state, true)) + br_multicast_enable_port_ctx(pmctx); + else + br_multicast_disable_port_ctx(pmctx); + } + rcu_read_unlock(); + return; + } +#endif + /* toggle port multicast context when vlan snooping is disabled */ + if (on) + br_multicast_enable_port_ctx(&port->multicast_ctx); + else + br_multicast_disable_port_ctx(&port->multicast_ctx); +} + +void br_multicast_enable_port(struct net_bridge_port *port) +{ + br_multicast_toggle_port(port, true); +} + void br_multicast_disable_port(struct net_bridge_port *port) { - spin_lock_bh(&port->br->multicast_lock); - __br_multicast_disable_port_ctx(&port->multicast_ctx); - spin_unlock_bh(&port->br->multicast_lock); + br_multicast_toggle_port(port, false); } static int __grp_src_delete_marked(struct net_bridge_port_group *pg) @@ -4211,6 +4272,32 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx) #endif } +void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, u8 state) +{ +#if IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) + struct net_bridge *br; + + if (!br_vlan_should_use(v)) + return; + + if (br_vlan_is_master(v)) + return; + + br = v->port->br; + + if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) + return; + + if (br_vlan_state_allowed(state, true)) + br_multicast_enable_port_ctx(&v->port_mcast_ctx); + + /* Multicast is not disabled for the vlan when it goes in + * blocking state because the timers will expire and stop by + * themselves without sending more queries. + */ +#endif +} + void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) { struct net_bridge *br; @@ -4304,9 +4391,9 @@ int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, __br_multicast_open(&br->multicast_ctx); list_for_each_entry(p, &br->port_list, list) { if (on) - br_multicast_disable_port(p); + br_multicast_disable_port_ctx(&p->multicast_ctx); else - br_multicast_enable_port(p); + br_multicast_enable_port_ctx(&p->multicast_ctx); } list_for_each_entry(vlan, &vg->vlan_list, vlist) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index df502cc1191c..6a1bce8959af 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1053,6 +1053,7 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, struct net_bridge_vlan *vlan, struct net_bridge_mcast_port *pmctx); void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx); +void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, u8 state); void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on); int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack); @@ -1503,6 +1504,11 @@ static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pm { } +static inline void br_multicast_update_vlan_mcast_ctx(struct net_bridge_vlan *v, + u8 state) +{ +} + static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) { @@ -1854,7 +1860,9 @@ bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr, bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, const struct net_bridge_vlan *v_opts); -/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */ +/* vlan state manipulation helpers using *_ONCE to annotate lock-free access, + * while br_vlan_set_state() may access data protected by multicast_lock. + */ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) { return READ_ONCE(v->state); @@ -1863,6 +1871,7 @@ static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v) static inline void br_vlan_set_state(struct net_bridge_vlan *v, u8 state) { WRITE_ONCE(v->state, state); + br_multicast_update_vlan_mcast_ctx(v, state); } static inline u8 br_vlan_get_pvid_state(const struct net_bridge_vlan_group *vg) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 7b41ee8740cb..f10bd6a233dc 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -17,6 +17,9 @@ static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p, if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload)) return false; + if (br_multicast_igmp_type(skb)) + return false; + return (p->flags & BR_TX_FWD_OFFLOAD) && (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom); } diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 20139fa1be1f..06b604cf9d58 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -351,17 +351,154 @@ int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer) return found; } +static int cfctrl_link_setup(struct cfctrl *cfctrl, struct cfpkt *pkt, u8 cmdrsp) +{ + u8 len; + u8 linkid = 0; + enum cfctrl_srv serv; + enum cfctrl_srv servtype; + u8 endpoint; + u8 physlinkid; + u8 prio; + u8 tmp; + u8 *cp; + int i; + struct cfctrl_link_param linkparam; + struct cfctrl_request_info rsp, *req; + + memset(&linkparam, 0, sizeof(linkparam)); + + tmp = cfpkt_extr_head_u8(pkt); + + serv = tmp & CFCTRL_SRV_MASK; + linkparam.linktype = serv; + + servtype = tmp >> 4; + linkparam.chtype = servtype; + + tmp = cfpkt_extr_head_u8(pkt); + physlinkid = tmp & 0x07; + prio = tmp >> 3; + + linkparam.priority = prio; + linkparam.phyid = physlinkid; + endpoint = cfpkt_extr_head_u8(pkt); + linkparam.endpoint = endpoint & 0x03; + + switch (serv) { + case CFCTRL_SRV_VEI: + case CFCTRL_SRV_DBG: + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID */ + linkid = cfpkt_extr_head_u8(pkt); + break; + case CFCTRL_SRV_VIDEO: + tmp = cfpkt_extr_head_u8(pkt); + linkparam.u.video.connid = tmp; + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID */ + linkid = cfpkt_extr_head_u8(pkt); + break; + + case CFCTRL_SRV_DATAGRAM: + linkparam.u.datagram.connid = cfpkt_extr_head_u32(pkt); + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID */ + linkid = cfpkt_extr_head_u8(pkt); + break; + case CFCTRL_SRV_RFM: + /* Construct a frame, convert + * DatagramConnectionID + * to network format long and copy it out... + */ + linkparam.u.rfm.connid = cfpkt_extr_head_u32(pkt); + cp = (u8 *) linkparam.u.rfm.volume; + for (tmp = cfpkt_extr_head_u8(pkt); + cfpkt_more(pkt) && tmp != '\0'; + tmp = cfpkt_extr_head_u8(pkt)) + *cp++ = tmp; + *cp = '\0'; + + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID */ + linkid = cfpkt_extr_head_u8(pkt); + + break; + case CFCTRL_SRV_UTIL: + /* Construct a frame, convert + * DatagramConnectionID + * to network format long and copy it out... + */ + /* Fifosize KB */ + linkparam.u.utility.fifosize_kb = cfpkt_extr_head_u16(pkt); + /* Fifosize bufs */ + linkparam.u.utility.fifosize_bufs = cfpkt_extr_head_u16(pkt); + /* name */ + cp = (u8 *) linkparam.u.utility.name; + caif_assert(sizeof(linkparam.u.utility.name) + >= UTILITY_NAME_LENGTH); + for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) { + tmp = cfpkt_extr_head_u8(pkt); + *cp++ = tmp; + } + /* Length */ + len = cfpkt_extr_head_u8(pkt); + linkparam.u.utility.paramlen = len; + /* Param Data */ + cp = linkparam.u.utility.params; + while (cfpkt_more(pkt) && len--) { + tmp = cfpkt_extr_head_u8(pkt); + *cp++ = tmp; + } + if (CFCTRL_ERR_BIT & cmdrsp) + break; + /* Link ID */ + linkid = cfpkt_extr_head_u8(pkt); + /* Length */ + len = cfpkt_extr_head_u8(pkt); + /* Param Data */ + cfpkt_extr_head(pkt, NULL, len); + break; + default: + pr_warn("Request setup, invalid type (%d)\n", serv); + return -1; + } + + rsp.cmd = CFCTRL_CMD_LINK_SETUP; + rsp.param = linkparam; + spin_lock_bh(&cfctrl->info_list_lock); + req = cfctrl_remove_req(cfctrl, &rsp); + + if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || + cfpkt_erroneous(pkt)) { + pr_err("Invalid O/E bit or parse error " + "on CAIF control channel\n"); + cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 0, + req ? req->client_layer : NULL); + } else { + cfctrl->res.linksetup_rsp(cfctrl->serv.layer.up, linkid, + serv, physlinkid, + req ? req->client_layer : NULL); + } + + kfree(req); + + spin_unlock_bh(&cfctrl->info_list_lock); + + return 0; +} + static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) { u8 cmdrsp; u8 cmd; - int ret = -1; - u8 len; - u8 param[255]; + int ret = 0; u8 linkid = 0; struct cfctrl *cfctrl = container_obj(layer); - struct cfctrl_request_info rsp, *req; - cmdrsp = cfpkt_extr_head_u8(pkt); cmd = cmdrsp & CFCTRL_CMD_MASK; @@ -374,150 +511,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) switch (cmd) { case CFCTRL_CMD_LINK_SETUP: - { - enum cfctrl_srv serv; - enum cfctrl_srv servtype; - u8 endpoint; - u8 physlinkid; - u8 prio; - u8 tmp; - u8 *cp; - int i; - struct cfctrl_link_param linkparam; - memset(&linkparam, 0, sizeof(linkparam)); - - tmp = cfpkt_extr_head_u8(pkt); - - serv = tmp & CFCTRL_SRV_MASK; - linkparam.linktype = serv; - - servtype = tmp >> 4; - linkparam.chtype = servtype; - - tmp = cfpkt_extr_head_u8(pkt); - physlinkid = tmp & 0x07; - prio = tmp >> 3; - - linkparam.priority = prio; - linkparam.phyid = physlinkid; - endpoint = cfpkt_extr_head_u8(pkt); - linkparam.endpoint = endpoint & 0x03; - - switch (serv) { - case CFCTRL_SRV_VEI: - case CFCTRL_SRV_DBG: - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - break; - case CFCTRL_SRV_VIDEO: - tmp = cfpkt_extr_head_u8(pkt); - linkparam.u.video.connid = tmp; - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - break; - - case CFCTRL_SRV_DATAGRAM: - linkparam.u.datagram.connid = - cfpkt_extr_head_u32(pkt); - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - break; - case CFCTRL_SRV_RFM: - /* Construct a frame, convert - * DatagramConnectionID - * to network format long and copy it out... - */ - linkparam.u.rfm.connid = - cfpkt_extr_head_u32(pkt); - cp = (u8 *) linkparam.u.rfm.volume; - for (tmp = cfpkt_extr_head_u8(pkt); - cfpkt_more(pkt) && tmp != '\0'; - tmp = cfpkt_extr_head_u8(pkt)) - *cp++ = tmp; - *cp = '\0'; - - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - - break; - case CFCTRL_SRV_UTIL: - /* Construct a frame, convert - * DatagramConnectionID - * to network format long and copy it out... - */ - /* Fifosize KB */ - linkparam.u.utility.fifosize_kb = - cfpkt_extr_head_u16(pkt); - /* Fifosize bufs */ - linkparam.u.utility.fifosize_bufs = - cfpkt_extr_head_u16(pkt); - /* name */ - cp = (u8 *) linkparam.u.utility.name; - caif_assert(sizeof(linkparam.u.utility.name) - >= UTILITY_NAME_LENGTH); - for (i = 0; - i < UTILITY_NAME_LENGTH - && cfpkt_more(pkt); i++) { - tmp = cfpkt_extr_head_u8(pkt); - *cp++ = tmp; - } - /* Length */ - len = cfpkt_extr_head_u8(pkt); - linkparam.u.utility.paramlen = len; - /* Param Data */ - cp = linkparam.u.utility.params; - while (cfpkt_more(pkt) && len--) { - tmp = cfpkt_extr_head_u8(pkt); - *cp++ = tmp; - } - if (CFCTRL_ERR_BIT & cmdrsp) - break; - /* Link ID */ - linkid = cfpkt_extr_head_u8(pkt); - /* Length */ - len = cfpkt_extr_head_u8(pkt); - /* Param Data */ - cfpkt_extr_head(pkt, ¶m, len); - break; - default: - pr_warn("Request setup, invalid type (%d)\n", - serv); - goto error; - } - - rsp.cmd = cmd; - rsp.param = linkparam; - spin_lock_bh(&cfctrl->info_list_lock); - req = cfctrl_remove_req(cfctrl, &rsp); - - if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || - cfpkt_erroneous(pkt)) { - pr_err("Invalid O/E bit or parse error " - "on CAIF control channel\n"); - cfctrl->res.reject_rsp(cfctrl->serv.layer.up, - 0, - req ? req->client_layer - : NULL); - } else { - cfctrl->res.linksetup_rsp(cfctrl->serv. - layer.up, linkid, - serv, physlinkid, - req ? req-> - client_layer : NULL); - } - - kfree(req); - - spin_unlock_bh(&cfctrl->info_list_lock); - } + ret = cfctrl_link_setup(cfctrl, pkt, cmdrsp); break; case CFCTRL_CMD_LINK_DESTROY: linkid = cfpkt_extr_head_u8(pkt); @@ -544,9 +538,9 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) break; default: pr_err("Unrecognized Control Frame\n"); + ret = -1; goto error; } - ret = 0; error: cfpkt_destroy(pkt); return ret; diff --git a/net/core/dst.c b/net/core/dst.c index 6d76b799ce64..cc990706b645 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -148,8 +148,8 @@ void dst_dev_put(struct dst_entry *dst) dst->obsolete = DST_OBSOLETE_DEAD; if (dst->ops->ifdown) dst->ops->ifdown(dst, dev); - dst->input = dst_discard; - dst->output = dst_discard_out; + WRITE_ONCE(dst->input, dst_discard); + WRITE_ONCE(dst->output, dst_discard_out); dst->dev = blackhole_netdev; netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, GFP_ATOMIC); diff --git a/net/core/filter.c b/net/core/filter.c index 99b23fd2f509..02fedc404d7f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1980,10 +1980,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; bool do_mforce = flags & BPF_F_MARK_ENFORCE; + bool is_ipv6 = flags & BPF_F_IPV6; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | - BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) + BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6))) return -EINVAL; if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; @@ -1999,7 +2000,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, if (unlikely(from != 0)) return -EINVAL; - inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); + inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6); break; case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); @@ -3249,6 +3250,13 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) +{ + skb->protocol = htons(proto); + if (skb_valid_dst(skb)) + skb_dst_drop(skb); +} + static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { /* Caller already did skb_cow() with len as headroom, @@ -3345,7 +3353,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) } } - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); skb_clear_hash(skb); return 0; @@ -3375,7 +3383,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) } } - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); skb_clear_hash(skb); return 0; @@ -3566,10 +3574,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); } if (skb_is_gso(skb)) { @@ -3622,10 +3630,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -9399,6 +9407,9 @@ static bool flow_dissector_is_valid_access(int off, int size, if (off < 0 || off >= sizeof(struct __sk_buff)) return false; + if (off % size != 0) + return false; + if (type == BPF_WRITE) return false; diff --git a/net/core/ieee8021q_helpers.c b/net/core/ieee8021q_helpers.c index 759a9b9f3f89..669b357b73b2 100644 --- a/net/core/ieee8021q_helpers.c +++ b/net/core/ieee8021q_helpers.c @@ -7,6 +7,11 @@ #include <net/dscp.h> #include <net/ieee8021q.h> +/* verify that table covers all 8 traffic types */ +#define TT_MAP_SIZE_OK(tbl) \ + compiletime_assert(ARRAY_SIZE(tbl) == IEEE8021Q_TT_MAX, \ + #tbl " size mismatch") + /* The following arrays map Traffic Types (TT) to traffic classes (TC) for * different number of queues as shown in the example provided by * IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic class mapping" and @@ -101,51 +106,28 @@ int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues) switch (num_queues) { case 8: - compiletime_assert(ARRAY_SIZE(ieee8021q_8queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_8queue_tt_tc_map != max - 1"); + TT_MAP_SIZE_OK(ieee8021q_8queue_tt_tc_map); return ieee8021q_8queue_tt_tc_map[tt]; case 7: - compiletime_assert(ARRAY_SIZE(ieee8021q_7queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_7queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_7queue_tt_tc_map); return ieee8021q_7queue_tt_tc_map[tt]; case 6: - compiletime_assert(ARRAY_SIZE(ieee8021q_6queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_6queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_6queue_tt_tc_map); return ieee8021q_6queue_tt_tc_map[tt]; case 5: - compiletime_assert(ARRAY_SIZE(ieee8021q_5queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_5queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_5queue_tt_tc_map); return ieee8021q_5queue_tt_tc_map[tt]; case 4: - compiletime_assert(ARRAY_SIZE(ieee8021q_4queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_4queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_4queue_tt_tc_map); return ieee8021q_4queue_tt_tc_map[tt]; case 3: - compiletime_assert(ARRAY_SIZE(ieee8021q_3queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_3queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_3queue_tt_tc_map); return ieee8021q_3queue_tt_tc_map[tt]; case 2: - compiletime_assert(ARRAY_SIZE(ieee8021q_2queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_2queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_2queue_tt_tc_map); return ieee8021q_2queue_tt_tc_map[tt]; case 1: - compiletime_assert(ARRAY_SIZE(ieee8021q_1queue_tt_tc_map) != - IEEE8021Q_TT_MAX - 1, - "ieee8021q_1queue_tt_tc_map != max - 1"); - + TT_MAP_SIZE_OK(ieee8021q_1queue_tt_tc_map); return ieee8021q_1queue_tt_tc_map[tt]; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8082cc6be4fc..96786016dbb4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -55,7 +55,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid); static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid); static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, - struct net_device *dev); + struct net_device *dev, + bool skip_perm); #ifdef CONFIG_PROC_FS static const struct seq_operations neigh_stat_seq_ops; @@ -444,7 +445,7 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, { write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); - pneigh_ifdown_and_unlock(tbl, dev); + pneigh_ifdown_and_unlock(tbl, dev, skip_perm); pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL, tbl->family); if (skb_queue_empty_lockless(&tbl->proxy_queue)) @@ -847,7 +848,8 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, } static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, - struct net_device *dev) + struct net_device *dev, + bool skip_perm) { struct pneigh_entry *n, **np, *freelist = NULL; u32 h; @@ -855,12 +857,15 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, for (h = 0; h <= PNEIGH_HASHMASK; h++) { np = &tbl->phash_buckets[h]; while ((n = *np) != NULL) { + if (skip_perm && n->permanent) + goto skip; if (!dev || n->dev == dev) { *np = n->next; n->next = freelist; freelist = n; continue; } +skip: np = &n->next; } } @@ -2041,6 +2046,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { pn->flags = ndm_flags; + pn->permanent = !!(ndm->ndm_state & NUD_PERMANENT); if (protocol) pn->protocol = protocol; err = 0; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 70fea7c1a4b0..ee3c1b37d06c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -458,7 +458,7 @@ static void net_complete_free(void) } -static void net_free(struct net *net) +void net_passive_dec(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); @@ -476,7 +476,7 @@ void net_drop_ns(void *p) struct net *net = (struct net *)p; if (net) - net_free(net); + net_passive_dec(net); } struct net *copy_net_ns(unsigned long flags, @@ -517,7 +517,7 @@ put_userns: key_remove_domain(net->key_domain); #endif put_user_ns(user_ns); - net_free(net); + net_passive_dec(net); dec_ucounts: dec_net_namespaces(ucounts); return ERR_PTR(rv); @@ -662,7 +662,7 @@ static void cleanup_net(struct work_struct *work) key_remove_domain(net->key_domain); #endif put_user_ns(net->user_ns); - net_free(net); + net_passive_dec(net); } } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e95c2933756d..87182a4272bf 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -784,6 +784,13 @@ put_noaddr: if (err) goto put; rtnl_unlock(); + + /* Make sure all NAPI polls which started before dev->npinfo + * was visible have exited before we start calling NAPI poll. + * NAPI skips locking if dev->npinfo is NULL. + */ + synchronize_rcu(); + return 0; put: diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 0f23b3126bda..b1c3e0ad6dbf 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -829,6 +829,10 @@ static bool page_pool_napi_local(const struct page_pool *pool) const struct napi_struct *napi; u32 cpuid; + /* On PREEMPT_RT the softirq can be preempted by the consumer */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + return false; + if (unlikely(!in_softirq())) return false; diff --git a/net/core/selftests.c b/net/core/selftests.c index 561653f9d71d..ef27594d6a99 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -160,8 +160,9 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; if (attr->tcp) { - thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, - ihdr->daddr, 0); + int l4len = skb->len - skb_transport_offset(skb); + + thdr->check = ~tcp_v4_check(l4len, ihdr->saddr, ihdr->daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } else { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fdb36165c58f..cf54593149cc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6197,9 +6197,6 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) if (!pskb_may_pull(skb, write_len)) return -ENOMEM; - if (!skb_frags_readable(skb)) - return -EFAULT; - if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index a8d238dd982a..adb3166ede97 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -655,6 +655,13 @@ static void sk_psock_backlog(struct work_struct *work) bool ingress; int ret; + /* If sk is quickly removed from the map and then added back, the old + * psock should not be scheduled, because there are now two psocks + * pointing to the same sk. + */ + if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + return; + /* Increment the psock refcnt to synchronize with close(fd) path in * sock_map_close(), ensuring we wait for backlog thread completion * before sk_socket freed. If refcnt increment fails, it indicates @@ -689,7 +696,8 @@ static void sk_psock_backlog(struct work_struct *work) if (ret <= 0) { if (ret == -EAGAIN) { sk_psock_skb_state(psock, state, len, off); - + /* Restore redir info we cleared before */ + skb_bpf_set_redir(skb, psock->sk, ingress); /* Delay slightly to prioritize any * other work that might be here. */ diff --git a/net/core/sock.c b/net/core/sock.c index 3c5386c76d6f..d392cb37a864 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2243,6 +2243,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, get_net_track(net, &sk->ns_tracker, priority); sock_inuse_add(net, 1); } else { + net_passive_inc(net); __netns_tracker_alloc(net, &sk->ns_tracker, false, priority); } @@ -2267,6 +2268,7 @@ EXPORT_SYMBOL(sk_alloc); static void __sk_destruct(struct rcu_head *head) { struct sock *sk = container_of(head, struct sock, sk_rcu); + struct net *net = sock_net(sk); struct sk_filter *filter; if (sk->sk_destruct) @@ -2298,14 +2300,28 @@ static void __sk_destruct(struct rcu_head *head) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); - if (likely(sk->sk_net_refcnt)) - put_net_track(sock_net(sk), &sk->ns_tracker); - else - __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); - + if (likely(sk->sk_net_refcnt)) { + put_net_track(net, &sk->ns_tracker); + } else { + __netns_tracker_free(net, &sk->ns_tracker, false); + net_passive_dec(net); + } sk_prot_free(sk->sk_prot_creator, sk); } +void sk_net_refcnt_upgrade(struct sock *sk) +{ + struct net *net = sock_net(sk); + + WARN_ON_ONCE(sk->sk_net_refcnt); + __netns_tracker_free(net, &sk->ns_tracker, false); + net_passive_dec(net); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); +} +EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade); + void sk_destruct(struct sock *sk) { bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); @@ -2402,6 +2418,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * is not properly dismantling its kernel sockets at netns * destroy time. */ + net_passive_inc(sock_net(newsk)); __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, false, priority); } @@ -3930,7 +3947,7 @@ static int assign_proto_idx(struct proto *prot) { prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); - if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { + if (unlikely(prot->inuse_idx == PROTO_INUSE_NR)) { pr_err("PROTO_INUSE_NR exhausted\n"); return -ENOSPC; } @@ -3941,7 +3958,7 @@ static int assign_proto_idx(struct proto *prot) static void release_proto_idx(struct proto *prot) { - if (prot->inuse_idx != PROTO_INUSE_NR - 1) + if (prot->inuse_idx != PROTO_INUSE_NR) clear_bit(prot->inuse_idx, proto_inuse_idx); } #else diff --git a/net/core/utils.c b/net/core/utils.c index 27f4cffaae05..b8c21a859e27 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, EXPORT_SYMBOL(inet_proto_csum_replace16); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, - __wsum diff, bool pseudohdr) + __wsum diff, bool pseudohdr, bool ipv6) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace_by_diff(sum, diff); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6) skb->csum = ~csum_sub(diff, skb->csum); } else if (pseudohdr) { *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 41b320f0c20e..9a5c9497b393 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -189,7 +189,11 @@ const __u8 ip_tos2prio[16] = { EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); +#ifndef CONFIG_PREEMPT_RT #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) +#else +#define RT_CACHE_STAT_INC(field) this_cpu_inc(rt_cache_stat.field) +#endif #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) @@ -1680,8 +1684,8 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) else if (rt->rt_gw_family == AF_INET6) new_rt->rt_gw6 = rt->rt_gw6; - new_rt->dst.input = rt->dst.input; - new_rt->dst.output = rt->dst.output; + new_rt->dst.input = READ_ONCE(rt->dst.input); + new_rt->dst.output = READ_ONCE(rt->dst.output); new_rt->dst.error = rt->dst.error; new_rt->dst.lastuse = jiffies; new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); @@ -2541,7 +2545,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, do_cache = true; if (type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; - fi = NULL; } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b731a4a8f2b0..156da81bce06 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1145,7 +1145,7 @@ restart: goto do_error; while (msg_data_left(msg)) { - ssize_t copy = 0; + int copy = 0; skb = tcp_write_queue_tail(sk); if (skb) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 32b28fc21b63..408985eb74ee 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -3,6 +3,7 @@ #include <linux/tcp.h> #include <linux/rcupdate.h> #include <net/tcp.h> +#include <net/busy_poll.h> void tcp_fastopen_init_key_once(struct net *net) { @@ -279,6 +280,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, refcount_set(&req->rsk_refcnt, 2); + sk_mark_napi_id_set(child, skb); + /* Now finish processing the fastopen child socket. */ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d29219e067b7..30f4375f8431 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -665,10 +665,12 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss); */ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) { - u32 new_sample = tp->rcv_rtt_est.rtt_us; - long m = sample; + u32 new_sample, old_sample = tp->rcv_rtt_est.rtt_us; + long m = sample << 3; - if (new_sample != 0) { + if (old_sample == 0 || m < old_sample) { + new_sample = m; + } else { /* If we sample in larger samples in the non-timestamp * case, we could grossly overestimate the RTT especially * with chatty applications or bulk transfer apps which @@ -679,17 +681,9 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) * else with timestamps disabled convergence takes too * long. */ - if (!win_dep) { - m -= (new_sample >> 3); - new_sample += m; - } else { - m <<= 3; - if (m < new_sample) - new_sample = m; - } - } else { - /* No previous measure. */ - new_sample = m << 3; + if (win_dep) + return; + new_sample = old_sample - (old_sample >> 3) + sample; } tp->rcv_rtt_est.rtt_us = new_sample; @@ -713,7 +707,7 @@ new_measure: tp->rcv_rtt_est.time = tp->tcp_mstamp; } -static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp) +static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp, u32 min_delta) { u32 delta, delta_us; @@ -723,7 +717,7 @@ static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp) if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { if (!delta) - delta = 1; + delta = min_delta; delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); return delta_us; } @@ -741,9 +735,9 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, if (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) { - s32 delta = tcp_rtt_tsopt_us(tp); + s32 delta = tcp_rtt_tsopt_us(tp, 0); - if (delta >= 0) + if (delta > 0) tcp_rcv_rtt_update(tp, delta, 0); } } @@ -755,8 +749,7 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, void tcp_rcv_space_adjust(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - u32 copied; - int time; + int time, inq, copied; trace_tcp_rcv_space_adjust(sk); @@ -767,6 +760,9 @@ void tcp_rcv_space_adjust(struct sock *sk) /* Number of bytes copied to user in last RTT */ copied = tp->copied_seq - tp->rcvq_space.seq; + /* Number of bytes in receive queue. */ + inq = tp->rcv_nxt - tp->copied_seq; + copied -= inq; if (copied <= tp->rcvq_space.space) goto new_measure; @@ -2486,20 +2482,33 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { const struct sock *sk = (const struct sock *)tp; - if (tp->retrans_stamp && - tcp_tsopt_ecr_before(tp, tp->retrans_stamp)) - return true; /* got echoed TS before first retransmission */ + /* Received an echoed timestamp before the first retransmission? */ + if (tp->retrans_stamp) + return tcp_tsopt_ecr_before(tp, tp->retrans_stamp); + + /* We set tp->retrans_stamp upon the first retransmission of a loss + * recovery episode, so normally if tp->retrans_stamp is 0 then no + * retransmission has happened yet (likely due to TSQ, which can cause + * fast retransmits to be delayed). So if snd_una advanced while + * (tp->retrans_stamp is 0 then apparently a packet was merely delayed, + * not lost. But there are exceptions where we retransmit but then + * clear tp->retrans_stamp, so we check for those exceptions. + */ - /* Check if nothing was retransmitted (retrans_stamp==0), which may - * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp - * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear - * retrans_stamp even if we had retransmitted the SYN. + /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen() + * clears tp->retrans_stamp when snd_una == high_seq. */ - if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */ - sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */ - return true; /* nothing was retransmitted */ + if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq)) + return false; - return false; + /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp + * when setting FLAG_SYN_ACKED is set, even if the SYN was + * retransmitted. + */ + if (sk->sk_state == TCP_SYN_SENT) + return false; + + return true; /* tp->retrans_stamp is zero; no retransmit yet */ } /* Undo procedures. */ @@ -3226,7 +3235,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, */ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) - seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp); + seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp, 1); rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */ if (seq_rtt_us < 0) @@ -4961,8 +4970,9 @@ static void tcp_ofo_queue(struct sock *sk) if (before(TCP_SKB_CB(skb)->seq, dsack_high)) { __u32 dsack = dsack_high; + if (before(TCP_SKB_CB(skb)->end_seq, dsack_high)) - dsack_high = TCP_SKB_CB(skb)->end_seq; + dsack = TCP_SKB_CB(skb)->end_seq; tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); } p = rb_next(p); @@ -5030,6 +5040,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) return; } + tcp_measure_rcv_mss(sk, skb); /* Disable header prediction. */ tp->pred_flags = 0; inet_csk_schedule_ack(sk); @@ -6841,6 +6852,9 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); + if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + if (req) { tcp_rcv_synrecv_state_fastopen(sk); } else { @@ -6866,9 +6880,6 @@ tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - if (tp->rx_opt.tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - if (!inet_csk(sk)->icsk_ca_ops->cong_control) tcp_update_pacing_rate(sk); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index e04ebe651c33..3a9c5c14c310 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -355,6 +355,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, flush |= skb->ip_summed != p->ip_summed; flush |= skb->csum_level != p->csum_level; flush |= NAPI_GRO_CB(p)->count >= 64; + skb_set_network_header(skb, skb_gro_receive_network_offset(skb)); if (flush || skb_gro_receive_list(p, skb)) mss = 1; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 845730184c5d..12ba1a8db93a 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -61,7 +61,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; - need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); + need_ipsec = (skb_dst(skb) && dst_xfrm(skb_dst(skb))) || skb_sec_path(skb); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && !need_ipsec && @@ -604,6 +604,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, NAPI_GRO_CB(skb)->flush = 1; return NULL; } + skb_set_network_header(skb, skb_gro_receive_network_offset(skb)); ret = skb_gro_receive_list(p, skb); } else { skb_gro_postpull_rcsum(skb, uh, diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 17d3fc2fab4c..12a1a0f42195 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -202,6 +202,9 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; + /* set the transport header to ESP */ + skb_set_transport_header(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 16ba3bb12fc4..49ec223f2eda 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2228,13 +2228,12 @@ errdad: in6_ifa_put(ifp); } -/* Join to solicited addr multicast group. - * caller must hold RTNL */ +/* Join to solicited addr multicast group. */ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; - if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + if (READ_ONCE(dev->flags) & (IFF_LOOPBACK | IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); @@ -3548,11 +3547,9 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); - idev = ipv6_find_idev(dev); - if (IS_ERR(idev)) { - pr_debug("%s: add_dev failed\n", __func__); + idev = addrconf_add_dev(dev); + if (IS_ERR(idev)) return; - } /* Generate the IPv6 link-local address using addrconf_addr_gen(), * unless we have an IPv4 GRE device not bound to an IP address and @@ -3566,9 +3563,6 @@ static void addrconf_gre_config(struct net_device *dev) } add_v4_addrs(idev); - - if (dev->flags & IFF_POINTOPOINT) - addrconf_add_mroute(dev); } #endif @@ -3888,7 +3882,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) * Do not dev_put! */ if (unregister) { - idev->dead = 1; + WRITE_ONCE(idev->dead, 1); /* protected by rtnl_lock */ RCU_INIT_POINTER(dev->ip6_ptr, NULL); diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 62618a058b8f..a247bb93908b 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1207,6 +1207,10 @@ static int calipso_req_setattr(struct request_sock *req, struct ipv6_opt_hdr *old, *new; struct sock *sk = sk_to_full_sk(req_to_sk(req)); + /* sk is NULL for SYN+ACK w/ SYN Cookie */ + if (!sk) + return -ENOMEM; + if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt) old = req_inet->ipv6_opt->hopopt; else @@ -1247,6 +1251,10 @@ static void calipso_req_delattr(struct request_sock *req) struct ipv6_txoptions *txopts; struct sock *sk = sk_to_full_sk(req_to_sk(req)); + /* sk is NULL for SYN+ACK w/ SYN Cookie */ + if (!sk) + return; + if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt) return; diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 95e9146918cc..b8d43ed4689d 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&th->check, skb, - diff, true); + diff, true, true); } break; case NEXTHDR_UDP: @@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&uh->check, skb, - diff, true); + diff, true, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } @@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, - diff, true); + diff, true, true); } break; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9a1c59275a10..aa1046fbf28e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -440,15 +440,17 @@ struct fib6_dump_arg { static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE; + unsigned int nsiblings; int err; if (!rt || rt == arg->net->ipv6.fib6_null_entry) return 0; - if (rt->fib6_nsiblings) + nsiblings = READ_ONCE(rt->fib6_nsiblings); + if (nsiblings) err = call_fib6_multipath_entry_notifier(arg->nb, fib_event, rt, - rt->fib6_nsiblings, + nsiblings, arg->extack); else err = call_fib6_entry_notifier(arg->nb, fib_event, rt, @@ -1126,7 +1128,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, if (rt6_duplicate_nexthop(iter, rt)) { if (rt->fib6_nsiblings) - rt->fib6_nsiblings = 0; + WRITE_ONCE(rt->fib6_nsiblings, 0); if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { @@ -1155,7 +1157,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, */ if (rt_can_ecmp && rt6_qualify_for_ecmp(iter)) - rt->fib6_nsiblings++; + WRITE_ONCE(rt->fib6_nsiblings, + rt->fib6_nsiblings + 1); } if (iter->fib6_metric > rt->fib6_metric) @@ -1205,7 +1208,8 @@ next_iter: fib6_nsiblings = 0; list_for_each_entry_safe(sibling, temp_sibling, &rt->fib6_siblings, fib6_siblings) { - sibling->fib6_nsiblings++; + WRITE_ONCE(sibling->fib6_nsiblings, + sibling->fib6_nsiblings + 1); BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings); fib6_nsiblings++; } @@ -1250,8 +1254,9 @@ add: list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) - sibling->fib6_nsiblings--; - rt->fib6_nsiblings = 0; + WRITE_ONCE(sibling->fib6_nsiblings, + sibling->fib6_nsiblings - 1); + WRITE_ONCE(rt->fib6_nsiblings, 0); list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); return err; @@ -1968,8 +1973,9 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, notify_del = true; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) - sibling->fib6_nsiblings--; - rt->fib6_nsiblings = 0; + WRITE_ONCE(sibling->fib6_nsiblings, + sibling->fib6_nsiblings - 1); + WRITE_ONCE(rt->fib6_nsiblings, 0); list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 9822163428b0..fce91183797a 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -148,7 +148,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { - skb_reset_transport_header(skb); + if (!skb_reset_transport_header_careful(skb)) + goto out; + segs = ops->callbacks.gso_segment(skb, features); if (!segs) skb->network_header = skb_mac_header(skb) + nhoff - skb->head; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 89a61e040e6a..f0e5431c2d46 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -2043,8 +2043,6 @@ struct sk_buff *ip6_make_skb(struct sock *sk, ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); } - if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 440048d609c3..68bc518500f9 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2032,6 +2032,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, struct sk_buff *skb, int vifi) { struct vif_device *vif = &mrt->vif_table[vifi]; + struct net_device *indev = skb->dev; struct net_device *vif_dev; struct ipv6hdr *ipv6h; struct dst_entry *dst; @@ -2094,7 +2095,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, - net, NULL, skb, skb->dev, vif_dev, + net, NULL, skb, indev, skb->dev, ip6mr_forward2_finish); out_free: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index b7b62e5a562e..e2a11a2f3b25 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -804,8 +804,8 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) } else { im->mca_crcount = idev->mc_qrv; } - in6_dev_put(pmc->idev); ip6_mc_clear_src(pmc); + in6_dev_put(pmc->idev); kfree_rcu(pmc, rcu); } } @@ -907,23 +907,22 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, static int __ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr, unsigned int mode) { - struct ifmcaddr6 *mc; struct inet6_dev *idev; - - ASSERT_RTNL(); + struct ifmcaddr6 *mc; /* we need to take a reference on idev */ idev = in6_dev_get(dev); - if (!idev) return -EINVAL; - if (idev->dead) { + mutex_lock(&idev->mc_lock); + + if (READ_ONCE(idev->dead)) { + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return -ENODEV; } - mutex_lock(&idev->mc_lock); for_each_mc_mclock(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, addr)) { mc->mca_users++; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 608fa9d05b55..328419e05c81 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -777,7 +777,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_mark = READ_ONCE(sk->sk_mark); fl6.flowi6_uid = sk->sk_uid; - ipcm6_init(&ipc6); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = fl6.flowi6_mark; @@ -890,9 +890,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); @@ -903,9 +900,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (ipc6.dontfrag < 0) - ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); - if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d9ab070e78e0..22866444efc0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5240,7 +5240,8 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, */ rcu_read_lock(); - if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { + if ((nlflags & NLM_F_APPEND) && rt_last && + READ_ONCE(rt_last->fib6_nsiblings)) { rt = list_first_or_null_rcu(&rt_last->fib6_siblings, struct fib6_info, fib6_siblings); @@ -5587,32 +5588,34 @@ static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg) static size_t rt6_nlmsg_size(struct fib6_info *f6i) { + struct fib6_info *sibling; + struct fib6_nh *nh; int nexthop_len; if (f6i->nh) { nexthop_len = nla_total_size(4); /* RTA_NH_ID */ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, &nexthop_len); - } else { - struct fib6_nh *nh = f6i->fib6_nh; - struct fib6_info *sibling; - - nexthop_len = 0; - if (f6i->fib6_nsiblings) { - rt6_nh_nlmsg_size(nh, &nexthop_len); - - rcu_read_lock(); + goto common; + } - list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, - fib6_siblings) { - rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); - } + rcu_read_lock(); +retry: + nh = f6i->fib6_nh; + nexthop_len = 0; + if (READ_ONCE(f6i->fib6_nsiblings)) { + rt6_nh_nlmsg_size(nh, &nexthop_len); - rcu_read_unlock(); + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { + rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); + if (!READ_ONCE(f6i->fib6_nsiblings)) + goto retry; } - nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); } - + rcu_read_unlock(); + nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); +common: return NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(16) /* RTA_SRC */ + nla_total_size(16) /* RTA_DST */ @@ -5771,7 +5774,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (dst->lwtstate && lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; - } else if (rt->fib6_nsiblings) { + } else if (READ_ONCE(rt->fib6_nsiblings)) { struct fib6_info *sibling; struct nlattr *mp; @@ -5873,16 +5876,21 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i, if (f6i->fib6_nh->fib_nh_dev == dev) return true; - if (f6i->fib6_nsiblings) { - struct fib6_info *sibling, *next_sibling; + if (READ_ONCE(f6i->fib6_nsiblings)) { + const struct fib6_info *sibling; - list_for_each_entry_safe(sibling, next_sibling, - &f6i->fib6_siblings, fib6_siblings) { - if (sibling->fib6_nh->fib_nh_dev == dev) + rcu_read_lock(); + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { + if (sibling->fib6_nh->fib_nh_dev == dev) { + rcu_read_unlock(); return true; + } + if (!READ_ONCE(f6i->fib6_nsiblings)) + break; } + rcu_read_unlock(); } - return false; } diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7c05ac846646..eccfa4203e96 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -129,13 +129,13 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, struct dst_entry *cache_dst) { struct ipv6_rpl_sr_hdr *isrh, *csrh; - const struct ipv6hdr *oldhdr; + struct ipv6hdr oldhdr; struct ipv6hdr *hdr; unsigned char *buf; size_t hdrlen; int err; - oldhdr = ipv6_hdr(skb); + memcpy(&oldhdr, ipv6_hdr(skb), sizeof(oldhdr)); buf = kcalloc(struct_size(srh, segments.addr, srh->segments_left), 2, GFP_ATOMIC); if (!buf) @@ -147,7 +147,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, memcpy(isrh, srh, sizeof(*isrh)); memcpy(isrh->rpl_segaddr, &srh->rpl_segaddr[1], (srh->segments_left - 1) * 16); - isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr->daddr; + isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr.daddr; ipv6_rpl_srh_compress(csrh, isrh, &srh->rpl_segaddr[0], isrh->segments_left - 1); @@ -169,7 +169,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, skb_mac_header_rebuild(skb); hdr = ipv6_hdr(skb); - memmove(hdr, oldhdr, sizeof(*hdr)); + memmove(hdr, &oldhdr, sizeof(*hdr)); isrh = (void *)hdr + sizeof(*hdr); memcpy(isrh, csrh, hdrlen); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 197d0ac47592..57e38e5e4be9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1399,7 +1399,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); - ipcm6_init(&ipc6); + ipcm6_init_sk(&ipc6, sk); ipc6.gso_size = READ_ONCE(up->gso_size); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); @@ -1608,9 +1608,6 @@ do_udp_sendmsg: security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel); dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected); @@ -1656,8 +1653,6 @@ back_from_confirm: WRITE_ONCE(up->pending, AF_INET6); do_append_data: - if (ipc6.dontfrag < 0) - ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, dst_rt6_info(dst), diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841c81abaaf4..9005fc156a20 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -202,6 +202,9 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; + /* set the transport header to ESP */ + skb_set_transport_header(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index d4118c796290..1d37b26ea2ef 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -429,7 +429,7 @@ static void psock_write_space(struct sock *sk) /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm && !unlikely(kcm->tx_stopped)) + if (kcm) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1696,12 +1696,6 @@ static int kcm_release(struct socket *sock) */ __skb_queue_purge(&sk->sk_write_queue); - /* Set tx_stopped. This is checked when psock is bound to a kcm and we - * get a writespace callback. This prevents further work being queued - * from the callback (unbinding the psock occurs after canceling work. - */ - kcm->tx_stopped = 1; - release_sock(sk); spin_lock_bh(&mux->lock); @@ -1717,7 +1711,7 @@ static int kcm_release(struct socket *sock) /* Cancel work. After this point there should be no outside references * to the kcm socket. */ - cancel_work_sync(&kcm->tx_work); + disable_work_sync(&kcm->tx_work); lock_sock(sk); psock = kcm->tx_psock; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index f4c1da070826..b98d13584c81 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -547,7 +547,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_mark = READ_ONCE(sk->sk_mark); fl6.flowi6_uid = sk->sk_uid; - ipcm6_init(&ipc6); + ipcm6_init_sk(&ipc6, sk); if (lsa) { if (addr_len < SIN6_LEN_RFC2133) @@ -634,9 +634,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); - if (ipc6.tclass < 0) - ipc6.tclass = np->tclass; - fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); @@ -648,9 +645,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); - if (ipc6.dontfrag < 0) - ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); - if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f11fd360b422..2890dde9b3bf 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1078,13 +1078,13 @@ ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst, { int i, offset = 0; + dst->cnt = src->cnt; for (i = 0; i < src->cnt; i++) { memcpy(pos + offset, src->elem[i].data, src->elem[i].len); dst->elem[i].len = src->elem[i].len; dst->elem[i].data = pos + offset; offset += dst->elem[i].len; } - dst->cnt = src->cnt; return offset; } @@ -1879,12 +1879,12 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, } if (params->supported_rates && - params->supported_rates_len) { - ieee80211_parse_bitrates(link->conf->chanreq.oper.width, - sband, params->supported_rates, - params->supported_rates_len, - &link_sta->pub->supp_rates[sband->band]); - } + params->supported_rates_len && + !ieee80211_parse_bitrates(link->conf->chanreq.oper.width, + sband, params->supported_rates, + params->supported_rates_len, + &link_sta->pub->supp_rates[sband->band])) + return -EINVAL; if (params->ht_capa) ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, @@ -2876,7 +2876,7 @@ static int ieee80211_scan(struct wiphy *wiphy, * the frames sent while scanning on other channel will be * lost) */ - if (sdata->deflink.u.ap.beacon && + if (ieee80211_num_beaconing_links(sdata) && (!(wiphy->features & NL80211_FEATURE_AP_SCAN) || !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index cca6d14084d2..e3b46df95b71 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1349,6 +1349,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) goto out; } + link->radar_required = link->reserved_radar_required; list_move(&link->assigned_chanctx_list, &new_ctx->assigned_links); rcu_assign_pointer(link_conf->chanctx_conf, &new_ctx->conf); @@ -2097,6 +2098,9 @@ void ieee80211_link_release_channel(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + return; + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (rcu_access_pointer(link->conf->chanctx_conf)) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 1e9389c49a57..e6f937cfedcf 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -154,12 +154,6 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, p += scnprintf(p, bufsz + buf - p, - "target %uus interval %uus ecn %s\n", - codel_time_to_us(sta->cparams.target), - codel_time_to_us(sta->cparams.interval), - sta->cparams.ecn ? "yes" : "no"); - p += scnprintf(p, - bufsz + buf - p, "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n"); for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index bfe0514efca3..2f017dbbcb97 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1209,6 +1209,15 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) if ((_link = wiphy_dereference((local)->hw.wiphy, \ ___sdata->link[___link_id]))) +#define for_each_link_data(sdata, __link) \ + struct ieee80211_sub_if_data *__sdata = sdata; \ + for (int __link_id = 0; \ + __link_id < ARRAY_SIZE((__sdata)->link); __link_id++) \ + if ((!(__sdata)->vif.valid_links || \ + (__sdata)->vif.valid_links & BIT(__link_id)) && \ + ((__link) = sdata_dereference((__sdata)->link[__link_id], \ + (__sdata)))) + static inline int ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems, struct cfg80211_rnr_elems *rnr_elems, @@ -2061,6 +2070,9 @@ static inline void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata ieee80211_vif_set_links(sdata, 0, 0); } +void ieee80211_apvlan_link_setup(struct ieee80211_sub_if_data *sdata); +void ieee80211_apvlan_link_clear(struct ieee80211_sub_if_data *sdata); + /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); void ieee80211_tx_pending(struct tasklet_struct *t); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7e1e561ef76c..209d6ffa8e42 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -494,6 +494,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do break; list_del_rcu(&sdata->u.mntr.list); break; + case NL80211_IFTYPE_AP_VLAN: + ieee80211_apvlan_link_clear(sdata); + break; default: break; } @@ -1268,6 +1271,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) sdata->crypto_tx_tailroom_needed_cnt += master->crypto_tx_tailroom_needed_cnt; + ieee80211_apvlan_link_setup(sdata); + break; } case NL80211_IFTYPE_AP: @@ -1322,7 +1327,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_AP_VLAN: /* no need to tell driver, but set carrier and chanctx */ if (sdata->bss->active) { - ieee80211_link_vlan_copy_chanctx(&sdata->deflink); + struct ieee80211_link_data *link; + + for_each_link_data(sdata, link) { + ieee80211_link_vlan_copy_chanctx(link); + } + netif_carrier_on(dev); ieee80211_set_vif_encap_ops(sdata); } else { diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 46092fbcde90..cafedc5ecd44 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -2,7 +2,7 @@ /* * MLO link handling * - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation */ #include <linux/slab.h> #include <linux/kernel.h> @@ -12,6 +12,71 @@ #include "key.h" #include "debugfs_netdev.h" +static void ieee80211_update_apvlan_links(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_sub_if_data *vlan; + struct ieee80211_link_data *link; + u16 ap_bss_links = sdata->vif.valid_links; + u16 new_links, vlan_links; + unsigned long add; + + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { + int link_id; + + if (!vlan) + continue; + + /* No support for 4addr with MLO yet */ + if (vlan->wdev.use_4addr) + return; + + vlan_links = vlan->vif.valid_links; + + new_links = ap_bss_links; + + add = new_links & ~vlan_links; + if (!add) + continue; + + ieee80211_vif_set_links(vlan, add, 0); + + for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { + link = sdata_dereference(vlan->link[link_id], vlan); + ieee80211_link_vlan_copy_chanctx(link); + } + } +} + +void ieee80211_apvlan_link_setup(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_sub_if_data *ap_bss = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + u16 new_links = ap_bss->vif.valid_links; + unsigned long add; + int link_id; + + if (!ap_bss->vif.valid_links) + return; + + add = new_links; + for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { + sdata->wdev.valid_links |= BIT(link_id); + ether_addr_copy(sdata->wdev.links[link_id].addr, + ap_bss->wdev.links[link_id].addr); + } + + ieee80211_vif_set_links(sdata, new_links, 0); +} + +void ieee80211_apvlan_link_clear(struct ieee80211_sub_if_data *sdata) +{ + if (!sdata->wdev.valid_links) + return; + + sdata->wdev.valid_links = 0; + ieee80211_vif_clear_links(sdata); +} + void ieee80211_link_setup(struct ieee80211_link_data *link) { if (link->sdata->vif.type == NL80211_IFTYPE_STATION) @@ -28,8 +93,16 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, if (link_id < 0) link_id = 0; - rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf); - rcu_assign_pointer(sdata->link[link_id], link); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + struct ieee80211_sub_if_data *ap_bss; + struct ieee80211_bss_conf *ap_bss_conf; + + ap_bss = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + ap_bss_conf = sdata_dereference(ap_bss->vif.link_conf[link_id], + ap_bss); + memcpy(link_conf, ap_bss_conf, sizeof(*link_conf)); + } link->sdata = sdata; link->link_id = link_id; @@ -51,6 +124,7 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, if (!deflink) { switch (sdata->vif.type) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: ether_addr_copy(link_conf->addr, sdata->wdev.links[link_id].addr); link_conf->bssid = link_conf->addr; @@ -65,6 +139,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, ieee80211_link_debugfs_add(link); } + + rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf); + rcu_assign_pointer(sdata->link[link_id], link); } void ieee80211_link_stop(struct ieee80211_link_data *link) @@ -174,6 +251,7 @@ static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: /* in an AP all links are always active */ sdata->vif.active_links = valid_links; @@ -275,14 +353,25 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, ieee80211_set_vif_links_bitmaps(sdata, new_links, dormant_links); /* tell the driver */ - ret = drv_change_vif_links(sdata->local, sdata, - old_links & old_active, - new_links & sdata->vif.active_links, - old); + if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN) + ret = drv_change_vif_links(sdata->local, sdata, + old_links & old_active, + new_links & sdata->vif.active_links, + old); if (!new_links) ieee80211_debugfs_recreate_netdev(sdata, false); + + if (sdata->vif.type == NL80211_IFTYPE_AP) + ieee80211_update_apvlan_links(sdata); } + /* + * Ignore errors if we are only removing links as removal should + * always succeed + */ + if (!new_links) + ret = 0; + if (ret) { /* restore config */ memcpy(sdata->link, old_data, sizeof(old_data)); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 2922a9fec950..ba8aeb47bffd 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -636,7 +636,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, mesh_path_add_gate(mpath); } rcu_read_unlock(); - } else { + } else if (ifmsh->mshcfg.dot11MeshForwarding) { rcu_read_lock(); mpath = mesh_path_lookup(sdata, target_addr); if (mpath) { @@ -654,6 +654,8 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, } } rcu_read_unlock(); + } else { + forward = false; } if (reply) { @@ -671,7 +673,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, } } - if (forward && ifmsh->mshcfg.dot11MeshForwarding) { + if (forward) { u32 preq_id; u8 hopcount; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 16bb3db67eaa..5a9a84a0cc35 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2243,7 +2243,8 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) if (!local->ops->abort_channel_switch) return; - ieee80211_link_unreserve_chanctx(link); + if (rcu_access_pointer(link->conf->chanctx_conf)) + ieee80211_link_unreserve_chanctx(link); ieee80211_vif_unblock_queues_csa(sdata); @@ -4291,6 +4292,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_prep_tx_info info = { .subtype = IEEE80211_STYPE_AUTH, }; + bool sae_need_confirm = false; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -4336,6 +4338,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY; ifmgd->auth_data->timeout_started = true; run_again(sdata, ifmgd->auth_data->timeout); + if (auth_transaction == 1) + sae_need_confirm = true; goto notify_driver; } @@ -4379,6 +4383,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if (!ieee80211_mark_sta_auth(sdata)) return; /* ignore frame -- wait for timeout */ } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && + auth_transaction == 1) { + sae_need_confirm = true; + } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && auth_transaction == 2) { sdata_info(sdata, "SAE peer confirmed\n"); ifmgd->auth_data->peer_confirmed = true; @@ -4386,7 +4393,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); notify_driver: - drv_mgd_complete_tx(sdata->local, sdata, &info); + if (!sae_need_confirm) + drv_mgd_complete_tx(sdata->local, sdata, &info); } #define case_WLAN(type) \ @@ -6702,6 +6710,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee80211_bss_conf *bss_conf = link->conf; struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg; struct ieee80211_mgmt *mgmt = (void *) hdr; + struct ieee80211_ext *ext = NULL; size_t baselen; struct ieee802_11_elems *elems; struct ieee80211_local *local = sdata->local; @@ -6727,7 +6736,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, /* Process beacon from the current BSS */ bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type); if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - struct ieee80211_ext *ext = (void *) mgmt; + ext = (void *)mgmt; variable = ext->u.s1g_beacon.variable + ieee80211_s1g_optional_len(ext->frame_control); } @@ -6914,7 +6923,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) || - ieee80211_is_s1g_short_beacon(mgmt->frame_control)) + (ext && ieee80211_is_s1g_short_beacon(ext->frame_control, + parse_params.start, + parse_params.len))) goto free; link->u.mgd.beacon_crc = ncrc; link->u.mgd.beacon_crc_valid = true; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 6da39c864f45..922ea9a6e241 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -758,7 +758,6 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, { const struct element *elem, *sub; size_t profile_len = 0; - bool found = false; if (!bss || !bss->transmitted_bss) return profile_len; @@ -809,15 +808,14 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, index[2], new_bssid); if (ether_addr_equal(new_bssid, bss->bssid)) { - found = true; elems->bssid_index_len = index[1]; elems->bssid_index = (void *)&index[2]; - break; + return profile_len; } } } - return found ? profile_len : 0; + return 0; } static void diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 3dc9752188d5..1b045b62961f 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -971,8 +971,6 @@ int rate_control_set_rates(struct ieee80211_hw *hw, if (sta->uploaded) drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); - ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta)); - return 0; } EXPORT_SYMBOL(rate_control_set_rates); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8e1fbdd3bff1..8c0d91dfd7e2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4283,10 +4283,16 @@ static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx, rx->link_sta = NULL; } - if (link_id < 0) - rx->link = &rx->sdata->deflink; - else if (!ieee80211_rx_data_set_link(rx, link_id)) + if (link_id < 0) { + if (ieee80211_vif_is_mld(&rx->sdata->vif) && + sta && !sta->sta.valid_links) + rx->link = + rcu_dereference(rx->sdata->link[sta->deflink.link_id]); + else + rx->link = &rx->sdata->deflink; + } else if (!ieee80211_rx_data_set_link(rx, link_id)) { return false; + } return true; } @@ -4481,6 +4487,10 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) if (!multicast && !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1)) return false; + /* reject invalid/our STA address */ + if (!is_valid_ether_addr(hdr->addr2) || + ether_addr_equal(sdata->dev->dev_addr, hdr->addr2)) + return false; if (!rx->sta) { int rate_idx; if (status->encoding != RX_ENC_LEGACY) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 49095f19a0f2..4eb45e08b97e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -18,7 +18,6 @@ #include <linux/timer.h> #include <linux/rtnetlink.h> -#include <net/codel.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -683,12 +682,6 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } - sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; - sta->cparams.target = MS2TIME(20); - sta->cparams.interval = MS2TIME(100); - sta->cparams.ecn = true; - sta->cparams.ce_threshold_selector = 0; - sta->cparams.ce_threshold_mask = 0; sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); @@ -2878,27 +2871,6 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) return sta->deflink.status_stats.last_ack; } -static void sta_update_codel_params(struct sta_info *sta, u32 thr) -{ - if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { - sta->cparams.target = MS2TIME(50); - sta->cparams.interval = MS2TIME(300); - sta->cparams.ecn = false; - } else { - sta->cparams.target = MS2TIME(20); - sta->cparams.interval = MS2TIME(100); - sta->cparams.ecn = true; - } -} - -void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, - u32 thr) -{ - struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - - sta_update_codel_params(sta, thr); -} - int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = sta->sdata; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 9195d5a2de0a..a9cfeeb13e53 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -466,14 +466,6 @@ struct ieee80211_fragment_cache { unsigned int next; }; -/* - * The bandwidth threshold below which the per-station CoDel parameters will be - * scaled to be more lenient (to prevent starvation of slow stations). This - * value will be scaled by the number of active stations when it is being - * applied. - */ -#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */ - /** * struct link_sta_info - Link STA information * All link specific sta info are stored here for reference. This can be @@ -619,7 +611,6 @@ struct link_sta_info { * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @rcu_head: RCU head used for freeing this station struct - * @cparams: CoDel parameters for this station. * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @amsdu_mesh_control: track the mesh A-MSDU format used by the peer: * @@ -710,8 +701,6 @@ struct sta_info { struct dentry *debugfs_dir; #endif - struct codel_params cparams; - u8 reserved_tid; s8 amsdu_mesh_control; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index f07b40916485..1cb42c5b9b04 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1421,7 +1421,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) return -EOPNOTSUPP; - if (sdata->vif.type != NL80211_IFTYPE_STATION) + if (sdata->vif.type != NL80211_IFTYPE_STATION || !sdata->vif.cfg.assoc) return -EINVAL; switch (oper) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0ff8b56f5807..9c515fb8ebe1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -622,6 +622,12 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) else tx->key = NULL; + if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) { + if (tx->key && tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + info->control.hw_key = &tx->key->conf; + return TX_CONTINUE; + } + if (tx->key) { bool skip_hw = false; @@ -1401,16 +1407,9 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq, local = container_of(fq, struct ieee80211_local, fq); txqi = container_of(tin, struct txq_info, tin); + cparams = &local->cparams; cstats = &txqi->cstats; - if (txqi->txq.sta) { - struct sta_info *sta = container_of(txqi->txq.sta, - struct sta_info, sta); - cparams = &sta->cparams; - } else { - cparams = &local->cparams; - } - if (flow == &tin->default_flow) cvars = &txqi->def_cvars; else @@ -1444,7 +1443,7 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local, { struct fq *fq = &local->fq; struct fq_tin *tin = &txqi->tin; - u32 flow_idx = fq_flow_idx(fq, skb); + u32 flow_idx; ieee80211_set_skb_enqueue_time(skb); @@ -1460,6 +1459,7 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local, IEEE80211_TX_INTCFL_NEED_TXPROCESSING; __skb_queue_tail(&txqi->frags, skb); } else { + flow_idx = fq_flow_idx(fq, skb); fq_tin_enqueue(fq, tin, flow_idx, skb, fq_skb_free_func); } @@ -3892,6 +3892,7 @@ begin: * The key can be removed while the packet was queued, so need to call * this here to get the current key. */ + info->control.hw_key = NULL; r = ieee80211_tx_h_select_key(&tx); if (r != TX_CONTINUE) { ieee80211_free_txskb(&local->hw, skb); @@ -4113,7 +4114,9 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw, spin_lock_bh(&local->active_txq_lock[txq->ac]); - has_queue = force || txq_has_queue(txq); + has_queue = force || + (!test_bit(IEEE80211_TXQ_STOP, &txqi->flags) && + txq_has_queue(txq)); if (list_empty(&txqi->schedule_order) && (has_queue || ieee80211_txq_keep_active(txqi))) { /* If airtime accounting is active, always enqueue STAs at the @@ -4523,8 +4526,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, IEEE80211_TX_CTRL_MLO_LINK_UNSPEC, NULL); } else if (ieee80211_vif_is_mld(&sdata->vif) && - sdata->vif.type == NL80211_IFTYPE_AP && - !ieee80211_hw_check(&sdata->local->hw, MLO_MCAST_MULTI_LINK_TX)) { + ((sdata->vif.type == NL80211_IFTYPE_AP && + !ieee80211_hw_check(&sdata->local->hw, MLO_MCAST_MULTI_LINK_TX)) || + (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + !sdata->wdev.use_4addr))) { ieee80211_mlo_multicast_tx(dev, skb); } else { normal: diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a98ae563613c..77638e965726 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3908,7 +3908,7 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, { u64 tsf = drv_get_tsf(local, sdata); u64 dtim_count = 0; - u16 beacon_int = sdata->vif.bss_conf.beacon_int * 1024; + u32 beacon_int = sdata->vif.bss_conf.beacon_int * 1024; u8 dtim_period = sdata->vif.bss_conf.dtim_period; struct ps_data *ps; u8 bcns_from_dtim; diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 57850d4dac5d..70aeebfc4182 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -73,7 +73,6 @@ static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen) lock_sock(sk); - /* TODO: allow rebind */ if (sk_hashed(sk)) { rc = -EADDRINUSE; goto out_release; @@ -629,15 +628,36 @@ static void mctp_sk_close(struct sock *sk, long timeout) static int mctp_sk_hash(struct sock *sk) { struct net *net = sock_net(sk); + struct sock *existing; + struct mctp_sock *msk; + int rc; + + msk = container_of(sk, struct mctp_sock, sk); /* Bind lookup runs under RCU, remain live during that. */ sock_set_flag(sk, SOCK_RCU_FREE); mutex_lock(&net->mctp.bind_lock); + + /* Prevent duplicate binds. */ + sk_for_each(existing, &net->mctp.binds) { + struct mctp_sock *mex = + container_of(existing, struct mctp_sock, sk); + + if (mex->bind_type == msk->bind_type && + mex->bind_addr == msk->bind_addr && + mex->bind_net == msk->bind_net) { + rc = -EADDRINUSE; + goto out; + } + } + sk_add_node_rcu(sk, &net->mctp.binds); - mutex_unlock(&net->mctp.bind_lock); + rc = 0; - return 0; +out: + mutex_unlock(&net->mctp.bind_lock); + return rc; } static void mctp_sk_unhash(struct sock *sk) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index df62638b6498..3373b6b34dc7 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -81,8 +81,8 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) if (index < net->mpls.platform_labels) { struct mpls_route __rcu **platform_label = - rcu_dereference(net->mpls.platform_label); - rt = rcu_dereference(platform_label[index]); + rcu_dereference_rtnl(net->mpls.platform_label); + rt = rcu_dereference_rtnl(platform_label[index]); } return rt; } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 23949ae2a3a8..a97505b78671 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -979,8 +979,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->mp_join) goto reset; subflow->mp_capable = 0; + if (!mptcp_try_fallback(ssk)) + goto reset; pr_fallback(msk); - mptcp_do_fallback(ssk); return false; } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 620264c75dc2..2c8815daf5b0 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -303,8 +303,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) pr_debug("fail_seq=%llu\n", fail_seq); - if (!READ_ONCE(msk->allow_infinite_fallback)) + /* After accepting the fail, we can't create any other subflows */ + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); return; + } + msk->allow_subflows = false; + spin_unlock_bh(&msk->fallback_lock); if (!subflow->fail_tout) { pr_debug("send MP_FAIL response and infinite map\n"); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 42b239d9b2b3..d865d08a0c5e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -623,10 +623,9 @@ static bool mptcp_check_data_fin(struct sock *sk) static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) { - if (READ_ONCE(msk->allow_infinite_fallback)) { + if (mptcp_try_fallback(ssk)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONFALLBACK); - mptcp_do_fallback(ssk); } else { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET); mptcp_subflow_reset(ssk); @@ -878,7 +877,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk) { mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); } @@ -889,6 +888,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_state != TCP_ESTABLISHED) return false; + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); + /* attach to msk socket only after we are sure we will deal with it * at close time */ @@ -897,7 +904,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); - mptcp_subflow_joined(msk, ssk); mptcp_stop_tout_timer(sk); __mptcp_propagate_sndbuf(sk, ssk); return true; @@ -1236,10 +1242,14 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, mpext->infinite_map = 1; mpext->data_len = 0; + if (!mptcp_try_fallback(ssk)) { + mptcp_subflow_reset(ssk); + return; + } + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX); mptcp_subflow_ctx(ssk)->send_infinite_map = 0; pr_fallback(msk); - mptcp_do_fallback(ssk); } #define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1)) @@ -2643,9 +2653,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) static void __mptcp_retrans(struct sock *sk) { + struct mptcp_sendmsg_info info = { .data_lock_held = true, }; struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; - struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; struct sock *ssk; int ret, err; @@ -2690,6 +2700,18 @@ static void __mptcp_retrans(struct sock *sk) info.sent = 0; info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + + /* + * make the whole retrans decision, xmit, disallow + * fallback atomic + */ + spin_lock_bh(&msk->fallback_lock); + if (__mptcp_check_fallback(msk)) { + spin_unlock_bh(&msk->fallback_lock); + release_sock(ssk); + return; + } + while (info.sent < info.limit) { ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) @@ -2703,8 +2725,9 @@ static void __mptcp_retrans(struct sock *sk) len = max(copied, len); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; } + spin_unlock_bh(&msk->fallback_lock); release_sock(ssk); } @@ -2833,7 +2856,8 @@ static void __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); - WRITE_ONCE(msk->allow_infinite_fallback, true); + msk->allow_infinite_fallback = true; + msk->allow_subflows = true; msk->recovery = false; msk->subflow_id = 1; msk->last_data_sent = tcp_jiffies32; @@ -2841,6 +2865,7 @@ static void __mptcp_init_sock(struct sock *sk) msk->last_ack_recv = tcp_jiffies32; mptcp_pm_data_init(msk); + spin_lock_init(&msk->fallback_lock); /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); @@ -3224,7 +3249,16 @@ static int mptcp_disconnect(struct sock *sk, int flags) * subflow */ mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); + + /* The first subflow is already in TCP_CLOSE status, the following + * can't overlap with a fallback anymore + */ + spin_lock_bh(&msk->fallback_lock); + msk->allow_subflows = true; + msk->allow_infinite_fallback = true; WRITE_ONCE(msk->flags, 0); + spin_unlock_bh(&msk->fallback_lock); + msk->cb_flags = 0; msk->recovery = false; WRITE_ONCE(msk->can_ack, false); @@ -3637,7 +3671,13 @@ bool mptcp_finish_join(struct sock *ssk) /* active subflow, already present inside the conn_list */ if (!list_empty(&subflow->node)) { + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); mptcp_propagate_sndbuf(parent, ssk); return true; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7e2f70f22b05..6f191b125978 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -342,10 +342,16 @@ struct mptcp_sock { u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; u8 scaling_ratio; + bool allow_subflows; u32 subflow_id; u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; + + spinlock_t fallback_lock; /* protects fallback, + * allow_infinite_fallback and + * allow_join + */ }; #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) @@ -1188,15 +1194,22 @@ static inline bool mptcp_check_fallback(const struct sock *sk) return __mptcp_check_fallback(msk); } -static inline void __mptcp_do_fallback(struct mptcp_sock *msk) +static inline bool __mptcp_try_fallback(struct mptcp_sock *msk) { if (__mptcp_check_fallback(msk)) { pr_debug("TCP fallback already done (msk=%p)\n", msk); - return; + return true; } - if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) - return; + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + + msk->allow_subflows = false; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); + spin_unlock_bh(&msk->fallback_lock); + return true; } static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) @@ -1208,14 +1221,15 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) TCPF_SYN_RECV | TCPF_LISTEN)); } -static inline void mptcp_do_fallback(struct sock *ssk) +static inline bool mptcp_try_fallback(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; struct mptcp_sock *msk; msk = mptcp_sk(sk); - __mptcp_do_fallback(msk); + if (!__mptcp_try_fallback(msk)) + return false; if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) { gfp_t saved_allocation = ssk->sk_allocation; @@ -1227,6 +1241,7 @@ static inline void mptcp_do_fallback(struct sock *ssk) tcp_shutdown(ssk, SEND_SHUTDOWN); ssk->sk_allocation = saved_allocation; } + return true; } #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) @@ -1236,7 +1251,7 @@ static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk, { pr_fallback(msk); subflow->request_mptcp = 0; - __mptcp_do_fallback(msk); + WARN_ON_ONCE(!__mptcp_try_fallback(msk)); } static inline bool mptcp_check_infinite_map(struct sk_buff *skb) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4c2aa45c466d..a05f201d194c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -543,9 +543,11 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp) { if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) { + if (!mptcp_try_fallback(sk)) + goto do_reset; + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); - mptcp_do_fallback(sk); pr_fallback(msk); goto fallback; } @@ -1288,20 +1290,29 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss mptcp_schedule_work(sk); } -static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) +static bool mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); unsigned long fail_tout; + /* we are really failing, prevent any later subflow join */ + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + msk->allow_subflows = false; + spin_unlock_bh(&msk->fallback_lock); + /* graceful failure can happen only on the MPC subflow */ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first))) - return; + return false; /* since the close timeout take precedence on the fail one, * no need to start the latter when the first is already set */ if (sock_flag((struct sock *)msk, SOCK_DEAD)) - return; + return true; /* we don't need extreme accuracy here, use a zero fail_tout as special * value meaning no fail timeout at all; @@ -1313,6 +1324,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) tcp_send_ack(ssk); mptcp_reset_tout_timer(msk, subflow->fail_tout); + return true; } static bool subflow_check_data_avail(struct sock *ssk) @@ -1373,17 +1385,16 @@ fallback: (subflow->mp_join || subflow->valid_csum_seen)) { subflow->send_mp_fail = 1; - if (!READ_ONCE(msk->allow_infinite_fallback)) { + if (!mptcp_subflow_fail(msk, ssk)) { subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; goto reset; } - mptcp_subflow_fail(msk, ssk); WRITE_ONCE(subflow->data_avail, true); return true; } - if (!READ_ONCE(msk->allow_infinite_fallback)) { + if (!mptcp_try_fallback(ssk)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ @@ -1399,8 +1410,6 @@ reset: WRITE_ONCE(subflow->data_avail, false); return false; } - - mptcp_do_fallback(ssk); } skb = skb_peek(&ssk->sk_receive_queue); @@ -1665,7 +1674,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, /* discard the subflow socket */ mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); - WRITE_ONCE(msk->allow_infinite_fallback, false); mptcp_stop_tout_timer(sk); return 0; @@ -1760,10 +1768,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, * needs it. * Update ns_tracker to current stack trace and refcounted tracker. */ - __netns_tracker_free(net, &sf->sk->ns_tracker, false); - sf->sk->sk_net_refcnt = 1; - get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sf->sk); err = tcp_set_ulp(sf->sk, "mptcp"); if (err) goto err_free; @@ -1845,7 +1850,7 @@ static void subflow_state_change(struct sock *sk) msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { - mptcp_do_fallback(sk); + WARN_ON_ONCE(!mptcp_try_fallback(sk)); pr_fallback(msk); subflow->conn_finished = 1; mptcp_propagate_state(parent, sk, subflow, NULL); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 2c260f33b55c..ad1f671ffc37 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -110,7 +110,7 @@ struct ncsi_channel_version { u8 update; /* NCSI version update */ char alpha1; /* NCSI version alpha1 */ char alpha2; /* NCSI version alpha2 */ - u8 fw_name[12]; /* Firmware name string */ + u8 fw_name[12 + 1]; /* Firmware name string */ u32 fw_version; /* Firmware version */ u16 pci_ids[4]; /* PCI identification */ u32 mf_id; /* Manufacture ID */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 8668888c5a2f..d5ed80731e89 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -775,6 +775,7 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr) ncv->alpha1 = rsp->alpha1; ncv->alpha2 = rsp->alpha2; memcpy(ncv->fw_name, rsp->fw_name, 12); + ncv->fw_name[12] = '\0'; ncv->fw_version = ntohl(rsp->fw_version); for (i = 0; i < ARRAY_SIZE(ncv->pci_ids); i++) ncv->pci_ids[i] = ntohs(rsp->pci_ids[i]); diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 3d64a4511fcf..b5e4ca9026a8 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -17,7 +17,7 @@ static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb, .skb = skb, }; - return bpf_prog_run(prog, &ctx); + return bpf_prog_run_pin_on_cpu(prog, &ctx); } struct bpf_nf_link { @@ -295,6 +295,9 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, if (off < 0 || off >= sizeof(struct bpf_nf_ctx)) return false; + if (off % size != 0) + return false; + if (type == BPF_WRITE) return false; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 456446d7af20..f5bde4f13958 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1121,6 +1121,12 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &nf_conntrack_hash[repl_idx]); + /* confirmed bit must be set after hlist add, not before: + * loser_ct can still be visible to other cpu due to + * SLAB_TYPESAFE_BY_RCU. + */ + smp_mb__before_atomic(); + set_bit(IPS_CONFIRMED_BIT, &loser_ct->status); NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; @@ -1257,8 +1263,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) * user context, else we insert an already 'dead' hash, blocking * further use of that particular connection -JM. */ - ct->status |= IPS_CONFIRMED; - if (unlikely(nf_ct_is_dying(ct))) { NF_CT_STAT_INC(net, insert_failed); goto dying; @@ -1290,7 +1294,7 @@ chaintoolong: } } - /* Timer relative to confirmation time, not original + /* Timeout is relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ ct->timeout += nfct_time_stamp; @@ -1298,11 +1302,21 @@ chaintoolong: __nf_conntrack_insert_prepare(ct); /* Since the lookup is lockless, hash insertion must be done after - * starting the timer and setting the CONFIRMED bit. The RCU barriers - * guarantee that no other CPU can find the conntrack before the above - * stores are visible. + * setting ct->timeout. The RCU barriers guarantee that no other CPU + * can find the conntrack before the above stores are visible. */ __nf_conntrack_hash_insert(ct, hash, reply_hash); + + /* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups + * skip entries that lack this bit. This happens when a CPU is looking + * at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU + * or when another CPU encounters this entry right after the insertion + * but before the set-confirm-bit below. This bit must not be set until + * after __nf_conntrack_hash_insert(). + */ + smp_mb__before_atomic(); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6a1239433830..18a91c031554 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -860,8 +860,6 @@ errout: static int ctnetlink_done(struct netlink_callback *cb) { - if (cb->args[1]) - nf_ct_put((struct nf_conn *)cb->args[1]); kfree(cb->data); return 0; } @@ -1184,19 +1182,26 @@ ignore_entry: return 0; } +static unsigned long ctnetlink_get_id(const struct nf_conn *ct) +{ + unsigned long id = nf_ct_get_id(ct); + + return id ? id : 1; +} + static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0; struct net *net = sock_net(skb->sk); - struct nf_conn *ct, *last; + unsigned long last_id = cb->args[1]; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nf_conn *nf_ct_evict[8]; + struct nf_conn *ct; int res, i; spinlock_t *lockp; - last = (struct nf_conn *)cb->args[1]; i = 0; local_bh_disable(); @@ -1233,7 +1238,7 @@ restart: continue; if (cb->args[1]) { - if (ct != last) + if (ctnetlink_get_id(ct) != last_id) continue; cb->args[1] = 0; } @@ -1246,8 +1251,7 @@ restart: NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct, true, flags); if (res < 0) { - nf_conntrack_get(&ct->ct_general); - cb->args[1] = (unsigned long)ct; + cb->args[1] = ctnetlink_get_id(ct); spin_unlock(lockp); goto out; } @@ -1260,12 +1264,10 @@ restart: } out: local_bh_enable(); - if (last) { + if (last_id) { /* nf ct hash resize happened, now clear the leftover. */ - if ((struct nf_conn *)cb->args[1] == last) + if (cb->args[1] == last_id) cb->args[1] = 0; - - nf_ct_put(last); } while (i) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bdee187bc5dd..3743e4249dc8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1029,11 +1029,6 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, NFTA_TABLE_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELTABLE) { - nlmsg_end(skb, nlh); - return 0; - } - if (nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags & NFT_TABLE_F_MASK))) goto nla_put_failure; @@ -1889,11 +1884,6 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, NFTA_CHAIN_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELCHAIN && !hook_list) { - nlmsg_end(skb, nlh); - return 0; - } - if (nft_is_base_chain(chain)) { const struct nft_base_chain *basechain = nft_base_chain(chain); struct nft_stats __percpu *stats; @@ -3884,7 +3874,7 @@ void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) /* can only be used if rule is no longer visible to dumps */ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { - lockdep_commit_lock_is_held(ctx->net); + WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net)); nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); @@ -4678,11 +4668,6 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, NFTA_SET_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELSET) { - nlmsg_end(skb, nlh); - return 0; - } - if (set->flags != 0) if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags))) goto nla_put_failure; @@ -5674,7 +5659,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { - lockdep_commit_lock_is_held(ctx->net); + WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net)); switch (phase) { case NFT_TRANS_PREPARE_ERROR: @@ -8017,11 +8002,6 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, NFTA_OBJ_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELOBJ) { - nlmsg_end(skb, nlh); - return 0; - } - if (nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) @@ -9040,11 +9020,6 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, NFTA_FLOWTABLE_PAD)) goto nla_put_failure; - if (event == NFT_MSG_DELFLOWTABLE && !hook_list) { - nlmsg_end(skb, nlh); - return 0; - } - if (nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) || nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags))) goto nla_put_failure; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 0529e4ef7520..9e4e25f2458f 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -663,6 +663,9 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f, check_add_overflow(rules, extra, &rules_alloc)) return -EOVERFLOW; + if (rules_alloc > (INT_MAX / sizeof(*new_mt))) + return -ENOMEM; + new_mt = kvmalloc_array(rules_alloc, sizeof(*new_mt), GFP_KERNEL_ACCOUNT); if (!new_mt) return -ENOMEM; @@ -1216,7 +1219,7 @@ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int c mem = s; mem -= s->align_off; - kfree(mem); + kvfree(mem); } /** @@ -1237,10 +1240,9 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, void *scratch_aligned; u32 align_off; #endif - scratch = kzalloc_node(struct_size(scratch, map, - bsize_max * 2) + - NFT_PIPAPO_ALIGN_HEADROOM, - GFP_KERNEL_ACCOUNT, cpu_to_node(i)); + scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) + + NFT_PIPAPO_ALIGN_HEADROOM, + GFP_KERNEL_ACCOUNT, cpu_to_node(i)); if (!scratch) { /* On failure, there's no need to undo previous * allocations: this means that some scratch maps have @@ -1499,6 +1501,9 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) src->groups * NFT_PIPAPO_BUCKETS(src->bb)); if (src->rules > 0) { + if (src->rules_alloc > (INT_MAX / sizeof(*src->mt))) + goto out_mt; + dst->mt = kvmalloc_array(src->rules_alloc, sizeof(*src->mt), GFP_KERNEL_ACCOUNT); diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 7c6bf1c16813..0ca1cdfc4095 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -38,8 +38,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par) nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { - pr_info_ratelimited("accounting object `%s' does not exists\n", - info->name); + pr_info_ratelimited("accounting object `%.*s' does not exist\n", + NFACCT_NAME_MAX, info->name); return -ENOENT; } info->nfacct = nfacct; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 775d707ec708..8b060465a2be 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) WARN_ON(skb->sk != NULL); skb->sk = sk; skb->destructor = netlink_skb_destructor; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); sk_mem_charge(sk, skb->truesize); } @@ -795,16 +794,6 @@ static int netlink_release(struct socket *sock) sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); - /* Because struct net might disappear soon, do not keep a pointer. */ - if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { - __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); - /* Because of deferred_put_nlk_sk and use of work queue, - * it is possible netns will be freed before this socket. - */ - sock_net_set(sk, &init_net); - __netns_tracker_alloc(&init_net, &sk->ns_tracker, - false, GFP_KERNEL); - } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } @@ -1216,41 +1205,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk) { + DECLARE_WAITQUEUE(wait, current); struct netlink_sock *nlk; + unsigned int rmem; nlk = nlk_sk(sk); + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state))) { - DECLARE_WAITQUEUE(wait, current); - if (!*timeo) { - if (!ssk || netlink_is_kernel(ssk)) - netlink_overrun(sk); - sock_put(sk); - kfree_skb(skb); - return -EAGAIN; - } - - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&nlk->wait, &wait); + if ((rmem == skb->truesize || rmem <= READ_ONCE(sk->sk_rcvbuf)) && + !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { + netlink_skb_set_owner_r(skb, sk); + return 0; + } - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && - !sock_flag(sk, SOCK_DEAD)) - *timeo = schedule_timeout(*timeo); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&nlk->wait, &wait); + if (!*timeo) { + if (!ssk || netlink_is_kernel(ssk)) + netlink_overrun(sk); sock_put(sk); + kfree_skb(skb); + return -EAGAIN; + } - if (signal_pending(current)) { - kfree_skb(skb); - return sock_intr_errno(*timeo); - } - return 1; + __set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&nlk->wait, &wait); + rmem = atomic_read(&sk->sk_rmem_alloc); + + if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) || + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && + !sock_flag(sk, SOCK_DEAD)) + *timeo = schedule_timeout(*timeo); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&nlk->wait, &wait); + sock_put(sk); + + if (signal_pending(current)) { + kfree_skb(skb); + return sock_intr_errno(*timeo); } - netlink_skb_set_owner_r(skb, sk); - return 0; + + return 1; } static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) @@ -1310,6 +1306,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { ret = skb->len; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; netlink_deliver_tap_kernel(sk, ssk, skb); @@ -1386,13 +1383,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check); static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); + unsigned int rmem, rcvbuf; - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + + if ((rmem == skb->truesize || rmem <= rcvbuf) && !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); - return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); + return rmem > (rcvbuf >> 1); } + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); return -1; } @@ -2248,6 +2251,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; + unsigned int rmem, rcvbuf; size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; @@ -2261,9 +2265,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken) goto errout_skb; } - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) - goto errout_skb; - /* NLMSG_GOODSIZE is small to avoid high order allocations being * required, but it makes sense to _attempt_ a 16K bytes allocation * to reduce number of system calls on dump operations, if user @@ -2286,6 +2287,13 @@ static int netlink_dump(struct sock *sk, bool lock_taken) if (!skb) goto errout_skb; + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + if (rmem != skb->truesize && rmem >= rcvbuf) { + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + goto errout_skb; + } + /* Trim skb to allocated size. User is expected to provide buffer as * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at * netlink_recvmsg())). dump will pack as many smaller messages as diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index ed1508a9e093..aab107727f18 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -119,22 +119,22 @@ static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver) memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart)); nu->tty = tty; - tty->disc_data = nu; skb_queue_head_init(&nu->tx_q); INIT_WORK(&nu->write_work, nci_uart_write_work); spin_lock_init(&nu->rx_lock); ret = nu->ops.open(nu); if (ret) { - tty->disc_data = NULL; kfree(nu); + return ret; } else if (!try_module_get(nu->owner)) { nu->ops.close(nu); - tty->disc_data = NULL; kfree(nu); return -ENOENT; } - return ret; + tty->disc_data = nu; + + return 0; } /* ------ LDISC part ------ */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f3cecb3e4bcb..e8589fede4d4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2784,7 +2784,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; - long timeo = 0; + long timeo; mutex_lock(&po->pg_vec_lock); @@ -2838,22 +2838,28 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz) size_max = dev->mtu + reserve + VLAN_HLEN; + timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); reinit_completion(&po->skb_completion); do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { - if (need_wait && skb) { - timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); + /* Note: packet_read_pending() might be slow if we + * have to call it as it's per_cpu variable, but in + * fast-path we don't have to call it, only when ph + * is NULL, we need to check the pending_refcnt. + */ + if (need_wait && packet_read_pending(&po->tx_ring)) { timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); if (timeo <= 0) { err = !timeo ? -ETIMEDOUT : -ERESTARTSYS; goto out_put; } - } - /* check for additional frames */ - continue; + /* check for additional frames */ + continue; + } else + break; } skb = NULL; @@ -2942,14 +2948,7 @@ tpacket_error: } packet_increment_head(&po->tx_ring); len_sum += tp_len; - } while (likely((ph != NULL) || - /* Note: packet_read_pending() might be slow if we have - * to call it as it's per_cpu variable, but in fast-path - * we already short-circuit the loop with the first - * condition, and luckily don't have to go that path - * anyway. - */ - (need_wait && packet_read_pending(&po->tx_ring)))); + } while (1); err = len_sum; goto out_put; @@ -4563,10 +4562,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, spin_lock(&po->bind_lock); was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); num = po->num; - if (was_running) { - WRITE_ONCE(po->num, 0); + WRITE_ONCE(po->num, 0); + if (was_running) __unregister_prot_hook(sk, false); - } + spin_unlock(&po->bind_lock); synchronize_net(); @@ -4598,10 +4597,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, mutex_unlock(&po->pg_vec_lock); spin_lock(&po->bind_lock); - if (was_running) { - WRITE_ONCE(po->num, num); + WRITE_ONCE(po->num, num); + if (was_running) register_prot_hook(sk); - } + spin_unlock(&po->bind_lock); if (pg_vec && (po->tp_version > TPACKET_V2)) { /* Because we don't support block-based V3 on tx-ring */ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 53a858478e22..62527e1ebb88 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -826,6 +826,7 @@ static struct sock *pep_sock_accept(struct sock *sk, } /* Check for duplicate pipe handle */ + pn_skb_get_dst_sockaddr(skb, &dst); newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle); if (unlikely(newsk)) { __sock_put(newsk); @@ -850,7 +851,6 @@ static struct sock *pep_sock_accept(struct sock *sk, newsk->sk_destruct = pipe_destruct; newpn = pep_sk(newsk); - pn_skb_get_dst_sockaddr(skb, &dst); pn_skb_get_src_sockaddr(skb, &src); newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); newpn->pn_sk.dobject = pn_sockaddr_get_object(&src); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 0581c53e6517..3cc2f303bf78 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -504,12 +504,8 @@ bool rds_tcp_tune(struct socket *sock) release_sock(sk); return false; } - /* Update ns_tracker to current stack trace and refcounted tracker */ - __netns_tracker_free(net, &sk->ns_tracker, false); - - sk->sk_net_refcnt = 1; - netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); + put_net(net); } rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index fee772b4637c..a7054546f52d 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -497,22 +497,15 @@ void rose_rt_device_down(struct net_device *dev) t = rose_node; rose_node = rose_node->next; - for (i = 0; i < t->count; i++) { + for (i = t->count - 1; i >= 0; i--) { if (t->neighbour[i] != s) continue; t->count--; - switch (i) { - case 0: - t->neighbour[0] = t->neighbour[1]; - fallthrough; - case 1: - t->neighbour[1] = t->neighbour[2]; - break; - case 2: - break; - } + memmove(&t->neighbour[i], &t->neighbour[i + 1], + sizeof(t->neighbour[0]) * + (t->count - i)); } if (t->count <= 0) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 0f5a1d77b890..37ac8a665678 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -149,6 +149,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, id_in_use: write_unlock(&rx->call_lock); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EBADSLT); rxrpc_cleanup_call(call); _leave(" = -EBADSLT"); return -EBADSLT; @@ -218,6 +219,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->call_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; + rxrpc_see_call(call, rxrpc_call_see_discard); rcu_assign_pointer(call->socket, rx); if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); @@ -253,6 +255,9 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, unsigned short call_tail, conn_tail, peer_tail; unsigned short call_count, conn_count; + if (!b) + return NULL; + /* #calls >= #conns >= #peers must hold true. */ call_head = smp_load_acquire(&b->call_backlog_head); call_tail = b->call_backlog_tail; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5ea9601efd05..ccfae607c9bb 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -590,6 +590,9 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) __be32 code; int ret, ioc; + if (sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) + return; /* Never abort an abort. */ + rxrpc_see_skb(skb, rxrpc_skb_see_reject); iov[0].iov_base = &whdr; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a482f88c5fc5..e24a44bae9a3 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -351,6 +351,16 @@ try_again: goto try_again; } + rxrpc_see_call(call, rxrpc_call_see_recvmsg); + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + rxrpc_see_call(call, rxrpc_call_see_already_released); + list_del_init(&call->recvmsg_link); + spin_unlock_irq(&rx->recvmsg_lock); + release_sock(&rx->sk); + trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0); + rxrpc_put_call(call, rxrpc_call_put_recvmsg); + goto try_again; + } if (!(flags & MSG_PEEK)) list_del_init(&call->recvmsg_link); else @@ -374,8 +384,13 @@ try_again: release_sock(&rx->sk); - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) - BUG(); + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { + rxrpc_see_call(call, rxrpc_call_see_already_released); + mutex_unlock(&call->user_mutex); + if (!(flags & MSG_PEEK)) + rxrpc_put_call(call, rxrpc_call_put_recvmsg); + goto try_again; + } if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { if (flags & MSG_CMSG_COMPAT) { diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 5dd41a012110..ae571bfe84c7 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -44,9 +44,9 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, newdscp); - ca->stats_dscp_set++; + atomic64_inc(&ca->stats_dscp_set); } else { - ca->stats_dscp_error++; + atomic64_inc(&ca->stats_dscp_error); } } break; @@ -57,9 +57,9 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, newdscp); - ca->stats_dscp_set++; + atomic64_inc(&ca->stats_dscp_set); } else { - ca->stats_dscp_error++; + atomic64_inc(&ca->stats_dscp_error); } } break; @@ -72,7 +72,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct tcf_ctinfo_params *cp, struct sk_buff *skb) { - ca->stats_cpmark_set++; + atomic64_inc(&ca->stats_cpmark_set); skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; } @@ -323,15 +323,18 @@ static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a, } if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET, - ci->stats_dscp_set, TCA_CTINFO_PAD)) + atomic64_read(&ci->stats_dscp_set), + TCA_CTINFO_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR, - ci->stats_dscp_error, TCA_CTINFO_PAD)) + atomic64_read(&ci->stats_dscp_error), + TCA_CTINFO_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET, - ci->stats_cpmark_set, TCA_CTINFO_PAD)) + atomic64_read(&ci->stats_cpmark_set), + TCA_CTINFO_PAD)) goto nla_put_failure; spin_unlock_bh(&ci->tcf_lock); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 518f52f65a49..c56a01992cb2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -334,17 +334,22 @@ out: return q; } -static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) +static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid, + struct netlink_ext_ack *extack) { unsigned long cl; const struct Qdisc_class_ops *cops = p->ops->cl_ops; - if (cops == NULL) - return NULL; + if (cops == NULL) { + NL_SET_ERR_MSG(extack, "Parent qdisc is not classful"); + return ERR_PTR(-EOPNOTSUPP); + } cl = cops->find(p, classid); - if (cl == 0) - return NULL; + if (cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class not found"); + return ERR_PTR(-ENOENT); + } return cops->leaf(p, cl); } @@ -779,15 +784,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { - bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; bool notify; int drops; - if (n == 0 && len == 0) - return; drops = max_t(int, n, 0); rcu_read_lock(); while ((parentid = sch->parent)) { @@ -796,17 +798,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) if (sch->flags & TCQ_F_NOPARENT) break; - /* Notify parent qdisc only if child qdisc becomes empty. - * - * If child was empty even before update then backlog - * counter is screwed and we skip notification because - * parent class is already passive. - * - * If the original child was offloaded then it is allowed - * to be seem as empty, so the parent is notified anyway. - */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n && - !qdisc_is_offloaded); + /* Notify parent qdisc only if child qdisc becomes empty. */ + notify = !sch->q.qlen; /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { @@ -815,6 +808,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) } cops = sch->ops->cl_ops; if (notify && cops->qlen_notify) { + /* Note that qlen_notify must be idempotent as it may get called + * multiple times. + */ cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); } @@ -1535,7 +1531,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); } else if (dev_ingress_queue(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } @@ -1546,6 +1542,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); return -ENOENT; } + if (IS_ERR(q)) + return PTR_ERR(q); if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Invalid handle"); @@ -1639,7 +1637,9 @@ replay: NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); + if (IS_ERR(q)) + return PTR_ERR(q); } else if (dev_ingress_queue_create(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 037f764822b9..82635dd2cfa5 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -651,6 +651,12 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); + for (i = nbands; i < oldbands; i++) { + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) + list_del_init(&q->classes[i].alist); + qdisc_purge_queue(q->classes[i].qdisc); + } + WRITE_ONCE(q->nbands, nbands); for (i = nstrict; i < q->nstrict; i++) { if (q->classes[i].qdisc->q.qlen) { @@ -658,11 +664,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].deficit = quanta[i]; } } - for (i = q->nbands; i < oldbands; i++) { - if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) - list_del_init(&q->classes[i].alist); - qdisc_purge_queue(q->classes[i].qdisc); - } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index b2494d24a542..1021681a5718 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -821,7 +821,9 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio) u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - BUG_ON(!hprio->row.rb_node); + if (unlikely(!hprio->row.rb_node)) + return NULL; + sp->root = hprio->row.rb_node; sp->pptr = &hprio->ptr; sp->pid = &hprio->last_ptr_id; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 51d4013b6121..f3e5ef9a9592 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -152,7 +152,7 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt, static const struct nla_policy mqprio_tc_entry_policy[TCA_MQPRIO_TC_ENTRY_MAX + 1] = { [TCA_MQPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32, - TC_QOPT_MAX_QUEUE), + TC_QOPT_MAX_QUEUE - 1), [TCA_MQPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32, TC_FP_EXPRESS, TC_FP_PREEMPTIBLE), diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 68a08f6d1fbc..2270547b51df 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -972,6 +972,41 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, return 0; } +static const struct Qdisc_class_ops netem_class_ops; + +static int check_netem_in_tree(struct Qdisc *sch, bool duplicates, + struct netlink_ext_ack *extack) +{ + struct Qdisc *root, *q; + unsigned int i; + + root = qdisc_root_sleeping(sch); + + if (sch != root && root->ops->cl_ops == &netem_class_ops) { + if (duplicates || + ((struct netem_sched_data *)qdisc_priv(root))->duplicate) + goto err; + } + + if (!qdisc_dev(root)) + return 0; + + hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) { + if (sch != q && q->ops->cl_ops == &netem_class_ops) { + if (duplicates || + ((struct netem_sched_data *)qdisc_priv(q))->duplicate) + goto err; + } + } + + return 0; + +err: + NL_SET_ERR_MSG(extack, + "netem: cannot mix duplicating netems with other netems in tree"); + return -EINVAL; +} + /* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) @@ -1030,6 +1065,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, q->gap = qopt->gap; q->counter = 0; q->loss = qopt->loss; + + ret = check_netem_in_tree(sch, qopt->duplicate, extack); + if (ret) + goto unlock; + q->duplicate = qopt->duplicate; /* for compatibility with earlier versions. diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index aa4fbd2fae29..5a345ef35c9f 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -412,7 +412,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, bool existing = false; struct nlattr *tb[TCA_QFQ_MAX + 1]; struct qfq_aggregate *new_agg = NULL; - u32 weight, lmax, inv_w; + u32 weight, lmax, inv_w, old_weight, old_lmax; int err; int delta_w; @@ -446,12 +446,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; - if (cl != NULL && - lmax == cl->agg->lmax && - weight == cl->agg->class_weight) - return 0; /* nothing to change */ + if (cl != NULL) { + sch_tree_lock(sch); + old_weight = cl->agg->class_weight; + old_lmax = cl->agg->lmax; + sch_tree_unlock(sch); + if (lmax == old_lmax && weight == old_weight) + return 0; /* nothing to change */ + } - delta_w = weight - (cl ? cl->agg->class_weight : 0); + delta_w = weight - (cl ? old_weight : 0); if (q->wsum + delta_w > QFQ_MAX_WSUM) { NL_SET_ERR_MSG_FMT_MOD(extack, @@ -535,9 +539,6 @@ destroy_class: static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) { - struct qfq_sched *q = qdisc_priv(sch); - - qfq_rm_from_agg(q, cl); gen_kill_estimator(&cl->rate_est); qdisc_put(cl->qdisc); kfree(cl); @@ -558,6 +559,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); + qfq_rm_from_agg(q, cl); sch_tree_unlock(sch); @@ -628,6 +630,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, { struct qfq_class *cl = (struct qfq_class *)arg; struct nlattr *nest; + u32 class_weight, lmax; tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle = cl->common.classid; @@ -636,8 +639,13 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; - if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) || - nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax)) + + sch_tree_lock(sch); + class_weight = cl->agg->class_weight; + lmax = cl->agg->lmax; + sch_tree_unlock(sch); + if (nla_put_u32(skb, TCA_QFQ_WEIGHT, class_weight) || + nla_put_u32(skb, TCA_QFQ_LMAX, lmax)) goto nla_put_failure; return nla_nest_end(skb, nest); @@ -654,8 +662,10 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, memset(&xstats, 0, sizeof(xstats)); + sch_tree_lock(sch); xstats.weight = cl->agg->class_weight; xstats.lmax = cl->agg->lmax; + sch_tree_unlock(sch); if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || @@ -1494,6 +1504,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) { + qfq_rm_from_agg(q, cl); qfq_destroy_class(sch, cl); } } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index a903b3c46805..11a7d5a25d6b 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -656,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); return -EINVAL; } + + if (ctl->perturb_period < 0 || + ctl->perturb_period > INT_MAX / HZ) { + NL_SET_ERR_MSG_MOD(extack, "invalid perturb period"); + return -EINVAL; + } + perturb_period = ctl->perturb_period * HZ; + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) return -EINVAL; @@ -672,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, headdrop = q->headdrop; maxdepth = q->maxdepth; maxflows = q->maxflows; - perturb_period = q->perturb_period; quantum = q->quantum; flags = q->flags; /* update and validate configuration */ if (ctl->quantum) quantum = ctl->quantum; - perturb_period = ctl->perturb_period * HZ; if (ctl->flows) maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8623dc0bafc0..1620f0fd78ce 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -43,6 +43,11 @@ static struct static_key_false taprio_have_working_mqprio; #define TAPRIO_SUPPORTED_FLAGS \ (TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) #define TAPRIO_FLAGS_INVALID U32_MAX +/* Minimum value for picos_per_byte to ensure non-zero duration + * for minimum-sized Ethernet frames (ETH_ZLEN = 60). + * 60 * 17 > PSEC_PER_NSEC (1000) + */ +#define TAPRIO_PICOS_PER_BYTE_MIN 17 struct sched_entry { /* Durations between this GCL entry and the GCL entry where the @@ -1284,7 +1289,8 @@ static void taprio_start_sched(struct Qdisc *sch, } static void taprio_set_picos_per_byte(struct net_device *dev, - struct taprio_sched *q) + struct taprio_sched *q, + struct netlink_ext_ack *extack) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; @@ -1300,6 +1306,15 @@ static void taprio_set_picos_per_byte(struct net_device *dev, skip: picos_per_byte = (USEC_PER_SEC * 8) / speed; + if (picos_per_byte < TAPRIO_PICOS_PER_BYTE_MIN) { + if (!extack) + pr_warn("Link speed %d is too high. Schedule may be inaccurate.\n", + speed); + NL_SET_ERR_MSG_FMT_MOD(extack, + "Link speed %d is too high. Schedule may be inaccurate.", + speed); + picos_per_byte = TAPRIO_PICOS_PER_BYTE_MIN; + } atomic64_set(&q->picos_per_byte, picos_per_byte); netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n", @@ -1324,17 +1339,19 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, if (dev != qdisc_dev(q->root)) continue; - taprio_set_picos_per_byte(dev, q); + taprio_set_picos_per_byte(dev, q, NULL); stab = rtnl_dereference(q->root->stab); - oper = rtnl_dereference(q->oper_sched); + rcu_read_lock(); + oper = rcu_dereference(q->oper_sched); if (oper) taprio_update_queue_max_sdu(q, oper, stab); - admin = rtnl_dereference(q->admin_sched); + admin = rcu_dereference(q->admin_sched); if (admin) taprio_update_queue_max_sdu(q, admin, stab); + rcu_read_unlock(); break; } @@ -1846,7 +1863,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, q->flags = taprio_flags; /* Needed for length_to_duration() during netlink attribute parsing */ - taprio_set_picos_per_byte(dev, q); + taprio_set_picos_per_byte(dev, q, extack); err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags); if (err < 0) diff --git a/net/sctp/input.c b/net/sctp/input.c index a8a254a5008e..032a10d82302 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -117,7 +117,7 @@ int sctp_rcv(struct sk_buff *skb) * it's better to just linearize it otherwise crc computing * takes longer. */ - if ((!is_gso && skb_linearize(skb)) || + if (((!is_gso || skb_cloned(skb)) && skb_linearize(skb)) || !pskb_may_pull(skb, sizeof(struct sctphdr))) goto discard_it; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 53725ee7ba06..b301d64d9d80 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9100,7 +9100,8 @@ static void __sctp_write_space(struct sctp_association *asoc) wq = rcu_dereference(sk->sk_wq); if (wq) { if (waitqueue_active(&wq->wait)) - wake_up_interruptible(&wq->wait); + wake_up_interruptible_poll(&wq->wait, EPOLLOUT | + EPOLLWRNORM | EPOLLWRBAND); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 78b0e6dba0a2..cdd445d40b94 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -30,6 +30,10 @@ #include <linux/splice.h> #include <net/sock.h> +#include <net/inet_common.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#endif #include <net/tcp.h> #include <net/smc.h> #include <asm/ioctls.h> @@ -360,6 +364,16 @@ static void smc_destruct(struct sock *sk) return; if (!sock_flag(sk, SOCK_DEAD)) return; + switch (sk->sk_family) { + case AF_INET: + inet_sock_destruct(sk); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + inet6_sock_destruct(sk); + break; +#endif + } } static struct lock_class_key smc_key; @@ -3339,10 +3353,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family) * which need net ref. */ sk = smc->clcsock->sk; - __netns_tracker_free(net, &sk->ns_tracker, false); - sk->sk_net_refcnt = 1; - get_net_track(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); return 0; } diff --git a/net/smc/smc.h b/net/smc/smc.h index ad77d6b6b8d3..7579f9622e01 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -283,10 +283,10 @@ struct smc_connection { }; struct smc_sock { /* smc sock container */ - struct sock sk; -#if IS_ENABLED(CONFIG_IPV6) - struct ipv6_pinfo *pinet6; -#endif + union { + struct sock sk; + struct inet_sock icsk_inet; + }; struct socket *clcsock; /* internal tcp socket */ void (*clcsk_state_change)(struct sock *sk); /* original stat_change fct. */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 17a4de75bfaf..e492655cb221 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2749,8 +2749,13 @@ out_verifier: case -EPROTONOSUPPORT: goto out_err; case -EACCES: - /* Re-encode with a fresh cred */ - fallthrough; + /* possible RPCSEC_GSS out-of-sequence event (RFC2203), + * reset recv state and keep waiting, don't retransmit + */ + task->tk_rqstp->rq_reply_bytes_recvd = 0; + task->tk_status = xprt_request_enqueue_receive(task); + task->tk_action = call_transmit_status; + return -EBADMSG; default: goto out_garbage; } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7ce3721c06ca..eadc00410ebc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -630,7 +630,7 @@ static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, const char *name) { - struct qstr q = QSTR_INIT(name, strlen(name)); + struct qstr q = QSTR(name); struct dentry *dentry = d_hash_and_lookup(parent, &q); if (!dentry) { dentry = d_alloc(parent, &q); @@ -1190,8 +1190,7 @@ static const struct rpc_filelist files[] = { struct dentry *rpc_d_lookup_sb(const struct super_block *sb, const unsigned char *dir_name) { - struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name)); - return d_hash_and_lookup(sb->s_root, &dir); + return d_hash_and_lookup(sb->s_root, &QSTR(dir_name)); } EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); @@ -1300,11 +1299,9 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) struct dentry *gssd_dentry; struct dentry *clnt_dentry = NULL; struct dentry *pipe_dentry = NULL; - struct qstr q = QSTR_INIT(files[RPCAUTH_gssd].name, - strlen(files[RPCAUTH_gssd].name)); /* We should never get this far if "gssd" doesn't exist */ - gssd_dentry = d_hash_and_lookup(root, &q); + gssd_dentry = d_hash_and_lookup(root, &QSTR(files[RPCAUTH_gssd].name)); if (!gssd_dentry) return ERR_PTR(-ENOENT); @@ -1314,9 +1311,8 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) goto out; } - q.name = gssd_dummy_clnt_dir[0].name; - q.len = strlen(gssd_dummy_clnt_dir[0].name); - clnt_dentry = d_hash_and_lookup(gssd_dentry, &q); + clnt_dentry = d_hash_and_lookup(gssd_dentry, + &QSTR(gssd_dummy_clnt_dir[0].name)); if (!clnt_dentry) { __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(-ENOENT); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 79879b7d39cb..46a95877d2de 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1369,7 +1369,8 @@ svc_process_common(struct svc_rqst *rqstp) case SVC_OK: break; case SVC_GARBAGE: - goto err_garbage_args; + rqstp->rq_auth_stat = rpc_autherr_badcred; + goto err_bad_auth; case SVC_SYSERR: goto err_system_err; case SVC_DENIED: @@ -1510,14 +1511,6 @@ err_bad_proc: *rqstp->rq_accept_statp = rpc_proc_unavail; goto sendit; -err_garbage_args: - svc_printk(rqstp, "failed to decode RPC header\n"); - - if (serv->sv_stats) - serv->sv_stats->rpcbadfmt++; - *rqstp->rq_accept_statp = rpc_garbage_args; - goto sendit; - err_system_err: if (serv->sv_stats) serv->sv_stats->rpcbadfmt++; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3bfbb789c4be..e61e94576058 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -257,20 +257,47 @@ svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg) +svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; - struct socket *sock = svsk->sk_sock; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; + int ret; + + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN); + } + return ret; +} + +static int +svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg) +{ int ret; + struct socket *sock = svsk->sk_sock; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); ret = sock_recvmsg(sock, msg, MSG_DONTWAIT); - if (unlikely(msg->msg_controllen != sizeof(u))) - ret = svc_tcp_sock_process_cmsg(sock, msg, &u.cmsg, ret); + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 0 || ret == -EIO) + ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags); + } return ret; } @@ -321,7 +348,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, iov_iter_advance(&msg.msg_iter, seek); buflen -= seek; } - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len > 0) svc_flush_bvec(bvec, len, seek); @@ -1019,7 +1046,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want); - len = svc_tcp_sock_recv_cmsg(svsk, &msg); + len = svc_tcp_sock_recvmsg(svsk, &msg); if (len < 0) return len; svsk->sk_tcplen += len; @@ -1541,10 +1568,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, newlen = error; if (protocol == IPPROTO_TCP) { - __netns_tracker_free(net, &sock->sk->ns_tracker, false); - sock->sk->sk_net_refcnt = 1; - get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sock->sk); if ((error = kernel_listen(sock, 64)) < 0) goto bummer; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ca6172822b68..3d7f1413df02 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -577,6 +577,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) ib_destroy_qp(newxprt->sc_qp); rdma_destroy_id(newxprt->sc_cm_id); + rpcrdma_rn_unregister(dev, &newxprt->sc_rn); /* This call to put will destroy the transport */ svc_xprt_put(&newxprt->sc_xprt); return NULL; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 171ad4e2523f..92cec227215a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -358,7 +358,7 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) static int xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, - struct cmsghdr *cmsg, int ret) + unsigned int *msg_flags, struct cmsghdr *cmsg, int ret) { u8 content_type = tls_get_record_type(sock->sk, cmsg); u8 level, description; @@ -371,7 +371,7 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, * record, even though there might be more frames * waiting to be decrypted. */ - msg->msg_flags &= ~MSG_EOR; + *msg_flags &= ~MSG_EOR; break; case TLS_RECORD_TYPE_ALERT: tls_alert_recv(sock->sk, msg, &level, &description); @@ -386,19 +386,33 @@ xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, } static int -xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags) +xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags) { union { struct cmsghdr cmsg; u8 buf[CMSG_SPACE(sizeof(u8))]; } u; + u8 alert[2]; + struct kvec alert_kvec = { + .iov_base = alert, + .iov_len = sizeof(alert), + }; + struct msghdr msg = { + .msg_flags = *msg_flags, + .msg_control = &u, + .msg_controllen = sizeof(u), + }; int ret; - msg->msg_control = &u; - msg->msg_controllen = sizeof(u); - ret = sock_recvmsg(sock, msg, flags); - if (msg->msg_controllen != sizeof(u)) - ret = xs_sock_process_cmsg(sock, msg, &u.cmsg, ret); + iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1, + alert_kvec.iov_len); + ret = sock_recvmsg(sock, &msg, flags); + if (ret > 0 && + tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) { + iov_iter_revert(&msg.msg_iter, ret); + ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg, + -EAGAIN); + } return ret; } @@ -408,7 +422,13 @@ xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) ssize_t ret; if (seek != 0) iov_iter_advance(&msg->msg_iter, seek); - ret = xs_sock_recv_cmsg(sock, msg, flags); + ret = sock_recvmsg(sock, msg, flags); + /* Handle TLS inband control message lazily */ + if (msg->msg_flags & MSG_CTRUNC) { + msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR); + if (ret == 0 || ret == -EIO) + ret = xs_sock_recv_cmsg(sock, &msg->msg_flags, flags); + } return ret > 0 ? ret + seek : ret; } @@ -434,7 +454,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, size_t count) { iov_iter_discard(&msg->msg_iter, ITER_DEST, count); - return xs_sock_recv_cmsg(sock, msg, flags); + return xs_sock_recvmsg(sock, msg, flags, 0); } #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE @@ -1940,12 +1960,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, goto out; } - if (protocol == IPPROTO_TCP) { - __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false); - sock->sk->sk_net_refcnt = 1; - get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(xprt->xprt_net, 1); - } + if (protocol == IPPROTO_TCP) + sk_net_refcnt_upgrade(sock->sk); filp = sock_alloc_file(sock, O_NONBLOCK, NULL); if (IS_ERR(filp)) @@ -2743,6 +2759,11 @@ static void xs_tcp_tls_setup_socket(struct work_struct *work) } rpc_shutdown_client(lower_clnt); + /* Check for ingress data that arrived before the socket's + * ->data_ready callback was set up. + */ + xs_poll_check_readable(upper_transport); + out_unlock: current_restore_flags(pflags, PF_MEMALLOC); upper_transport->clnt = NULL; diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 79f91b6ca8c8..ea5bb131ebd0 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -425,7 +425,7 @@ static void tipc_aead_free(struct rcu_head *rp) } free_percpu(aead->tfm_entry); kfree_sensitive(aead->key); - kfree(aead); + kfree_sensitive(aead); } static int tipc_aead_users(struct tipc_aead __rcu *aead) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 8ee0c07d00e9..ffe577bf6b51 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -704,8 +704,10 @@ static void tipc_topsrv_stop(struct net *net) for (id = 0; srv->idr_in_use; id++) { con = idr_find(&srv->conn_idr, id); if (con) { + conn_get(con); spin_unlock_bh(&srv->idr_lock); tipc_conn_close(con); + conn_put(con); spin_lock_bh(&srv->idr_lock); } } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 108a4cc2e001..258d6aa4f21a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -489,7 +489,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = tipc_bearer_find(net, bname); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } @@ -500,7 +500,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = rtnl_dereference(tn->bearer_list[bid]); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } diff --git a/net/tls/tls.h b/net/tls/tls.h index e5e47452308a..e1eaf12b3742 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -195,7 +195,7 @@ void tls_strp_msg_done(struct tls_strparser *strp); int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb); void tls_rx_msg_ready(struct tls_strparser *strp); -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh); int tls_strp_msg_cow(struct tls_sw_context_rx *ctx); struct sk_buff *tls_strp_msg_detach(struct tls_sw_context_rx *ctx); int tls_strp_msg_hold(struct tls_strparser *strp, struct sk_buff_head *dst); diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 65b0da6fdf6a..d71643b494a1 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -475,7 +475,7 @@ static void tls_strp_load_anchor_with_queue(struct tls_strparser *strp, int len) strp->stm.offset = offset; } -void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) +bool tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) { struct strp_msg *rxm; struct tls_msg *tlm; @@ -484,8 +484,11 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) DEBUG_NET_WARN_ON_ONCE(!strp->stm.full_len); if (!strp->copy_mode && force_refresh) { - if (WARN_ON(tcp_inq(strp->sk) < strp->stm.full_len)) - return; + if (unlikely(tcp_inq(strp->sk) < strp->stm.full_len)) { + WRITE_ONCE(strp->msg_ready, 0); + memset(&strp->stm, 0, sizeof(strp->stm)); + return false; + } tls_strp_load_anchor_with_queue(strp, strp->stm.full_len); } @@ -495,6 +498,8 @@ void tls_strp_msg_load(struct tls_strparser *strp, bool force_refresh) rxm->offset = strp->stm.offset; tlm = tls_msg(strp->anchor); tlm->control = strp->mark; + + return true; } /* Called with lock held on lower socket */ @@ -512,9 +517,8 @@ static int tls_strp_read_sock(struct tls_strparser *strp) if (inq < strp->stm.full_len) return tls_strp_read_copy(strp, true); + tls_strp_load_anchor_with_queue(strp, inq); if (!strp->stm.full_len) { - tls_strp_load_anchor_with_queue(strp, inq); - sz = tls_rx_msg_size(strp, strp->anchor); if (sz < 0) { tls_strp_abort_strp(strp, sz); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 8fb5925f2389..6385329ef98d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -872,6 +872,19 @@ more_data: delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); delta -= msg->sg.size; + + if ((s32)delta > 0) { + /* It indicates that we executed bpf_msg_pop_data(), + * causing the plaintext data size to decrease. + * Therefore the encrypted data size also needs to + * correspondingly decrease. We only need to subtract + * delta to calculate the new ciphertext length since + * ktls does not support block encryption. + */ + struct sk_msg *enc = &ctx->open_rec->msg_encrypted; + + sk_msg_trim(sk, enc, enc->sg.size - delta); + } } if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && !enospc && !full_record) { @@ -1367,7 +1380,8 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, return sock_intr_errno(timeo); } - tls_strp_msg_load(&ctx->strp, released); + if (unlikely(!tls_strp_msg_load(&ctx->strp, released))) + return tls_rx_rec_wait(sk, psock, nonblock, false); return 1; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6b1762300443..45f8e21829ec 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -666,6 +666,11 @@ static void unix_sock_destructor(struct sock *sk) #endif } +static unsigned int unix_skb_len(const struct sk_buff *skb) +{ + return skb->len - UNIXCB(skb).consumed; +} + static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); @@ -700,10 +705,16 @@ static void unix_release_sock(struct sock *sk, int embrion) if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb && !unix_skb_len(skb)) + skb = skb_peek_next(skb, &sk->sk_receive_queue); +#endif unix_state_lock(skpair); /* No more writes */ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); - if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion) + if (skb || embrion) WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); @@ -2594,11 +2605,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, return timeo; } -static unsigned int unix_skb_len(const struct sk_buff *skb) -{ - return skb->len - UNIXCB(skb).consumed; -} - struct unix_stream_read_state { int (*recv_actor)(struct sk_buff *, int, int, struct unix_stream_read_state *); @@ -2613,11 +2619,11 @@ struct unix_stream_read_state { #if IS_ENABLED(CONFIG_AF_UNIX_OOB) static int unix_stream_recv_urg(struct unix_stream_read_state *state) { + struct sk_buff *oob_skb, *read_skb = NULL; struct socket *sock = state->socket; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int chunk = 1; - struct sk_buff *oob_skb; mutex_lock(&u->iolock); unix_state_lock(sk); @@ -2632,9 +2638,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) oob_skb = u->oob_skb; - if (!(state->flags & MSG_PEEK)) + if (!(state->flags & MSG_PEEK)) { WRITE_ONCE(u->oob_skb, NULL); + if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue && + !unix_skb_len(oob_skb->prev)) { + read_skb = oob_skb->prev; + __skb_unlink(read_skb, &sk->sk_receive_queue); + } + } + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); @@ -2645,6 +2658,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) mutex_unlock(&u->iolock); + consume_skb(read_skb); + if (chunk < 0) return -EFAULT; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d08f205b33dc..ef519b55a3d9 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -407,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { + lockdep_assert_held(&vsock_register_mutex); + if (!transport_local) return false; @@ -464,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) remote_flags = vsk->remote_addr.svm_flags; + mutex_lock(&vsock_register_mutex); + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; @@ -479,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_h2g; break; default: - return -ESOCKTNOSUPPORT; + ret = -ESOCKTNOSUPPORT; + goto err; } if (vsk->transport) { - if (vsk->transport == new_transport) - return 0; + if (vsk->transport == new_transport) { + ret = 0; + goto err; + } /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we @@ -508,8 +515,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) /* We increase the module refcnt to prevent the transport unloading * while there are open sockets assigned to it. */ - if (!new_transport || !try_module_get(new_transport->module)) - return -ENODEV; + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || @@ -528,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->transport = new_transport; return 0; +err: + mutex_unlock(&vsock_register_mutex); + return ret; } EXPORT_SYMBOL_GPL(vsock_assign_transport); +/* + * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks. + * Otherwise we may race with module removal. Do not use on `vsk->transport`. + */ +static u32 vsock_registered_transport_cid(const struct vsock_transport **transport) +{ + u32 cid = VMADDR_CID_ANY; + + mutex_lock(&vsock_register_mutex); + if (*transport) + cid = (*transport)->get_local_cid(); + mutex_unlock(&vsock_register_mutex); + + return cid; +} + bool vsock_find_cid(unsigned int cid) { - if (transport_g2h && cid == transport_g2h->get_local_cid()) + if (cid == vsock_registered_transport_cid(&transport_g2h)) return true; if (transport_h2g && cid == VMADDR_CID_HOST) @@ -655,7 +689,8 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, unsigned int i; for (i = 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) + if (port == VMADDR_PORT_ANY || + port <= LAST_RESERVED_PORT) port = LAST_RESERVED_PORT + 1; new_addr.svm_port = port++; @@ -2502,18 +2537,19 @@ static long vsock_dev_do_ioctl(struct file *filp, unsigned int cmd, void __user *ptr) { u32 __user *p = ptr; - u32 cid = VMADDR_CID_ANY; int retval = 0; + u32 cid; switch (cmd) { case IOCTL_VM_SOCKETS_GET_LOCAL_CID: /* To be compatible with the VMCI behavior, we prioritize the * guest CID instead of well-know host CID (VMADDR_CID_HOST). */ - if (transport_g2h) - cid = transport_g2h->get_local_cid(); - else if (transport_h2g) - cid = transport_h2g->get_local_cid(); + cid = vsock_registered_transport_cid(&transport_g2h); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_h2g); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_local); if (put_user(cid, p) != 0) retval = -EFAULT; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index f0e48e6911fc..f01f9e878106 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -307,7 +307,7 @@ out_rcu: static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) { - int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM; + int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; struct scatterlist pkt, *p; struct virtqueue *vq; struct sk_buff *skb; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b370070194fa..7eccd6708d66 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, u16 proto, struct vmci_handle handle) { + memset(pkt, 0, sizeof(*pkt)); + /* We register the stream control handler as an any cid handle so we * must always send from a source address of VMADDR_CID_ANY */ @@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, pkt->type = type; pkt->src_port = src->svm_port; pkt->dst_port = dst->svm_port; - memset(&pkt->proto, 0, sizeof(pkt->proto)); - memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); switch (pkt->type) { case VMCI_TRANSPORT_PACKET_TYPE_INVALID: diff --git a/net/wireless/core.c b/net/wireless/core.c index 1ce8fff2a28a..586e50678ed8 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -553,6 +553,9 @@ use_default_name: INIT_WORK(&rdev->mgmt_registrations_update_wk, cfg80211_mgmt_registrations_update_wk); spin_lock_init(&rdev->mgmt_registrations_lock); + INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work); + INIT_LIST_HEAD(&rdev->wiphy_work_list); + spin_lock_init(&rdev->wiphy_work_lock); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -570,9 +573,6 @@ use_default_name: return NULL; } - INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work); - INIT_LIST_HEAD(&rdev->wiphy_work_list); - spin_lock_init(&rdev->wiphy_work_lock); INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index a5eb92d93074..d1a66410b9c5 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -843,7 +843,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, mgmt = (const struct ieee80211_mgmt *)params->buf; - if (!ieee80211_is_mgmt(mgmt->frame_control)) + if (!ieee80211_is_mgmt(mgmt->frame_control) || + ieee80211_has_order(mgmt->frame_control)) return -EINVAL; stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c778ffa1c8ef..ec8265f2d568 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -229,6 +229,7 @@ static int validate_beacon_head(const struct nlattr *attr, unsigned int len = nla_len(attr); const struct element *elem; const struct ieee80211_mgmt *mgmt = (void *)data; + const struct ieee80211_ext *ext; unsigned int fixedlen, hdrlen; bool s1g_bcn; @@ -237,8 +238,10 @@ static int validate_beacon_head(const struct nlattr *attr, s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control); if (s1g_bcn) { - fixedlen = offsetof(struct ieee80211_ext, - u.s1g_beacon.variable); + ext = (struct ieee80211_ext *)mgmt; + fixedlen = + offsetof(struct ieee80211_ext, u.s1g_beacon.variable) + + ieee80211_s1g_optional_len(ext->frame_control); hdrlen = offsetof(struct ieee80211_ext, u.s1g_beacon); } else { fixedlen = offsetof(struct ieee80211_mgmt, @@ -16786,6 +16789,7 @@ static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info) if (!sar_spec) return -ENOMEM; + sar_spec->num_sub_specs = specs; sar_spec->type = type; specs = 0; nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) { diff --git a/net/wireless/util.c b/net/wireless/util.c index 18585b1416c6..b115489a846f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -820,6 +820,52 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr) } EXPORT_SYMBOL(ieee80211_is_valid_amsdu); + +/* + * Detects if an MSDU frame was maliciously converted into an A-MSDU + * frame by an adversary. This is done by parsing the received frame + * as if it were a regular MSDU, even though the A-MSDU flag is set. + * + * For non-mesh interfaces, detection involves checking whether the + * payload, when interpreted as an MSDU, begins with a valid RFC1042 + * header. This is done by comparing the A-MSDU subheader's destination + * address to the start of the RFC1042 header. + * + * For mesh interfaces, the MSDU includes a 6-byte Mesh Control field + * and an optional variable-length Mesh Address Extension field before + * the RFC1042 header. The position of the RFC1042 header must therefore + * be calculated based on the mesh header length. + * + * Since this function intentionally parses an A-MSDU frame as an MSDU, + * it only assumes that the A-MSDU subframe header is present, and + * beyond this it performs its own bounds checks under the assumption + * that the frame is instead parsed as a non-aggregated MSDU. + */ +static bool +is_amsdu_aggregation_attack(struct ethhdr *eth, struct sk_buff *skb, + enum nl80211_iftype iftype) +{ + int offset; + + /* Non-mesh case can be directly compared */ + if (iftype != NL80211_IFTYPE_MESH_POINT) + return ether_addr_equal(eth->h_dest, rfc1042_header); + + offset = __ieee80211_get_mesh_hdrlen(eth->h_dest[0]); + if (offset == 6) { + /* Mesh case with empty address extension field */ + return ether_addr_equal(eth->h_source, rfc1042_header); + } else if (offset + ETH_ALEN <= skb->len) { + /* Mesh case with non-empty address extension field */ + u8 temp[ETH_ALEN]; + + skb_copy_bits(skb, offset, temp, ETH_ALEN); + return ether_addr_equal(temp, rfc1042_header); + } + + return false; +} + void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, const unsigned int extra_headroom, @@ -861,8 +907,10 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, /* the last MSDU has no padding */ if (subframe_len > remaining) goto purge; - /* mitigate A-MSDU aggregation injection attacks */ - if (ether_addr_equal(hdr.eth.h_dest, rfc1042_header)) + /* mitigate A-MSDU aggregation injection attacks, to be + * checked when processing first subframe (offset == 0). + */ + if (offset == 0 && is_amsdu_aggregation_attack(&hdr.eth, skb, iftype)) goto purge; offset += sizeof(struct ethhdr); diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 98f1e2b67c76..b95d882f9dbc 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -874,7 +874,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], return -EINVAL; } - if (p.collect_md) { + if (p.collect_md || xi->p.collect_md) { NL_SET_ERR_MSG(extack, "collect_md can't be changed"); return -EINVAL; } @@ -885,11 +885,6 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], } else { if (xi->dev != dev) return -EEXIST; - if (xi->p.collect_md) { - NL_SET_ERR_MSG(extack, - "device can't be changed to collect_md"); - return -EINVAL; - } } return xfrmi_update(xi, &p); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7a298058fc16..6f99fd2d966c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1242,14 +1242,8 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, const struct flowi *fl, unsigned short family, struct xfrm_state **best, int *acq_in_progress, - int *error) + int *error, unsigned int pcpu_id) { - /* We need the cpu id just as a lookup key, - * we don't require it to be stable. - */ - unsigned int pcpu_id = get_cpu(); - put_cpu(); - /* Resolution logic: * 1. There is a valid state with matching selector. Done. * 2. Valid state with inappropriate selector. Skip. @@ -1316,14 +1310,15 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, /* We need the cpu id just as a lookup key, * we don't require it to be stable. */ - pcpu_id = get_cpu(); - put_cpu(); + pcpu_id = raw_smp_processor_id(); to_put = NULL; sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); rcu_read_lock(); + xfrm_hash_ptrs_get(net, &state_ptrs); + hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && @@ -1335,7 +1330,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) xfrm_state_look_at(pol, x, fl, encap_family, - &best, &acquire_in_progress, &error); + &best, &acquire_in_progress, &error, pcpu_id); } if (best) @@ -1352,7 +1347,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) xfrm_state_look_at(pol, x, fl, family, - &best, &acquire_in_progress, &error); + &best, &acquire_in_progress, &error, pcpu_id); } cached: @@ -1364,8 +1359,6 @@ cached: else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */ WARN_ON(1); - xfrm_hash_ptrs_get(net, &state_ptrs); - h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask); hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) { #ifdef CONFIG_XFRM_OFFLOAD @@ -1395,7 +1388,7 @@ cached: tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) xfrm_state_look_at(pol, x, fl, family, - &best, &acquire_in_progress, &error); + &best, &acquire_in_progress, &error, pcpu_id); } if (best || acquire_in_progress) goto found; @@ -1430,7 +1423,7 @@ cached: tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) xfrm_state_look_at(pol, x, fl, family, - &best, &acquire_in_progress, &error); + &best, &acquire_in_progress, &error, pcpu_id); } found: @@ -1644,6 +1637,26 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, } EXPORT_SYMBOL(xfrm_state_lookup_byspi); +static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 spi, u8 proto) +{ + struct xfrm_state *x; + unsigned int i; + + rcu_read_lock(); + for (i = 0; i <= net->xfrm.state_hmask; i++) { + hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) { + if (x->id.spi == spi && x->id.proto == proto) { + if (!xfrm_state_hold_rcu(x)) + continue; + rcu_read_unlock(); + return x; + } + } + } + rcu_read_unlock(); + return NULL; +} + static void __xfrm_state_insert(struct xfrm_state *x) { struct net *net = xs_net(x); @@ -2472,10 +2485,8 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high, unsigned int h; struct xfrm_state *x0; int err = -ENOENT; - __be32 minspi = htonl(low); - __be32 maxspi = htonl(high); + u32 range = high - low + 1; __be32 newspi = 0; - u32 mark = x->mark.v & x->mark.m; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_DEAD) { @@ -2489,38 +2500,34 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high, err = -ENOENT; - if (minspi == maxspi) { - x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family); - if (x0) { - NL_SET_ERR_MSG(extack, "Requested SPI is already in use"); - xfrm_state_put(x0); + for (h = 0; h < range; h++) { + u32 spi = (low == high) ? low : get_random_u32_inclusive(low, high); + newspi = htonl(spi); + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + x0 = xfrm_state_lookup_spi_proto(net, newspi, x->id.proto); + if (!x0) { + x->id.spi = newspi; + h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family); + XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + err = 0; goto unlock; } - newspi = minspi; - } else { - u32 spi = 0; - for (h = 0; h < high-low+1; h++) { - spi = get_random_u32_inclusive(low, high); - x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); - if (x0 == NULL) { - newspi = htonl(spi); - break; - } - xfrm_state_put(x0); + xfrm_state_put(x0); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + if (signal_pending(current)) { + err = -ERESTARTSYS; + goto unlock; } + + if (low == high) + break; } - if (newspi) { - spin_lock_bh(&net->xfrm.xfrm_state_lock); - x->id.spi = newspi; - h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, - x->xso.type); - spin_unlock_bh(&net->xfrm.xfrm_state_lock); - err = 0; - } else { + if (err) NL_SET_ERR_MSG(extack, "No SPI available in the requested range"); - } unlock: spin_unlock_bh(&x->lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index da2a1c00ca8a..d41e5642625e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -178,11 +178,27 @@ static inline int verify_replay(struct xfrm_usersa_info *p, "Replay seq and seq_hi should be 0 for output SA"); return -EINVAL; } - if (rs->oseq_hi && !(p->flags & XFRM_STATE_ESN)) { - NL_SET_ERR_MSG( - extack, - "Replay oseq_hi should be 0 in non-ESN mode for output SA"); - return -EINVAL; + + if (!(p->flags & XFRM_STATE_ESN)) { + if (rs->oseq_hi) { + NL_SET_ERR_MSG( + extack, + "Replay oseq_hi should be 0 in non-ESN mode for output SA"); + return -EINVAL; + } + if (rs->oseq == U32_MAX) { + NL_SET_ERR_MSG( + extack, + "Replay oseq should be less than 0xFFFFFFFF in non-ESN mode for output SA"); + return -EINVAL; + } + } else { + if (rs->oseq == U32_MAX && rs->oseq_hi == U32_MAX) { + NL_SET_ERR_MSG( + extack, + "Replay oseq and oseq_hi should be less than 0xFFFFFFFF for output SA"); + return -EINVAL; + } } if (rs->bmp_len) { NL_SET_ERR_MSG(extack, "Replay bmp_len should 0 for output SA"); @@ -196,11 +212,27 @@ static inline int verify_replay(struct xfrm_usersa_info *p, "Replay oseq and oseq_hi should be 0 for input SA"); return -EINVAL; } - if (rs->seq_hi && !(p->flags & XFRM_STATE_ESN)) { - NL_SET_ERR_MSG( - extack, - "Replay seq_hi should be 0 in non-ESN mode for input SA"); - return -EINVAL; + if (!(p->flags & XFRM_STATE_ESN)) { + if (rs->seq_hi) { + NL_SET_ERR_MSG( + extack, + "Replay seq_hi should be 0 in non-ESN mode for input SA"); + return -EINVAL; + } + + if (rs->seq == U32_MAX) { + NL_SET_ERR_MSG( + extack, + "Replay seq should be less than 0xFFFFFFFF in non-ESN mode for input SA"); + return -EINVAL; + } + } else { + if (rs->seq == U32_MAX && rs->seq_hi == U32_MAX) { + NL_SET_ERR_MSG( + extack, + "Replay seq and seq_hi should be less than 0xFFFFFFFF for input SA"); + return -EINVAL; + } } } |