diff options
Diffstat (limited to 'net')
229 files changed, 7861 insertions, 4118 deletions
diff --git a/net/Makefile b/net/Makefile index 07ea48160874..5744bf1997fd 100644 --- a/net/Makefile +++ b/net/Makefile @@ -6,7 +6,7 @@ # Rewritten to use lists instead of if-statements. # -obj-$(CONFIG_NET) := socket.o core/ +obj-$(CONFIG_NET) := devres.o socket.o core/ tmp-$(CONFIG_COMPAT) := compat.o obj-$(CONFIG_NET) += $(tmp-y) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b41375d4d295..15787e8c0629 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -57,6 +57,7 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <net/route.h> +#include <net/compat.h> #include <linux/atalk.h> #include <linux/highmem.h> @@ -867,6 +868,24 @@ static int atif_ioctl(int cmd, void __user *arg) return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0; } +static int atrtr_ioctl_addrt(struct rtentry *rt) +{ + struct net_device *dev = NULL; + + if (rt->rt_dev) { + char name[IFNAMSIZ]; + + if (copy_from_user(name, rt->rt_dev, IFNAMSIZ-1)) + return -EFAULT; + name[IFNAMSIZ-1] = '\0'; + + dev = __dev_get_by_name(&init_net, name); + if (!dev) + return -ENODEV; + } + return atrtr_create(rt, dev); +} + /* Routing ioctl() calls */ static int atrtr_ioctl(unsigned int cmd, void __user *arg) { @@ -882,19 +901,8 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg) return atrtr_delete(&((struct sockaddr_at *) &rt.rt_dst)->sat_addr); - case SIOCADDRT: { - struct net_device *dev = NULL; - if (rt.rt_dev) { - char name[IFNAMSIZ]; - if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) - return -EFAULT; - name[IFNAMSIZ-1] = '\0'; - dev = __dev_get_by_name(&init_net, name); - if (!dev) - return -ENODEV; - } - return atrtr_create(&rt, dev); - } + case SIOCADDRT: + return atrtr_ioctl_addrt(&rt); } return -EINVAL; } @@ -1832,20 +1840,58 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) #ifdef CONFIG_COMPAT +static int atalk_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_rtentry __user *ur) +{ + compat_uptr_t rtdev; + struct rtentry rt; + + if (copy_from_user(&rt.rt_dst, &ur->rt_dst, + 3 * sizeof(struct sockaddr)) || + get_user(rt.rt_flags, &ur->rt_flags) || + get_user(rt.rt_metric, &ur->rt_metric) || + get_user(rt.rt_mtu, &ur->rt_mtu) || + get_user(rt.rt_window, &ur->rt_window) || + get_user(rt.rt_irtt, &ur->rt_irtt) || + get_user(rtdev, &ur->rt_dev)) + return -EFAULT; + + switch (cmd) { + case SIOCDELRT: + if (rt.rt_dst.sa_family != AF_APPLETALK) + return -EINVAL; + return atrtr_delete(&((struct sockaddr_at *) + &rt.rt_dst)->sat_addr); + + case SIOCADDRT: + rt.rt_dev = compat_ptr(rtdev); + return atrtr_ioctl_addrt(&rt); + default: + return -EINVAL; + } +} static int atalk_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = compat_ptr(arg); + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return atalk_compat_routing_ioctl(sk, cmd, argp); /* * SIOCATALKDIFADDR is a SIOCPROTOPRIVATE ioctl number, so we * cannot handle it in common code. The data we access if ifreq * here is compatible, so we can simply call the native * handler. */ - if (cmd == SIOCATALKDIFADDR) - return atalk_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); - - return -ENOIOCTLCMD; + case SIOCATALKDIFADDR: + return atalk_ioctl(sock, cmd, (unsigned long)argp); + default: + return -ENOIOCTLCMD; + } } -#endif +#endif /* CONFIG_COMPAT */ static const struct net_proto_family atalk_family_ops = { diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index d955b683aa7c..838ebf0cabbf 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -56,6 +56,8 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, int error; struct list_head *pos; void __user *argp = (void __user *)arg; + void __user *buf; + int __user *len; vcc = ATM_SD(sock); switch (cmd) { @@ -162,7 +164,49 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, if (error != -ENOIOCTLCMD) goto done; - error = atm_dev_ioctl(cmd, argp, compat); + if (cmd == ATM_GETNAMES) { + if (IS_ENABLED(CONFIG_COMPAT) && compat) { +#ifdef CONFIG_COMPAT + struct compat_atm_iobuf __user *ciobuf = argp; + compat_uptr_t cbuf; + len = &ciobuf->length; + if (get_user(cbuf, &ciobuf->buffer)) + return -EFAULT; + buf = compat_ptr(cbuf); +#endif + } else { + struct atm_iobuf __user *iobuf = argp; + len = &iobuf->length; + if (get_user(buf, &iobuf->buffer)) + return -EFAULT; + } + error = atm_getnames(buf, len); + } else { + int number; + + if (IS_ENABLED(CONFIG_COMPAT) && compat) { +#ifdef CONFIG_COMPAT + struct compat_atmif_sioc __user *csioc = argp; + compat_uptr_t carg; + + len = &csioc->length; + if (get_user(carg, &csioc->arg)) + return -EFAULT; + buf = compat_ptr(carg); + if (get_user(number, &csioc->number)) + return -EFAULT; +#endif + } else { + struct atmif_sioc __user *sioc = argp; + + len = &sioc->length; + if (get_user(buf, &sioc->arg)) + return -EFAULT; + if (get_user(number, &sioc->number)) + return -EFAULT; + } + error = atm_dev_ioctl(cmd, buf, len, number, compat); + } done: return error; @@ -230,61 +274,25 @@ static struct { static int do_atm_iobuf(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct atm_iobuf __user *iobuf; - struct compat_atm_iobuf __user *iobuf32; + struct compat_atm_iobuf __user *iobuf32 = compat_ptr(arg); u32 data; - void __user *datap; - int len, err; - - iobuf = compat_alloc_user_space(sizeof(*iobuf)); - iobuf32 = compat_ptr(arg); - if (get_user(len, &iobuf32->length) || - get_user(data, &iobuf32->buffer)) + if (get_user(data, &iobuf32->buffer)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(len, &iobuf->length) || - put_user(datap, &iobuf->buffer)) - return -EFAULT; - - err = do_vcc_ioctl(sock, cmd, (unsigned long) iobuf, 0); - if (!err) { - if (copy_in_user(&iobuf32->length, &iobuf->length, - sizeof(int))) - err = -EFAULT; - } - - return err; + return atm_getnames(&iobuf32->length, compat_ptr(data)); } static int do_atmif_sioc(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct atmif_sioc __user *sioc; - struct compat_atmif_sioc __user *sioc32; + struct compat_atmif_sioc __user *sioc32 = compat_ptr(arg); + int number; u32 data; - void __user *datap; - int err; - - sioc = compat_alloc_user_space(sizeof(*sioc)); - sioc32 = compat_ptr(arg); - if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) || - get_user(data, &sioc32->arg)) + if (get_user(data, &sioc32->arg) || get_user(number, &sioc32->number)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &sioc->arg)) - return -EFAULT; - - err = do_vcc_ioctl(sock, cmd, (unsigned long) sioc, 0); - - if (!err) { - if (copy_in_user(&sioc32->length, &sioc->length, - sizeof(int))) - err = -EFAULT; - } - return err; + return atm_dev_ioctl(cmd, compat_ptr(data), &sioc32->length, number, 0); } static int do_atm_ioctl(struct socket *sock, unsigned int cmd32, diff --git a/net/atm/resources.c b/net/atm/resources.c index 889349c6d90d..94bdc6527ee8 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -193,88 +193,48 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, return error ? -EFAULT : 0; } -int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) +int atm_getnames(void __user *buf, int __user *iobuf_len) { - void __user *buf; - int error, len, number, size = 0; + int error, len, size = 0; struct atm_dev *dev; struct list_head *p; int *tmp_buf, *tmp_p; - int __user *sioc_len; - int __user *iobuf_len; - switch (cmd) { - case ATM_GETNAMES: - if (IS_ENABLED(CONFIG_COMPAT) && compat) { -#ifdef CONFIG_COMPAT - struct compat_atm_iobuf __user *ciobuf = arg; - compat_uptr_t cbuf; - iobuf_len = &ciobuf->length; - if (get_user(cbuf, &ciobuf->buffer)) - return -EFAULT; - buf = compat_ptr(cbuf); -#endif - } else { - struct atm_iobuf __user *iobuf = arg; - iobuf_len = &iobuf->length; - if (get_user(buf, &iobuf->buffer)) - return -EFAULT; - } - if (get_user(len, iobuf_len)) - return -EFAULT; - mutex_lock(&atm_dev_mutex); - list_for_each(p, &atm_devs) - size += sizeof(int); - if (size > len) { - mutex_unlock(&atm_dev_mutex); - return -E2BIG; - } - tmp_buf = kmalloc(size, GFP_ATOMIC); - if (!tmp_buf) { - mutex_unlock(&atm_dev_mutex); - return -ENOMEM; - } - tmp_p = tmp_buf; - list_for_each(p, &atm_devs) { - dev = list_entry(p, struct atm_dev, dev_list); - *tmp_p++ = dev->number; - } + if (get_user(len, iobuf_len)) + return -EFAULT; + mutex_lock(&atm_dev_mutex); + list_for_each(p, &atm_devs) + size += sizeof(int); + if (size > len) { mutex_unlock(&atm_dev_mutex); - error = ((copy_to_user(buf, tmp_buf, size)) || - put_user(size, iobuf_len)) - ? -EFAULT : 0; - kfree(tmp_buf); - return error; - default: - break; + return -E2BIG; } - - if (IS_ENABLED(CONFIG_COMPAT) && compat) { -#ifdef CONFIG_COMPAT - struct compat_atmif_sioc __user *csioc = arg; - compat_uptr_t carg; - - sioc_len = &csioc->length; - if (get_user(carg, &csioc->arg)) - return -EFAULT; - buf = compat_ptr(carg); - - if (get_user(len, &csioc->length)) - return -EFAULT; - if (get_user(number, &csioc->number)) - return -EFAULT; -#endif - } else { - struct atmif_sioc __user *sioc = arg; - - sioc_len = &sioc->length; - if (get_user(buf, &sioc->arg)) - return -EFAULT; - if (get_user(len, &sioc->length)) - return -EFAULT; - if (get_user(number, &sioc->number)) - return -EFAULT; + tmp_buf = kmalloc(size, GFP_ATOMIC); + if (!tmp_buf) { + mutex_unlock(&atm_dev_mutex); + return -ENOMEM; + } + tmp_p = tmp_buf; + list_for_each(p, &atm_devs) { + dev = list_entry(p, struct atm_dev, dev_list); + *tmp_p++ = dev->number; } + mutex_unlock(&atm_dev_mutex); + error = ((copy_to_user(buf, tmp_buf, size)) || + put_user(size, iobuf_len)) + ? -EFAULT : 0; + kfree(tmp_buf); + return error; +} + +int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len, + int number, int compat) +{ + int error, len, size = 0; + struct atm_dev *dev; + + if (get_user(len, sioc_len)) + return -EFAULT; dev = try_then_request_module(atm_dev_lookup(number), "atm-device-%d", number); diff --git a/net/atm/resources.h b/net/atm/resources.h index 048232e4d4c6..4a0839e92ff3 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -14,8 +14,9 @@ extern struct list_head atm_devs; extern struct mutex atm_dev_mutex; -int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat); - +int atm_getnames(void __user *buf, int __user *iobuf_len); +int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len, + int number, int compat); #ifdef CONFIG_PROC_FS diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ff57ea89c27e..fd91cd34f25e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -635,8 +635,10 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case SO_BINDTODEVICE: - if (optlen > IFNAMSIZ) - optlen = IFNAMSIZ; + if (optlen > IFNAMSIZ - 1) + optlen = IFNAMSIZ - 1; + + memset(devname, 0, sizeof(devname)); if (copy_from_user(devname, optval, optlen)) { res = -EFAULT; diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 353e49c40e7f..0bdefa35da98 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -127,20 +127,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) rtnl_lock(); ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings); rtnl_unlock(); - - /* Virtual interface drivers such as tun / tap interfaces, VLAN, etc - * tend to initialize the interface throughput with some value for the - * sake of having a throughput number to export via ethtool. This - * exported throughput leaves batman-adv to conclude the interface - * throughput is genuine (reflecting reality), thus no measurements - * are necessary. - * - * Based on the observation that those interface types also tend to set - * the link auto-negotiation to 'off', batman-adv shall check this - * setting to differentiate between genuine link throughput information - * and placeholders installed by virtual interfaces. - */ - if (ret == 0 && link_settings.base.autoneg == AUTONEG_ENABLE) { + if (ret == 0) { /* link characteristics might change over time */ if (link_settings.base.duplex == DUPLEX_FULL) hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index e22e49289677..a18dcc686dc3 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -146,8 +146,8 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, if (new_gw_node) kref_get(&new_gw_node->refcount); - curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1); - rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node); + curr_gw_node = rcu_replace_pointer(bat_priv->gw.curr_gw, new_gw_node, + true); if (curr_gw_node) batadv_gw_node_put(curr_gw_node); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index c7e98a40dd33..3a256af92784 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -473,8 +473,8 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv, if (new_hard_iface) kref_get(&new_hard_iface->refcount); - curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1); - rcu_assign_pointer(bat_priv->primary_if, new_hard_iface); + curr_hard_iface = rcu_replace_pointer(bat_priv->primary_if, + new_hard_iface, 1); if (!new_hard_iface) goto out; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index ccb535c77e5d..8bdabc03b0b2 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -135,9 +135,6 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf, if (!buf || count < sizeof(struct batadv_icmp_packet)) return -EINVAL; - if (!access_ok(buf, count)) - return -EFAULT; - error = wait_event_interruptible(socket_client->queue_wait, socket_client->queue_len); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 3632bd976c56..d343382e9664 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -71,13 +71,13 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, * the code needs to ensure the curr_router variable contains a pointer * to the replaced best neighbor. */ - curr_router = rcu_dereference_protected(orig_ifinfo->router, true); /* increase refcount of new best neighbor */ if (neigh_node) kref_get(&neigh_node->refcount); - rcu_assign_pointer(orig_ifinfo->router, neigh_node); + curr_router = rcu_replace_pointer(orig_ifinfo->router, neigh_node, + true); spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_put(orig_ifinfo); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 822af540b854..0ddd80130ea3 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -22,6 +22,7 @@ #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> +#include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/percpu.h> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 30ba7d38941d..bfd4ccd80847 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -160,16 +160,20 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size, u32 headroom, u32 tailroom) { void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + u32 user_size = kattr->test.data_size_in; void *data; if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) return ERR_PTR(-EINVAL); + if (user_size > size) + return ERR_PTR(-EMSGSIZE); + data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) return ERR_PTR(-ENOMEM); - if (copy_from_user(data + headroom, data_in, size)) { + if (copy_from_user(data + headroom, data_in, user_size)) { kfree(data); return ERR_PTR(-EFAULT); } @@ -486,8 +490,6 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, /* XDP have extra tailroom as (most) drivers use full page */ max_data_sz = 4096 - headroom - tailroom; - if (size > max_data_sz) - return -EINVAL; data = bpf_test_init(kattr, max_data_sz, headroom, tailroom); if (IS_ERR(data)) diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index d7bc09de4c13..8ea59504ef47 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -37,6 +37,26 @@ static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id) return res; } +static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex) +{ + struct br_mrp *mrp; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { + struct net_bridge_port *p; + + p = rtnl_dereference(mrp->p_port); + if (p && p->dev->ifindex == ifindex) + return false; + + p = rtnl_dereference(mrp->s_port); + if (p && p->dev->ifindex == ifindex) + return false; + } + + return true; +} + static struct br_mrp *br_mrp_find_port(struct net_bridge *br, struct net_bridge_port *p) { @@ -203,6 +223,7 @@ out: static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) { struct net_bridge_port *p; + u8 state; /* Stop sending MRP_Test frames */ cancel_delayed_work_sync(&mrp->test_work); @@ -214,20 +235,24 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) p = rtnl_dereference(mrp->p_port); if (p) { spin_lock_bh(&br->lock); - p->state = BR_STATE_FORWARDING; + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); - br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING); + br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->p_port, NULL); } p = rtnl_dereference(mrp->s_port); if (p) { spin_lock_bh(&br->lock); - p->state = BR_STATE_FORWARDING; + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); - br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING); + br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->s_port, NULL); } @@ -255,6 +280,11 @@ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance) !br_mrp_get_port(br, instance->s_ifindex)) return -EINVAL; + /* It is not possible to have the same port part of multiple rings */ + if (!br_mrp_unique_ifindex(br, instance->p_ifindex) || + !br_mrp_unique_ifindex(br, instance->s_ifindex)) + return -EINVAL; + mrp = kzalloc(sizeof(*mrp), GFP_KERNEL); if (!mrp) return -ENOMEM; @@ -346,24 +376,24 @@ int br_mrp_set_port_state(struct net_bridge_port *p, * note: already called with rtnl_lock */ int br_mrp_set_port_role(struct net_bridge_port *p, - struct br_mrp_port_role *role) + enum br_mrp_port_role_type role) { struct br_mrp *mrp; if (!p || !(p->flags & BR_MRP_AWARE)) return -EINVAL; - mrp = br_mrp_find_id(p->br, role->ring_id); + mrp = br_mrp_find_port(p->br, p); if (!mrp) return -EINVAL; - if (role->role == BR_MRP_PORT_ROLE_PRIMARY) + if (role == BR_MRP_PORT_ROLE_PRIMARY) rcu_assign_pointer(mrp->p_port, p); else rcu_assign_pointer(mrp->s_port, p); - br_mrp_port_switchdev_set_role(p, role->role); + br_mrp_port_switchdev_set_role(p, role); return 0; } diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c index 397e7f710772..d9de780d2ce0 100644 --- a/net/bridge/br_mrp_netlink.c +++ b/net/bridge/br_mrp_netlink.c @@ -8,25 +8,234 @@ static const struct nla_policy br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = { [IFLA_BRIDGE_MRP_UNSPEC] = { .type = NLA_REJECT }, - [IFLA_BRIDGE_MRP_INSTANCE] = { .type = NLA_EXACT_LEN, - .len = sizeof(struct br_mrp_instance)}, - [IFLA_BRIDGE_MRP_PORT_STATE] = { .type = NLA_U32 }, - [IFLA_BRIDGE_MRP_PORT_ROLE] = { .type = NLA_EXACT_LEN, - .len = sizeof(struct br_mrp_port_role)}, - [IFLA_BRIDGE_MRP_RING_STATE] = { .type = NLA_EXACT_LEN, - .len = sizeof(struct br_mrp_ring_state)}, - [IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_EXACT_LEN, - .len = sizeof(struct br_mrp_ring_role)}, - [IFLA_BRIDGE_MRP_START_TEST] = { .type = NLA_EXACT_LEN, - .len = sizeof(struct br_mrp_start_test)}, + [IFLA_BRIDGE_MRP_INSTANCE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_PORT_STATE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_PORT_ROLE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_RING_STATE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_NESTED }, + [IFLA_BRIDGE_MRP_START_TEST] = { .type = NLA_NESTED }, }; +static const struct nla_policy +br_mrp_instance_policy[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1] = { + [IFLA_BRIDGE_MRP_INSTANCE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_INSTANCE_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX] = { .type = NLA_U32 }, +}; + +static int br_mrp_instance_parse(struct net_bridge *br, struct nlattr *attr, + int cmd, struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1]; + struct br_mrp_instance inst; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_INSTANCE_MAX, attr, + br_mrp_instance_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID] || + !tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] || + !tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or P_IFINDEX or S_IFINDEX"); + return -EINVAL; + } + + memset(&inst, 0, sizeof(inst)); + + inst.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID]); + inst.p_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX]); + inst.s_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]); + + if (cmd == RTM_SETLINK) + return br_mrp_add(br, &inst); + else + return br_mrp_del(br, &inst); + + return 0; +} + +static const struct nla_policy +br_mrp_port_state_policy[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1] = { + [IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_PORT_STATE_STATE] = { .type = NLA_U32 }, +}; + +static int br_mrp_port_state_parse(struct net_bridge_port *p, + struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1]; + enum br_mrp_port_state_type state; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_STATE_MAX, attr, + br_mrp_port_state_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing attribute: STATE"); + return -EINVAL; + } + + state = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]); + + return br_mrp_set_port_state(p, state); +} + +static const struct nla_policy +br_mrp_port_role_policy[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1] = { + [IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_PORT_ROLE_ROLE] = { .type = NLA_U32 }, +}; + +static int br_mrp_port_role_parse(struct net_bridge_port *p, + struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1]; + enum br_mrp_port_role_type role; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_ROLE_MAX, attr, + br_mrp_port_role_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]) { + NL_SET_ERR_MSG_MOD(extack, "Missing attribute: ROLE"); + return -EINVAL; + } + + role = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]); + + return br_mrp_set_port_role(p, role); +} + +static const struct nla_policy +br_mrp_ring_state_policy[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1] = { + [IFLA_BRIDGE_MRP_RING_STATE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_RING_STATE_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_RING_STATE_STATE] = { .type = NLA_U32 }, +}; + +static int br_mrp_ring_state_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1]; + struct br_mrp_ring_state state; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_STATE_MAX, attr, + br_mrp_ring_state_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID] || + !tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or STATE"); + return -EINVAL; + } + + memset(&state, 0x0, sizeof(state)); + + state.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID]); + state.ring_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]); + + return br_mrp_set_ring_state(br, &state); +} + +static const struct nla_policy +br_mrp_ring_role_policy[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1] = { + [IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_RING_ROLE_ROLE] = { .type = NLA_U32 }, +}; + +static int br_mrp_ring_role_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1]; + struct br_mrp_ring_role role; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_ROLE_MAX, attr, + br_mrp_ring_role_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] || + !tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or ROLE"); + return -EINVAL; + } + + memset(&role, 0x0, sizeof(role)); + + role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID]); + role.ring_role = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]); + + return br_mrp_set_ring_role(br, &role); +} + +static const struct nla_policy +br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = { + [IFLA_BRIDGE_MRP_START_TEST_UNSPEC] = { .type = NLA_REJECT }, + [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = { .type = NLA_U32 }, + [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = { .type = NLA_U32 }, +}; + +static int br_mrp_start_test_parse(struct net_bridge *br, struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IFLA_BRIDGE_MRP_START_TEST_MAX + 1]; + struct br_mrp_start_test test; + int err; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_START_TEST_MAX, attr, + br_mrp_start_test_policy, extack); + if (err) + return err; + + if (!tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID] || + !tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL] || + !tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] || + !tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]) { + NL_SET_ERR_MSG_MOD(extack, + "Missing attribute: RING_ID or INTERVAL or MAX_MISS or PERIOD"); + return -EINVAL; + } + + memset(&test, 0x0, sizeof(test)); + + test.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID]); + test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL]); + test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS]); + test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]); + + return br_mrp_start_test(br, &test); +} + int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_MRP_MAX + 1]; int err; + /* When this function is called for a port then the br pointer is + * invalid, therefor set the br to point correctly + */ + if (p) + br = p->br; + if (br->stp_enabled != BR_NO_STP) { NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled"); return -EINVAL; @@ -38,58 +247,45 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p, return err; if (tb[IFLA_BRIDGE_MRP_INSTANCE]) { - struct br_mrp_instance *instance = - nla_data(tb[IFLA_BRIDGE_MRP_INSTANCE]); - - if (cmd == RTM_SETLINK) - err = br_mrp_add(br, instance); - else - err = br_mrp_del(br, instance); + err = br_mrp_instance_parse(br, tb[IFLA_BRIDGE_MRP_INSTANCE], + cmd, extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_PORT_STATE]) { - enum br_mrp_port_state_type state = - nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE]); - - err = br_mrp_set_port_state(p, state); + err = br_mrp_port_state_parse(p, tb[IFLA_BRIDGE_MRP_PORT_STATE], + extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_PORT_ROLE]) { - struct br_mrp_port_role *role = - nla_data(tb[IFLA_BRIDGE_MRP_PORT_ROLE]); - - err = br_mrp_set_port_role(p, role); + err = br_mrp_port_role_parse(p, tb[IFLA_BRIDGE_MRP_PORT_ROLE], + extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_RING_STATE]) { - struct br_mrp_ring_state *state = - nla_data(tb[IFLA_BRIDGE_MRP_RING_STATE]); - - err = br_mrp_set_ring_state(br, state); + err = br_mrp_ring_state_parse(br, + tb[IFLA_BRIDGE_MRP_RING_STATE], + extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_RING_ROLE]) { - struct br_mrp_ring_role *role = - nla_data(tb[IFLA_BRIDGE_MRP_RING_ROLE]); - - err = br_mrp_set_ring_role(br, role); + err = br_mrp_ring_role_parse(br, tb[IFLA_BRIDGE_MRP_RING_ROLE], + extack); if (err) return err; } if (tb[IFLA_BRIDGE_MRP_START_TEST]) { - struct br_mrp_start_test *test = - nla_data(tb[IFLA_BRIDGE_MRP_START_TEST]); - - err = br_mrp_start_test(br, test); + err = br_mrp_start_test_parse(br, + tb[IFLA_BRIDGE_MRP_START_TEST], + extack); if (err) return err; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index ad12fe3fca8c..83490bf73a13 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2413,7 +2413,8 @@ void br_multicast_uninit_stats(struct net_bridge *br) free_percpu(br->mcast_stats); } -static void mcast_stats_add_dir(u64 *dst, u64 *src) +/* noinline for https://bugs.llvm.org/show_bug.cgi?id=45802#c9 */ +static noinline_for_stack void mcast_stats_add_dir(u64 *dst, u64 *src) { dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX]; dst[BR_MCAST_DIR_TX] += src[BR_MCAST_DIR_TX]; diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 2921a4b59f8e..a0f53cc3ab85 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -37,7 +37,7 @@ int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance); int br_mrp_set_port_state(struct net_bridge_port *p, enum br_mrp_port_state_type state); int br_mrp_set_port_role(struct net_bridge_port *p, - struct br_mrp_port_role *role); + enum br_mrp_port_role_type role); int br_mrp_set_ring_state(struct net_bridge *br, struct br_mrp_ring_state *state); int br_mrp_set_ring_role(struct net_bridge *br, struct br_mrp_ring_role *role); diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index b325b569e761..f48cf4cfb80f 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -31,6 +31,12 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source); eth->h_proto = eth_hdr(oldskb)->h_proto; skb_pull(nskb, ETH_HLEN); + + if (skb_vlan_tag_present(oldskb)) { + u16 vid = skb_vlan_tag_get(oldskb); + + __vlan_hwaccel_put_tag(nskb, oldskb->vlan_proto, vid); + } } static int nft_bridge_iphdr_validate(struct sk_buff *skb) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f8ca5edc5f2c..27d6ab11f9ee 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -490,15 +490,8 @@ static int ceph_tcp_connect(struct ceph_connection *con) return ret; } - if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) { - int optval = 1; - - ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)&optval, sizeof(optval)); - if (ret) - pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d", - ret); - } + if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) + tcp_sock_set_nodelay(sock->sk); con->sock = sock; return 0; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 998e26b75a78..1d4973f8cd7a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3649,7 +3649,9 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) * supported. */ req->r_t.target_oloc.pool = m.redirect.oloc.pool; - req->r_flags |= CEPH_OSD_FLAG_REDIRECTED; + req->r_flags |= CEPH_OSD_FLAG_REDIRECTED | + CEPH_OSD_FLAG_IGNORE_OVERLAY | + CEPH_OSD_FLAG_IGNORE_CACHE; req->r_tid = 0; __submit_request(req, false); goto out_unlock_osdc; diff --git a/net/compat.c b/net/compat.c index 69fc6d1e4e6e..afd7b444e0bf 100644 --- a/net/compat.c +++ b/net/compat.c @@ -448,200 +448,6 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, return __compat_sys_getsockopt(fd, level, optname, optval, optlen); } -struct compat_group_req { - __u32 gr_interface; - struct __kernel_sockaddr_storage gr_group - __aligned(4); -} __packed; - -struct compat_group_source_req { - __u32 gsr_interface; - struct __kernel_sockaddr_storage gsr_group - __aligned(4); - struct __kernel_sockaddr_storage gsr_source - __aligned(4); -} __packed; - -struct compat_group_filter { - __u32 gf_interface; - struct __kernel_sockaddr_storage gf_group - __aligned(4); - __u32 gf_fmode; - __u32 gf_numsrc; - struct __kernel_sockaddr_storage gf_slist[1] - __aligned(4); -} __packed; - -#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ - sizeof(struct __kernel_sockaddr_storage)) - - -int compat_mc_setsockopt(struct sock *sock, int level, int optname, - char __user *optval, unsigned int optlen, - int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int)) -{ - char __user *koptval = optval; - int koptlen = optlen; - - switch (optname) { - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - { - struct compat_group_req __user *gr32 = (void __user *)optval; - struct group_req __user *kgr = - compat_alloc_user_space(sizeof(struct group_req)); - u32 interface; - - if (!access_ok(gr32, sizeof(*gr32)) || - !access_ok(kgr, sizeof(struct group_req)) || - __get_user(interface, &gr32->gr_interface) || - __put_user(interface, &kgr->gr_interface) || - copy_in_user(&kgr->gr_group, &gr32->gr_group, - sizeof(kgr->gr_group))) - return -EFAULT; - koptval = (char __user *)kgr; - koptlen = sizeof(struct group_req); - break; - } - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - { - struct compat_group_source_req __user *gsr32 = (void __user *)optval; - struct group_source_req __user *kgsr = compat_alloc_user_space( - sizeof(struct group_source_req)); - u32 interface; - - if (!access_ok(gsr32, sizeof(*gsr32)) || - !access_ok(kgsr, - sizeof(struct group_source_req)) || - __get_user(interface, &gsr32->gsr_interface) || - __put_user(interface, &kgsr->gsr_interface) || - copy_in_user(&kgsr->gsr_group, &gsr32->gsr_group, - sizeof(kgsr->gsr_group)) || - copy_in_user(&kgsr->gsr_source, &gsr32->gsr_source, - sizeof(kgsr->gsr_source))) - return -EFAULT; - koptval = (char __user *)kgsr; - koptlen = sizeof(struct group_source_req); - break; - } - case MCAST_MSFILTER: - { - struct compat_group_filter __user *gf32 = (void __user *)optval; - struct group_filter __user *kgf; - u32 interface, fmode, numsrc; - - if (!access_ok(gf32, __COMPAT_GF0_SIZE) || - __get_user(interface, &gf32->gf_interface) || - __get_user(fmode, &gf32->gf_fmode) || - __get_user(numsrc, &gf32->gf_numsrc)) - return -EFAULT; - koptlen = optlen + sizeof(struct group_filter) - - sizeof(struct compat_group_filter); - if (koptlen < GROUP_FILTER_SIZE(numsrc)) - return -EINVAL; - kgf = compat_alloc_user_space(koptlen); - if (!access_ok(kgf, koptlen) || - __put_user(interface, &kgf->gf_interface) || - __put_user(fmode, &kgf->gf_fmode) || - __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group, &gf32->gf_group, - sizeof(kgf->gf_group)) || - (numsrc && copy_in_user(kgf->gf_slist, gf32->gf_slist, - numsrc * sizeof(kgf->gf_slist[0])))) - return -EFAULT; - koptval = (char __user *)kgf; - break; - } - - default: - break; - } - return setsockopt(sock, level, optname, koptval, koptlen); -} -EXPORT_SYMBOL(compat_mc_setsockopt); - -int compat_mc_getsockopt(struct sock *sock, int level, int optname, - char __user *optval, int __user *optlen, - int (*getsockopt)(struct sock *, int, int, char __user *, int __user *)) -{ - struct compat_group_filter __user *gf32 = (void __user *)optval; - struct group_filter __user *kgf; - int __user *koptlen; - u32 interface, fmode, numsrc; - int klen, ulen, err; - - if (optname != MCAST_MSFILTER) - return getsockopt(sock, level, optname, optval, optlen); - - koptlen = compat_alloc_user_space(sizeof(*koptlen)); - if (!access_ok(optlen, sizeof(*optlen)) || - __get_user(ulen, optlen)) - return -EFAULT; - - /* adjust len for pad */ - klen = ulen + sizeof(*kgf) - sizeof(*gf32); - - if (klen < GROUP_FILTER_SIZE(0)) - return -EINVAL; - - if (!access_ok(koptlen, sizeof(*koptlen)) || - __put_user(klen, koptlen)) - return -EFAULT; - - /* have to allow space for previous compat_alloc_user_space, too */ - kgf = compat_alloc_user_space(klen+sizeof(*optlen)); - - if (!access_ok(gf32, __COMPAT_GF0_SIZE) || - __get_user(interface, &gf32->gf_interface) || - __get_user(fmode, &gf32->gf_fmode) || - __get_user(numsrc, &gf32->gf_numsrc) || - __put_user(interface, &kgf->gf_interface) || - __put_user(fmode, &kgf->gf_fmode) || - __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group))) - return -EFAULT; - - err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); - if (err) - return err; - - if (!access_ok(koptlen, sizeof(*koptlen)) || - __get_user(klen, koptlen)) - return -EFAULT; - - ulen = klen - (sizeof(*kgf)-sizeof(*gf32)); - - if (!access_ok(optlen, sizeof(*optlen)) || - __put_user(ulen, optlen)) - return -EFAULT; - - if (!access_ok(kgf, klen) || - !access_ok(gf32, ulen) || - __get_user(interface, &kgf->gf_interface) || - __get_user(fmode, &kgf->gf_fmode) || - __get_user(numsrc, &kgf->gf_numsrc) || - __put_user(interface, &gf32->gf_interface) || - __put_user(fmode, &gf32->gf_fmode) || - __put_user(numsrc, &gf32->gf_numsrc)) - return -EFAULT; - if (numsrc) { - int copylen; - - klen -= GROUP_FILTER_SIZE(0); - copylen = numsrc * sizeof(gf32->gf_slist[0]); - if (copylen > klen) - copylen = klen; - if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) - return -EFAULT; - } - return err; -} -EXPORT_SYMBOL(compat_mc_getsockopt); - - /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) static unsigned char nas[21] = { diff --git a/net/core/dev.c b/net/core/dev.c index f36bd3b21997..ae37586f6ee8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5058,11 +5058,12 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, return 0; } -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc, +static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, struct packet_type **ppt_prev) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; + struct sk_buff *skb = *pskb; struct net_device *orig_dev; bool deliver_exact = false; int ret = NET_RX_DROP; @@ -5093,8 +5094,10 @@ another_round: ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); preempt_enable(); - if (ret2 != XDP_PASS) - return NET_RX_DROP; + if (ret2 != XDP_PASS) { + ret = NET_RX_DROP; + goto out; + } skb_reset_mac_len(skb); } @@ -5244,6 +5247,13 @@ drop: } out: + /* The invariant here is that if *ppt_prev is not NULL + * then skb should also be non-NULL. + * + * Apparently *ppt_prev assignment above holds this invariant due to + * skb dereferencing near it. + */ + *pskb = skb; return ret; } @@ -5253,7 +5263,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc) struct packet_type *pt_prev = NULL; int ret; - ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); + ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); if (pt_prev) ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, skb->dev, pt_prev, orig_dev); @@ -5331,7 +5341,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo struct packet_type *pt_prev = NULL; skb_list_del_init(skb); - __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); + __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); if (!pt_prev) continue; if (pt_curr != pt_prev || od_curr != orig_dev) { diff --git a/net/core/filter.c b/net/core/filter.c index 822d662f97ef..bd2853d23b50 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7049,6 +7049,8 @@ static bool sock_addr_is_valid_access(int off, int size, switch (prog->expected_attach_type) { case BPF_CGROUP_INET4_BIND: case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET4_GETPEERNAME: + case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP4_RECVMSG: break; @@ -7060,6 +7062,8 @@ static bool sock_addr_is_valid_access(int off, int size, switch (prog->expected_attach_type) { case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_INET6_GETSOCKNAME: case BPF_CGROUP_UDP6_SENDMSG: case BPF_CGROUP_UDP6_RECVMSG: break; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3eff84824c8b..0aeb33572feb 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -160,12 +160,10 @@ out: return ret; } -int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) +static int flow_dissector_bpf_prog_detach(struct net *net) { struct bpf_prog *attached; - struct net *net; - net = current->nsproxy->net_ns; mutex_lock(&flow_dissector_mutex); attached = rcu_dereference_protected(net->flow_dissector_prog, lockdep_is_held(&flow_dissector_mutex)); @@ -179,6 +177,24 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) return 0; } +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) +{ + return flow_dissector_bpf_prog_detach(current->nsproxy->net_ns); +} + +static void __net_exit flow_dissector_pernet_pre_exit(struct net *net) +{ + /* We're not racing with attach/detach because there are no + * references to netns left when pre_exit gets called. + */ + if (rcu_access_pointer(net->flow_dissector_prog)) + flow_dissector_bpf_prog_detach(net); +} + +static struct pernet_operations flow_dissector_pernet_ops __net_initdata = { + .pre_exit = flow_dissector_pernet_pre_exit, +}; + /** * __skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from @@ -464,47 +480,59 @@ EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, int nhoff, int hlen) + void *target_container, void *data, int nhoff, int hlen, + int lse_index, bool *entropy_label) { - struct flow_dissector_key_keyid *key_keyid; - struct mpls_label *hdr, _hdr[2]; - u32 entry, label; + struct mpls_label *hdr, _hdr; + u32 entry, label, bos; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS_ENTROPY) && !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) return FLOW_DISSECT_RET_OUT_GOOD; + if (lse_index >= FLOW_DIS_MPLS_MAX) + return FLOW_DISSECT_RET_OUT_GOOD; + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return FLOW_DISSECT_RET_OUT_BAD; - entry = ntohl(hdr[0].entry); + entry = ntohl(hdr->entry); label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT; if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) { struct flow_dissector_key_mpls *key_mpls; + struct flow_dissector_mpls_lse *lse; key_mpls = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_MPLS, target_container); - key_mpls->mpls_label = label; - key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK) - >> MPLS_LS_TTL_SHIFT; - key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK) - >> MPLS_LS_TC_SHIFT; - key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK) - >> MPLS_LS_S_SHIFT; + lse = &key_mpls->ls[lse_index]; + + lse->mpls_ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; + lse->mpls_bos = bos; + lse->mpls_tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; + lse->mpls_label = label; + dissector_set_mpls_lse(key_mpls, lse_index); } - if (label == MPLS_LABEL_ENTROPY) { + if (*entropy_label && + dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) { + struct flow_dissector_key_keyid *key_keyid; + key_keyid = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_MPLS_ENTROPY, target_container); - key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK); + key_keyid->keyid = cpu_to_be32(label); } - return FLOW_DISSECT_RET_OUT_GOOD; + + *entropy_label = label == MPLS_LABEL_ENTROPY; + + return bos ? FLOW_DISSECT_RET_OUT_GOOD : FLOW_DISSECT_RET_PROTO_AGAIN; } static enum flow_dissect_ret @@ -963,6 +991,8 @@ bool __skb_flow_dissect(const struct net *net, struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; + bool mpls_el = false; + int mpls_lse = 0; int num_hdrs = 0; u8 ip_proto = 0; bool ret; @@ -1262,7 +1292,10 @@ proto_again: case htons(ETH_P_MPLS_MC): fdret = __skb_flow_dissect_mpls(skb, flow_dissector, target_container, data, - nhoff, hlen); + nhoff, hlen, mpls_lse, + &mpls_el); + nhoff += sizeof(struct mpls_label); + mpls_lse++; break; case htons(ETH_P_FCOE): if ((hlen - nhoff) < FCOE_HEADER_LEN) { @@ -1836,7 +1869,7 @@ static int __init init_default_flow_dissectors(void) skb_flow_dissector_init(&flow_keys_basic_dissector, flow_keys_basic_dissector_keys, ARRAY_SIZE(flow_keys_basic_dissector_keys)); - return 0; -} + return register_pernet_subsys(&flow_dissector_pernet_ops); +} core_initcall(init_default_flow_dissectors); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index e951b743bed3..e64941c526b1 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -8,6 +8,7 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions) { struct flow_rule *rule; + int i; rule = kzalloc(struct_size(rule, action.entries, num_actions), GFP_KERNEL); @@ -15,6 +16,11 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions) return NULL; rule->action.num_entries = num_actions; + /* Pre-fill each action hw_stats with DONT_CARE. + * Caller can override this if it wants stats for a given action. + */ + for (i = 0; i < num_actions; i++) + rule->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE; return rule; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b607ea602774..ef6b5a8f629c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1082,8 +1082,8 @@ static void neigh_timer_handler(struct timer_list *t) } if (neigh->nud_state & NUD_IN_TIMER) { - if (time_before(next, jiffies + HZ/2)) - next = jiffies + HZ/2; + if (time_before(next, jiffies + HZ/100)) + next = jiffies + HZ/100; if (!mod_timer(&neigh->timer, next)) neigh_hold(neigh); } @@ -1771,6 +1771,7 @@ static struct neigh_table *neigh_find_table(int family) } const struct nla_policy nda_policy[NDA_MAX+1] = { + [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID }, [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) }, @@ -1781,6 +1782,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_IFINDEX] = { .type = NLA_U32 }, [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, + [NDA_NH_ID] = { .type = NLA_U32 }, }; static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 35a133c6d13b..b8afefe6f6b6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3727,7 +3727,6 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) return 0; } -EXPORT_SYMBOL_GPL(skb_gro_receive_list); /** * skb_segment - Perform protocol segmentation on skb. @@ -4191,7 +4190,6 @@ done: NAPI_GRO_CB(skb)->same_flow = 1; return 0; } -EXPORT_SYMBOL_GPL(skb_gro_receive); #ifdef CONFIG_SKB_EXTENSIONS #define SKB_EXT_ALIGN_VALUE 8 diff --git a/net/core/sock.c b/net/core/sock.c index fd85e651ce28..61ec573221a6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -566,7 +566,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); -static int sock_setbindtodevice_locked(struct sock *sk, int ifindex) +static int sock_bindtoindex_locked(struct sock *sk, int ifindex) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES @@ -594,6 +594,18 @@ out: return ret; } +int sock_bindtoindex(struct sock *sk, int ifindex) +{ + int ret; + + lock_sock(sk); + ret = sock_bindtoindex_locked(sk, ifindex); + release_sock(sk); + + return ret; +} +EXPORT_SYMBOL(sock_bindtoindex); + static int sock_setbindtodevice(struct sock *sk, char __user *optval, int optlen) { @@ -634,10 +646,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, goto out; } - lock_sock(sk); - ret = sock_setbindtodevice_locked(sk, index); - release_sock(sk); - + return sock_bindtoindex(sk, index); out: #endif @@ -712,6 +721,111 @@ bool sk_mc_loop(struct sock *sk) } EXPORT_SYMBOL(sk_mc_loop); +void sock_set_reuseaddr(struct sock *sk) +{ + lock_sock(sk); + sk->sk_reuse = SK_CAN_REUSE; + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_reuseaddr); + +void sock_set_reuseport(struct sock *sk) +{ + lock_sock(sk); + sk->sk_reuseport = true; + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_reuseport); + +void sock_no_linger(struct sock *sk) +{ + lock_sock(sk); + sk->sk_lingertime = 0; + sock_set_flag(sk, SOCK_LINGER); + release_sock(sk); +} +EXPORT_SYMBOL(sock_no_linger); + +void sock_set_priority(struct sock *sk, u32 priority) +{ + lock_sock(sk); + sk->sk_priority = priority; + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_priority); + +void sock_set_sndtimeo(struct sock *sk, s64 secs) +{ + lock_sock(sk); + if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1) + sk->sk_sndtimeo = secs * HZ; + else + sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_sndtimeo); + +static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns) +{ + if (val) { + sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new); + sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns); + sock_set_flag(sk, SOCK_RCVTSTAMP); + sock_enable_timestamp(sk, SOCK_TIMESTAMP); + } else { + sock_reset_flag(sk, SOCK_RCVTSTAMP); + sock_reset_flag(sk, SOCK_RCVTSTAMPNS); + sock_reset_flag(sk, SOCK_TSTAMP_NEW); + } +} + +void sock_enable_timestamps(struct sock *sk) +{ + lock_sock(sk); + __sock_set_timestamps(sk, true, false, true); + release_sock(sk); +} +EXPORT_SYMBOL(sock_enable_timestamps); + +void sock_set_keepalive(struct sock *sk) +{ + lock_sock(sk); + if (sk->sk_prot->keepalive) + sk->sk_prot->keepalive(sk, true); + sock_valbool_flag(sk, SOCK_KEEPOPEN, true); + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_keepalive); + +static void __sock_set_rcvbuf(struct sock *sk, int val) +{ + /* Ensure val * 2 fits into an int, to prevent max_t() from treating it + * as a negative value. + */ + val = min_t(int, val, INT_MAX / 2); + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + + /* We double it on the way in to account for "struct sk_buff" etc. + * overhead. Applications assume that the SO_RCVBUF setting they make + * will allow that much actual data to be received on that socket. + * + * Applications are unaware that "struct sk_buff" and other overheads + * allocate from the receive buffer during socket buffer allocation. + * + * And after considering the possible alternatives, returning the value + * we actually used in getsockopt is the most desirable behavior. + */ + WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); +} + +void sock_set_rcvbuf(struct sock *sk, int val) +{ + lock_sock(sk); + __sock_set_rcvbuf(sk, val); + release_sock(sk); +} +EXPORT_SYMBOL(sock_set_rcvbuf); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -808,30 +922,7 @@ set_sndbuf: * play 'guess the biggest size' games. RCVBUF/SNDBUF * are treated in BSD as hints */ - val = min_t(u32, val, sysctl_rmem_max); -set_rcvbuf: - /* Ensure val * 2 fits into an int, to prevent max_t() - * from treating it as a negative value. - */ - val = min_t(int, val, INT_MAX / 2); - sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - /* - * We double it on the way in to account for - * "struct sk_buff" etc. overhead. Applications - * assume that the SO_RCVBUF setting they make will - * allow that much actual data to be received on that - * socket. - * - * Applications are unaware that "struct sk_buff" and - * other overheads allocate from the receive buffer - * during socket buffer allocation. - * - * And after considering the possible alternatives, - * returning the value we actually used in getsockopt - * is the most desirable behavior. - */ - WRITE_ONCE(sk->sk_rcvbuf, - max_t(int, val * 2, SOCK_MIN_RCVBUF)); + __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max)); break; case SO_RCVBUFFORCE: @@ -843,9 +934,8 @@ set_rcvbuf: /* No negative values (to prevent underflow, as val will be * multiplied by 2). */ - if (val < 0) - val = 0; - goto set_rcvbuf; + __sock_set_rcvbuf(sk, max(val, 0)); + break; case SO_KEEPALIVE: if (sk->sk_prot->keepalive) @@ -903,28 +993,17 @@ set_rcvbuf: break; case SO_TIMESTAMP_OLD: + __sock_set_timestamps(sk, valbool, false, false); + break; case SO_TIMESTAMP_NEW: + __sock_set_timestamps(sk, valbool, true, false); + break; case SO_TIMESTAMPNS_OLD: + __sock_set_timestamps(sk, valbool, false, true); + break; case SO_TIMESTAMPNS_NEW: - if (valbool) { - if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW) - sock_set_flag(sk, SOCK_TSTAMP_NEW); - else - sock_reset_flag(sk, SOCK_TSTAMP_NEW); - - if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW) - sock_reset_flag(sk, SOCK_RCVTSTAMPNS); - else - sock_set_flag(sk, SOCK_RCVTSTAMPNS); - sock_set_flag(sk, SOCK_RCVTSTAMP); - sock_enable_timestamp(sk, SOCK_TIMESTAMP); - } else { - sock_reset_flag(sk, SOCK_RCVTSTAMP); - sock_reset_flag(sk, SOCK_RCVTSTAMPNS); - sock_reset_flag(sk, SOCK_TSTAMP_NEW); - } + __sock_set_timestamps(sk, valbool, true, true); break; - case SO_TIMESTAMPING_NEW: sock_set_flag(sk, SOCK_TSTAMP_NEW); /* fall through */ @@ -1180,7 +1259,7 @@ set_rcvbuf: break; case SO_BINDTOIFINDEX: - ret = sock_setbindtodevice_locked(sk, val); + ret = sock_bindtoindex_locked(sk, val); break; default: @@ -3633,3 +3712,11 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) } EXPORT_SYMBOL(sk_busy_loop_end); #endif /* CONFIG_NET_RX_BUSY_POLL */ + +int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len) +{ + if (!sk->sk_prot->bind_add) + return -EOPNOTSUPP; + return sk->sk_prot->bind_add(sk, addr, addr_len); +} +EXPORT_SYMBOL(sock_bind_add); diff --git a/net/core/xdp.c b/net/core/xdp.c index 490b8f5fa8ee..90f44f382115 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -17,6 +17,7 @@ #include <net/xdp.h> #include <net/xdp_priv.h> /* struct xdp_mem_allocator */ #include <trace/events/xdp.h> +#include <net/xdp_sock_drv.h> #define REG_STATE_NEW 0x0 #define REG_STATE_REGISTERED 0x1 @@ -109,27 +110,6 @@ static void mem_allocator_disconnect(void *allocator) mutex_unlock(&mem_id_lock); } -static void mem_id_disconnect(int id) -{ - struct xdp_mem_allocator *xa; - - mutex_lock(&mem_id_lock); - - xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params); - if (!xa) { - mutex_unlock(&mem_id_lock); - WARN(1, "Request remove non-existing id(%d), driver bug?", id); - return; - } - - trace_mem_disconnect(xa); - - if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) - call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free); - - mutex_unlock(&mem_id_lock); -} - void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) { struct xdp_mem_allocator *xa; @@ -143,9 +123,6 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) if (id == 0) return; - if (xdp_rxq->mem.type == MEM_TYPE_ZERO_COPY) - return mem_id_disconnect(id); - if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) { rcu_read_lock(); xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params); @@ -301,7 +278,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, xdp_rxq->mem.type = type; if (!allocator) { - if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY) + if (type == MEM_TYPE_PAGE_POOL) return -EINVAL; /* Setup time check page_pool req */ return 0; } @@ -358,10 +335,11 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); * scenarios (e.g. queue full), it is possible to return the xdp_frame * while still leveraging this protection. The @napi_direct boolean * is used for those calls sites. Thus, allowing for faster recycling - * of xdp_frames/pages in those cases. + * of xdp_frames/pages in those cases. This path is never used by the + * MEM_TYPE_XSK_BUFF_POOL memory type, so it's explicitly not part of + * the switch-statement. */ -static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, - unsigned long handle) +static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) { struct xdp_mem_allocator *xa; struct page *page; @@ -383,36 +361,29 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, page = virt_to_page(data); /* Assumes order0 page*/ put_page(page); break; - case MEM_TYPE_ZERO_COPY: - /* NB! Only valid from an xdp_buff! */ - rcu_read_lock(); - /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - xa->zc_alloc->free(xa->zc_alloc, handle); - rcu_read_unlock(); default: /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ + WARN(1, "Incorrect XDP memory type (%d) usage", mem->type); break; } } void xdp_return_frame(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, false, 0); + __xdp_return(xdpf->data, &xdpf->mem, false); } EXPORT_SYMBOL_GPL(xdp_return_frame); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, true, 0); + __xdp_return(xdpf->data, &xdpf->mem, true); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); void xdp_return_buff(struct xdp_buff *xdp) { - __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle); + __xdp_return(xdp->data, &xdp->rxq->mem, true); } -EXPORT_SYMBOL_GPL(xdp_return_buff); /* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ void __xdp_release_frame(void *data, struct xdp_mem_info *mem) @@ -493,7 +464,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) xdpf->metasize = metasize; xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; - xdp_return_buff(xdp); + xsk_buff_free(xdp); return xdpf; } EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 1e5e08cc0bfc..650187d68851 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1082,6 +1082,7 @@ static const struct proto_ops inet6_dccp_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/devres.c b/net/devres.c new file mode 100644 index 000000000000..57a6a88d11f6 --- /dev/null +++ b/net/devres.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file contains all networking devres helpers. + */ + +#include <linux/device.h> +#include <linux/etherdevice.h> +#include <linux/netdevice.h> + +struct net_device_devres { + struct net_device *ndev; +}; + +static void devm_free_netdev(struct device *dev, void *this) +{ + struct net_device_devres *res = this; + + free_netdev(res->ndev); +} + +struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv, + unsigned int txqs, unsigned int rxqs) +{ + struct net_device_devres *dr; + + dr = devres_alloc(devm_free_netdev, sizeof(*dr), GFP_KERNEL); + if (!dr) + return NULL; + + dr->ndev = alloc_etherdev_mqs(sizeof_priv, txqs, rxqs); + if (!dr->ndev) { + devres_free(dr); + return NULL; + } + + devres_add(dev, dr); + + return dr->ndev; +} +EXPORT_SYMBOL(devm_alloc_etherdev_mqs); + +static void devm_netdev_release(struct device *dev, void *this) +{ + struct net_device_devres *res = this; + + unregister_netdev(res->ndev); +} + +static int netdev_devres_match(struct device *dev, void *this, void *match_data) +{ + struct net_device_devres *res = this; + struct net_device *ndev = match_data; + + return ndev == res->ndev; +} + +/** + * devm_register_netdev - resource managed variant of register_netdev() + * @dev: managing device for this netdev - usually the parent device + * @ndev: device to register + * + * This is a devres variant of register_netdev() for which the unregister + * function will be call automatically when the managing device is + * detached. Note: the net_device used must also be resource managed by + * the same struct device. + */ +int devm_register_netdev(struct device *dev, struct net_device *ndev) +{ + struct net_device_devres *dr; + int ret; + + /* struct net_device must itself be managed. For now a managed netdev + * can only be allocated by devm_alloc_etherdev_mqs() so the check is + * straightforward. + */ + if (WARN_ON(!devres_find(dev, devm_free_netdev, + netdev_devres_match, ndev))) + return -EINVAL; + + dr = devres_alloc(devm_netdev_release, sizeof(*dr), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + ret = register_netdev(ndev); + if (ret) { + devres_free(dr); + return ret; + } + + dr->ndev = ndev; + devres_add(ndev->dev.parent, dr); + + return 0; +} +EXPORT_SYMBOL(devm_register_netdev); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 886490fb203d..4c7f086a047b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1746,6 +1746,7 @@ int dsa_slave_create(struct dsa_port *port) if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; slave_dev->hw_features |= NETIF_F_HW_TC; + slave_dev->features |= NETIF_F_LLTX; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; if (!IS_ERR_OR_NULL(port->mac)) ether_addr_copy(slave_dev->dev_addr, port->mac); diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 3052da668156..780b2a15ac9b 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -140,34 +140,6 @@ bool vid_is_dsa_8021q(u16 vid) } EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); -static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port) -{ - struct bridge_vlan_info vinfo; - struct net_device *slave; - u16 pvid; - int err; - - if (!dsa_is_user_port(ds, port)) - return 0; - - slave = dsa_to_port(ds, port)->slave; - - err = br_vlan_get_pvid(slave, &pvid); - if (!pvid || err < 0) - /* There is no pvid on the bridge for this port, which is - * perfectly valid. Nothing to restore, bye-bye! - */ - return 0; - - err = br_vlan_get_info(slave, pvid, &vinfo); - if (err < 0) { - dev_err(ds->dev, "Couldn't determine PVID attributes\n"); - return err; - } - - return dsa_port_vid_add(dsa_to_port(ds, port), pvid, vinfo.flags); -} - /* If @enabled is true, installs @vid with @flags into the switch port's HW * filter. * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the @@ -178,39 +150,11 @@ static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid, u16 flags, bool enabled) { struct dsa_port *dp = dsa_to_port(ds, port); - struct bridge_vlan_info vinfo; - int err; if (enabled) return dsa_port_vid_add(dp, vid, flags); - err = dsa_port_vid_del(dp, vid); - if (err < 0) - return err; - - /* Nothing to restore from the bridge for a non-user port. - * The CPU port VLANs are restored implicitly with the user ports, - * similar to how the bridge does in dsa_slave_vlan_add and - * dsa_slave_vlan_del. - */ - if (!dsa_is_user_port(ds, port)) - return 0; - - err = br_vlan_get_info(dp->slave, vid, &vinfo); - /* Couldn't determine bridge attributes for this vid, - * it means the bridge had not configured it. - */ - if (err < 0) - return 0; - - /* Restore the VID from the bridge */ - err = dsa_port_vid_add(dp, vid, vinfo.flags); - if (err < 0) - return err; - - vinfo.flags &= ~BRIDGE_VLAN_INFO_PVID; - - return dsa_port_vid_add(dp->cpu_dp, vid, vinfo.flags); + return dsa_port_vid_del(dp, vid); } /* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single @@ -329,9 +273,6 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) return err; } - if (!enabled) - err = dsa_8021q_restore_pvid(ds, port); - return err; } EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging); diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index b5705cba8318..d6619edd53e5 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -15,6 +15,7 @@ #define MTK_HDR_XMIT_TAGGED_TPID_8100 1 #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) +#define MTK_HDR_XMIT_SA_DIS BIT(6) static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) @@ -22,6 +23,9 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct dsa_port *dp = dsa_slave_to_port(dev); u8 *mtk_tag; bool is_vlan_skb = true; + unsigned char *dest = eth_hdr(skb)->h_dest; + bool is_multicast_skb = is_multicast_ether_addr(dest) && + !is_broadcast_ether_addr(dest); /* Build the special tag after the MAC Source Address. If VLAN header * is present, it's required that VLAN header and special tag is @@ -47,6 +51,10 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, MTK_HDR_XMIT_UNTAGGED; mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; + /* Disable SA learning for multicast frames */ + if (unlikely(is_multicast_skb)) + mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS; + /* Tag control information is kept for 802.1Q */ if (!is_vlan_skb) { mtk_tag[2] = 0; @@ -61,6 +69,9 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, { int port; __be16 *phdr, hdr; + unsigned char *dest = eth_hdr(skb)->h_dest; + bool is_multicast_skb = is_multicast_ether_addr(dest) && + !is_broadcast_ether_addr(dest); if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) return NULL; @@ -86,6 +97,10 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb->dev) return NULL; + /* Only unicast or broadcast frames are offloaded */ + if (likely(!is_multicast_skb)) + skb->offload_fwd_mark = 1; + return skb; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index c8b903302ff2..dac65180c4ef 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -400,34 +400,6 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, } EXPORT_SYMBOL(alloc_etherdev_mqs); -static void devm_free_netdev(struct device *dev, void *res) -{ - free_netdev(*(struct net_device **)res); -} - -struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv, - unsigned int txqs, unsigned int rxqs) -{ - struct net_device **dr; - struct net_device *netdev; - - dr = devres_alloc(devm_free_netdev, sizeof(*dr), GFP_KERNEL); - if (!dr) - return NULL; - - netdev = alloc_etherdev_mqs(sizeof_priv, txqs, rxqs); - if (!netdev) { - devres_free(dr); - return NULL; - } - - *dr = netdev; - devres_add(dev, dr); - - return netdev; -} -EXPORT_SYMBOL(devm_alloc_etherdev_mqs); - ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) { return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 5ba06eabe8c2..7b7a0456c15c 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -5,7 +5,11 @@ #include "netlink.h" #include "common.h" -/* CABLE_TEST_ACT */ +/* 802.3 standard allows 100 meters for BaseT cables. However longer + * cables might work, depending on the quality of the cables and the + * PHY. So allow testing for up to 150 meters. + */ +#define MAX_CABLE_LENGTH_CM (150 * 100) static const struct nla_policy cable_test_act_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = { @@ -13,7 +17,7 @@ cable_test_act_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = { [ETHTOOL_A_CABLE_TEST_HEADER] = { .type = NLA_NESTED }, }; -static int ethnl_cable_test_started(struct phy_device *phydev) +static int ethnl_cable_test_started(struct phy_device *phydev, u8 cmd) { struct sk_buff *skb; int err = -ENOMEM; @@ -23,7 +27,7 @@ static int ethnl_cable_test_started(struct phy_device *phydev) if (!skb) goto out; - ehdr = ethnl_bcastmsg_put(skb, ETHTOOL_MSG_CABLE_TEST_NTF); + ehdr = ethnl_bcastmsg_put(skb, cmd); if (!ehdr) { err = -EMSGSIZE; goto out; @@ -86,7 +90,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) ethnl_ops_complete(dev); if (!ret) - ethnl_cable_test_started(dev->phydev); + ethnl_cable_test_started(dev->phydev, + ETHTOOL_MSG_CABLE_TEST_NTF); out_rtnl: rtnl_unlock(); @@ -95,16 +100,18 @@ out_dev_put: return ret; } -int ethnl_cable_test_alloc(struct phy_device *phydev) +int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd) { int err = -ENOMEM; - phydev->skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + /* One TDR sample occupies 20 bytes. For a 150 meter cable, + * with four pairs, around 12K is needed. + */ + phydev->skb = genlmsg_new(SZ_16K, GFP_KERNEL); if (!phydev->skb) goto out; - phydev->ehdr = ethnl_bcastmsg_put(phydev->skb, - ETHTOOL_MSG_CABLE_TEST_NTF); + phydev->ehdr = ethnl_bcastmsg_put(phydev->skb, cmd); if (!phydev->ehdr) { err = -EMSGSIZE; goto out; @@ -199,3 +206,226 @@ err: return ret; } EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length); + +struct cable_test_tdr_req_info { + struct ethnl_req_info base; +}; + +static const struct nla_policy +cable_test_tdr_act_cfg_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1] = { + [ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .type = NLA_U32 }, + [ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST] = { .type = NLA_U32 }, + [ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP] = { .type = NLA_U32 }, + [ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR] = { .type = NLA_U8 }, +}; + +static const struct nla_policy +cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1] = { + [ETHTOOL_A_CABLE_TEST_TDR_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_CABLE_TEST_TDR_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_CABLE_TEST_TDR_CFG] = { .type = NLA_NESTED }, +}; + +/* CABLE_TEST_TDR_ACT */ +static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest, + struct genl_info *info, + struct phy_tdr_config *cfg) +{ + struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1]; + int ret; + + ret = nla_parse_nested(tb, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX, nest, + cable_test_tdr_act_cfg_policy, info->extack); + if (ret < 0) + return ret; + + if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]) + cfg->first = nla_get_u32( + tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]); + else + cfg->first = 100; + if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]) + cfg->last = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]); + else + cfg->last = MAX_CABLE_LENGTH_CM; + + if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]) + cfg->step = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]); + else + cfg->step = 100; + + if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]) { + cfg->pair = nla_get_u8(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]); + if (cfg->pair > ETHTOOL_A_CABLE_PAIR_D) { + NL_SET_ERR_MSG_ATTR( + info->extack, + tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR], + "invalid pair parameter"); + return -EINVAL; + } + } else { + cfg->pair = PHY_PAIR_ALL; + } + + if (cfg->first > MAX_CABLE_LENGTH_CM) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST], + "invalid first parameter"); + return -EINVAL; + } + + if (cfg->last > MAX_CABLE_LENGTH_CM) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST], + "invalid last parameter"); + return -EINVAL; + } + + if (cfg->first > cfg->last) { + NL_SET_ERR_MSG(info->extack, "invalid first/last parameter"); + return -EINVAL; + } + + if (!cfg->step) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP], + "invalid step parameter"); + return -EINVAL; + } + + if (cfg->step > (cfg->last - cfg->first)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP], + "step parameter too big"); + return -EINVAL; + } + + return 0; +} + +int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1]; + struct ethnl_req_info req_info = {}; + struct phy_tdr_config cfg; + struct net_device *dev; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_CABLE_TEST_TDR_MAX, + cable_test_tdr_act_policy, info->extack); + if (ret < 0) + return ret; + + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + + dev = req_info.dev; + if (!dev->phydev) { + ret = -EOPNOTSUPP; + goto out_dev_put; + } + + ret = ethnl_act_cable_test_tdr_cfg(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG], + info, &cfg); + if (ret) + goto out_dev_put; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + + ret = phy_start_cable_test_tdr(dev->phydev, info->extack, &cfg); + + ethnl_ops_complete(dev); + + if (!ret) + ethnl_cable_test_started(dev->phydev, + ETHTOOL_MSG_CABLE_TEST_TDR_NTF); + +out_rtnl: + rtnl_unlock(); +out_dev_put: + dev_put(dev); + return ret; +} + +int ethnl_cable_test_amplitude(struct phy_device *phydev, + u8 pair, s16 mV) +{ + struct nlattr *nest; + int ret = -EMSGSIZE; + + nest = nla_nest_start(phydev->skb, + ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_AMPLITUDE_PAIR, pair)) + goto err; + if (nla_put_u16(phydev->skb, ETHTOOL_A_CABLE_AMPLITUDE_mV, mV)) + goto err; + + nla_nest_end(phydev->skb, nest); + return 0; + +err: + nla_nest_cancel(phydev->skb, nest); + return ret; +} +EXPORT_SYMBOL_GPL(ethnl_cable_test_amplitude); + +int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV) +{ + struct nlattr *nest; + int ret = -EMSGSIZE; + + nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_TDR_NEST_PULSE); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u16(phydev->skb, ETHTOOL_A_CABLE_PULSE_mV, mV)) + goto err; + + nla_nest_end(phydev->skb, nest); + return 0; + +err: + nla_nest_cancel(phydev->skb, nest); + return ret; +} +EXPORT_SYMBOL_GPL(ethnl_cable_test_pulse); + +int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last, + u32 step) +{ + struct nlattr *nest; + int ret = -EMSGSIZE; + + nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_TDR_NEST_STEP); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE, + first)) + goto err; + + if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_LAST_DISTANCE, last)) + goto err; + + if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_STEP_DISTANCE, step)) + goto err; + + nla_nest_end(phydev->skb, nest); + return 0; + +err: + nla_nest_cancel(phydev->skb, nest); + return ret; +} +EXPORT_SYMBOL_GPL(ethnl_cable_test_step); diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 3aa4975919d7..9ef54cdcf662 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <net/xdp_sock.h> +#include <net/xdp_sock_drv.h> #include "netlink.h" #include "common.h" diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index eeb1137a3f23..b5df90c981c2 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -24,7 +24,7 @@ #include <linux/sched/signal.h> #include <linux/net.h> #include <net/devlink.h> -#include <net/xdp_sock.h> +#include <net/xdp_sock_drv.h> #include <net/flow_offload.h> #include <linux/ethtool_netlink.h> #include <generated/utsrelease.h> @@ -1510,11 +1510,14 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; + int ret; if (!dev->ethtool_ops->get_coalesce) return -EOPNOTSUPP; - dev->ethtool_ops->get_coalesce(dev, &coalesce); + ret = dev->ethtool_ops->get_coalesce(dev, &coalesce); + if (ret) + return ret; if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) return -EFAULT; diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 2740cde0a182..7f47ba89054e 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -2,6 +2,7 @@ #include "netlink.h" #include "common.h" +#include <linux/phy.h> struct linkstate_req_info { struct ethnl_req_info base; @@ -10,6 +11,8 @@ struct linkstate_req_info { struct linkstate_reply_data { struct ethnl_reply_data base; int link; + int sqi; + int sqi_max; }; #define LINKSTATE_REPDATA(__reply_base) \ @@ -20,8 +23,46 @@ linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = { [ETHTOOL_A_LINKSTATE_UNSPEC] = { .type = NLA_REJECT }, [ETHTOOL_A_LINKSTATE_HEADER] = { .type = NLA_NESTED }, [ETHTOOL_A_LINKSTATE_LINK] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKSTATE_SQI] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKSTATE_SQI_MAX] = { .type = NLA_REJECT }, }; +static int linkstate_get_sqi(struct net_device *dev) +{ + struct phy_device *phydev = dev->phydev; + int ret; + + if (!phydev) + return -EOPNOTSUPP; + + mutex_lock(&phydev->lock); + if (!phydev->drv || !phydev->drv->get_sqi) + ret = -EOPNOTSUPP; + else + ret = phydev->drv->get_sqi(phydev); + mutex_unlock(&phydev->lock); + + return ret; +} + +static int linkstate_get_sqi_max(struct net_device *dev) +{ + struct phy_device *phydev = dev->phydev; + int ret; + + if (!phydev) + return -EOPNOTSUPP; + + mutex_lock(&phydev->lock); + if (!phydev->drv || !phydev->drv->get_sqi_max) + ret = -EOPNOTSUPP; + else + ret = phydev->drv->get_sqi_max(phydev); + mutex_unlock(&phydev->lock); + + return ret; +} + static int linkstate_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, struct genl_info *info) @@ -34,6 +75,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; data->link = __ethtool_get_link(dev); + + ret = linkstate_get_sqi(dev); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + + data->sqi = ret; + + ret = linkstate_get_sqi_max(dev); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + + data->sqi_max = ret; + ethnl_ops_complete(dev); return 0; @@ -42,8 +96,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, static int linkstate_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { - return nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base); + int len; + + len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + 0; + + if (data->sqi != -EOPNOTSUPP) + len += nla_total_size(sizeof(u32)); + + if (data->sqi_max != -EOPNOTSUPP) + len += nla_total_size(sizeof(u32)); + + return len; } static int linkstate_fill_reply(struct sk_buff *skb, @@ -56,6 +121,14 @@ static int linkstate_fill_reply(struct sk_buff *skb, nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link)) return -EMSGSIZE; + if (data->sqi != -EOPNOTSUPP && + nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) + return -EMSGSIZE; + + if (data->sqi_max != -EOPNOTSUPP && + nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) + return -EMSGSIZE; + return 0; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 87bc02da74bc..88fd07f47040 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -342,7 +342,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; - reply_len = ret; + reply_len = ret + ethnl_reply_header_size(); ret = -ENOMEM; rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd, ops->hdr_attr, info, &reply_payload); @@ -588,7 +588,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; - reply_len = ret; + reply_len = ret + ethnl_reply_header_size(); ret = -ENOMEM; skb = genlmsg_new(reply_len, GFP_KERNEL); if (!skb) @@ -844,6 +844,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_act_cable_test, }, + { + .cmd = ETHTOOL_MSG_CABLE_TEST_TDR_ACT, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_act_cable_test_tdr, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index b0eb5d920099..9a96b6e90dc2 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -360,5 +360,6 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info); int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info); int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); +int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 95eae5c68a52..0eed4e4909ab 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -324,7 +324,6 @@ static int strset_reply_size(const struct ethnl_req_info *req_base, int len = 0; int ret; - len += ethnl_reply_header_size(); for (i = 0; i < ETH_SS_COUNT; i++) { const struct strset_info *set_info = &data->sets[i]; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5da4733067fb..23ba5045e3d3 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -384,6 +384,7 @@ config INET_ESPINTCP depends on XFRM && INET_ESP select STREAM_PARSER select NET_SOCK_MSG + select XFRM_ESPINTCP help Support for RFC 8229 encapsulation of ESP and IKE over TCP/IPv4 sockets. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fcf0d12a407a..02aa5cb3a4fd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -116,6 +116,7 @@ #include <linux/mroute.h> #endif #include <net/l3mdev.h> +#include <net/compat.h> #include <trace/events/sock.h> @@ -755,12 +756,11 @@ do_err: } EXPORT_SYMBOL(inet_accept); - /* * This does both peername and sockname. */ int inet_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) + int peer) { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); @@ -781,6 +781,11 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; } + if (cgroup_bpf_enabled) + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin, + peer ? BPF_CGROUP_INET4_GETPEERNAME : + BPF_CGROUP_INET4_GETSOCKNAME, + NULL); memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); return sizeof(*sin); } @@ -970,17 +975,42 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL(inet_ioctl); #ifdef CONFIG_COMPAT +static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_rtentry __user *ur) +{ + compat_uptr_t rtdev; + struct rtentry rt; + + if (copy_from_user(&rt.rt_dst, &ur->rt_dst, + 3 * sizeof(struct sockaddr)) || + get_user(rt.rt_flags, &ur->rt_flags) || + get_user(rt.rt_metric, &ur->rt_metric) || + get_user(rt.rt_mtu, &ur->rt_mtu) || + get_user(rt.rt_window, &ur->rt_window) || + get_user(rt.rt_irtt, &ur->rt_irtt) || + get_user(rtdev, &ur->rt_dev)) + return -EFAULT; + + rt.rt_dev = compat_ptr(rtdev); + return ip_rt_ioctl(sock_net(sk), cmd, &rt); +} + static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = compat_ptr(arg); struct sock *sk = sock->sk; - int err = -ENOIOCTLCMD; - - if (sk->sk_prot->compat_ioctl) - err = sk->sk_prot->compat_ioctl(sk, cmd, arg); - return err; + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return inet_compat_routing_ioctl(sk, cmd, argp); + default: + if (!sk->sk_prot->compat_ioctl) + return -ENOIOCTLCMD; + return sk->sk_prot->compat_ioctl(sk, cmd, arg); + } } -#endif +#endif /* CONFIG_COMPAT */ const struct proto_ops inet_stream_ops = { .family = PF_INET, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f048d0a188b7..123a6d39438f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -276,6 +276,7 @@ static struct in_device *inetdev_init(struct net_device *dev) err = devinet_sysctl_register(in_dev); if (err) { in_dev->dead = 1; + neigh_parms_release(&arp_tbl, in_dev->arp_parms); in_dev_put(in_dev); in_dev = NULL; goto out; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 731022cff600..d14133eac476 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -63,10 +63,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, sp->olen++; xo = xfrm_offload(skb); - if (!xo) { - xfrm_state_put(x); + if (!xo) goto out_reset; - } } xo->flags |= XFRM_GRO; @@ -139,19 +137,27 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, struct xfrm_offload *xo = xfrm_offload(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; - int proto = xo->proto; + u8 proto = xo->proto; skb->transport_header += x->props.header_len; - if (proto == IPPROTO_BEETPH) { - struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data; + if (x->sel.family != AF_INET6) { + if (proto == IPPROTO_BEETPH) { + struct ip_beet_phdr *ph = + (struct ip_beet_phdr *)skb->data; + + skb->transport_header += ph->hdrlen * 8; + proto = ph->nexthdr; + } else { + skb->transport_header -= IPV4_BEET_PHMAXLEN; + } + } else { + __be16 frag; - skb->transport_header += ph->hdrlen * 8; - proto = ph->nexthdr; - } else if (x->sel.family != AF_INET6) { - skb->transport_header -= IPV4_BEET_PHMAXLEN; - } else if (proto == IPPROTO_TCP) { - skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + skb->transport_header += + ipv6_skip_exthdr(skb, 0, &proto, &frag); + if (proto == IPPROTO_TCP) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; } __skb_pull(skb, skb_transport_offset(skb)); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 213be9c050ad..41079490a118 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -309,17 +309,18 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) { bool dev_match = false; #ifdef CONFIG_IP_ROUTE_MULTIPATH - int ret; + if (unlikely(fi->nh)) { + dev_match = nexthop_uses_dev(fi->nh, dev); + } else { + int ret; - for (ret = 0; ret < fib_info_num_path(fi); ret++) { - const struct fib_nh_common *nhc = fib_info_nhc(fi, ret); + for (ret = 0; ret < fib_info_num_path(fi); ret++) { + const struct fib_nh_common *nhc = fib_info_nhc(fi, ret); - if (nhc->nhc_dev == dev) { - dev_match = true; - break; - } else if (l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) { - dev_match = true; - break; + if (nhc_l3mdev_matches_dev(nhc, dev)) { + dev_match = true; + break; + } } } #else @@ -918,7 +919,6 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, else filter->dump_exceptions = false; - filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC); filter->flags = rtm->rtm_flags; filter->protocol = rtm->rtm_protocol; filter->rt_type = rtm->rtm_type; @@ -990,7 +990,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (filter.table_id) { tb = fib_get_table(net, filter.table_id); if (!tb) { - if (filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != PF_INET) return skb->len; NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4f334b425538..248f1c1959a6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1371,6 +1371,26 @@ static inline t_key prefix_mismatch(t_key key, struct key_vector *n) return (key ^ prefix) & (prefix | -prefix); } +bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, + const struct flowi4 *flp) +{ + if (nhc->nhc_flags & RTNH_F_DEAD) + return false; + + if (ip_ignore_linkdown(nhc->nhc_dev) && + nhc->nhc_flags & RTNH_F_LINKDOWN && + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) + return false; + + if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { + if (flp->flowi4_oif && + flp->flowi4_oif != nhc->nhc_oif) + return false; + } + + return true; +} + /* should be called with rcu_read_lock */ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) @@ -1503,6 +1523,7 @@ found: /* Step 3: Process the leaf, if that fails fall back to backtracing */ hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; + struct fib_nh_common *nhc; int nhsel, err; if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) { @@ -1528,26 +1549,25 @@ out_reject: if (fi->fib_flags & RTNH_F_DEAD) continue; - if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) { - err = fib_props[RTN_BLACKHOLE].error; - goto out_reject; + if (unlikely(fi->nh)) { + if (nexthop_is_blackhole(fi->nh)) { + err = fib_props[RTN_BLACKHOLE].error; + goto out_reject; + } + + nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp, + &nhsel); + if (nhc) + goto set_result; + goto miss; } for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { - struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); + nhc = fib_info_nhc(fi, nhsel); - if (nhc->nhc_flags & RTNH_F_DEAD) + if (!fib_lookup_good_nhc(nhc, fib_flags, flp)) continue; - if (ip_ignore_linkdown(nhc->nhc_dev) && - nhc->nhc_flags & RTNH_F_LINKDOWN && - !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) - continue; - if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { - if (flp->flowi4_oif && - flp->flowi4_oif != nhc->nhc_oif) - continue; - } - +set_result: if (!(fib_flags & FIB_LOOKUP_NOREF)) refcount_inc(&fi->fib_clntref); @@ -1568,6 +1588,7 @@ out_reject: return err; } } +miss: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_miss); #endif diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 47f0502b2101..7b272bbed2b4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2565,9 +2565,9 @@ done: } int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen) + struct sockaddr_storage __user *p) { - int err, i, count, copycount; + int i, count, copycount; struct sockaddr_in *psin; __be32 addr; struct ip_mc_socklist *pmc; @@ -2583,37 +2583,29 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - err = -EADDRNOTAVAIL; - for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; } if (!pmc) /* must have a prior join */ - goto done; + return -EADDRNOTAVAIL; gsf->gf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - if (put_user(GROUP_FILTER_SIZE(copycount), optlen) || - copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { - return -EFAULT; - } - for (i = 0; i < copycount; i++) { + for (i = 0; i < copycount; i++, p++) { struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; memset(&ss, 0, sizeof(ss)); psin->sin_family = AF_INET; psin->sin_addr.s_addr = psl->sl_addr[i]; - if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss))) + if (copy_to_user(p, &ss, sizeof(ss))) return -EFAULT; } return 0; -done: - return err; } /* diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d6faf3702824..f40b1b72f979 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -24,17 +24,19 @@ #include <net/addrconf.h> #if IS_ENABLED(CONFIG_IPV6) -/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 - * only, and any IPv4 addresses if not IPv6 only - * match_wildcard == false: addresses must be exactly the same, i.e. - * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, - * and 0.0.0.0 equals to 0.0.0.0 only +/* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses + * if IPv6 only, and any IPv4 addresses + * if not IPv6 only + * match_sk*_wildcard == false: addresses must be exactly the same, i.e. + * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, + * and 0.0.0.0 equals to 0.0.0.0 only */ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, const struct in6_addr *sk2_rcv_saddr6, __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, bool sk1_ipv6only, bool sk2_ipv6only, - bool match_wildcard) + bool match_sk1_wildcard, + bool match_sk2_wildcard) { int addr_type = ipv6_addr_type(sk1_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -44,8 +46,8 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) return true; - if (!sk1_rcv_saddr || !sk2_rcv_saddr) - return match_wildcard; + return (match_sk1_wildcard && !sk1_rcv_saddr) || + (match_sk2_wildcard && !sk2_rcv_saddr); } return false; } @@ -53,11 +55,11 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) return true; - if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && + if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) return true; - if (addr_type == IPV6_ADDR_ANY && match_wildcard && + if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard && !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) return true; @@ -69,18 +71,19 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, } #endif -/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses - * match_wildcard == false: addresses must be exactly the same, i.e. - * 0.0.0.0 only equals to 0.0.0.0 +/* match_sk*_wildcard == true: 0.0.0.0 equals to any IPv4 addresses + * match_sk*_wildcard == false: addresses must be exactly the same, i.e. + * 0.0.0.0 only equals to 0.0.0.0 */ static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, - bool sk2_ipv6only, bool match_wildcard) + bool sk2_ipv6only, bool match_sk1_wildcard, + bool match_sk2_wildcard) { if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) return true; - if (!sk1_rcv_saddr || !sk2_rcv_saddr) - return match_wildcard; + return (match_sk1_wildcard && !sk1_rcv_saddr) || + (match_sk2_wildcard && !sk2_rcv_saddr); } return false; } @@ -96,10 +99,12 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, sk2->sk_rcv_saddr, ipv6_only_sock(sk), ipv6_only_sock(sk2), + match_wildcard, match_wildcard); #endif return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr, - ipv6_only_sock(sk2), match_wildcard); + ipv6_only_sock(sk2), match_wildcard, + match_wildcard); } EXPORT_SYMBOL(inet_rcv_saddr_equal); @@ -285,10 +290,10 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb, tb->fast_rcv_saddr, sk->sk_rcv_saddr, tb->fast_ipv6_only, - ipv6_only_sock(sk), true); + ipv6_only_sock(sk), true, false); #endif return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr, - ipv6_only_sock(sk), true); + ipv6_only_sock(sk), true, false); } /* Obtain a reference to a local port for the given sock, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0ce9b91ff55c..4e31f23e4117 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -768,45 +768,37 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) } } -static int ipgre_tunnel_ioctl(struct net_device *dev, - struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, + int cmd) { - struct ip_tunnel_parm p; int err; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; - if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || - ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) + if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) || + ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } - p.i_flags = gre_flags_to_tnl_flags(p.i_flags); - p.o_flags = gre_flags_to_tnl_flags(p.o_flags); + p->i_flags = gre_flags_to_tnl_flags(p->i_flags); + p->o_flags = gre_flags_to_tnl_flags(p->o_flags); - err = ip_tunnel_ioctl(dev, &p, cmd); + err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd == SIOCCHGTUNNEL) { struct ip_tunnel *t = netdev_priv(dev); - t->parms.i_flags = p.i_flags; - t->parms.o_flags = p.o_flags; + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; if (strcmp(dev->rtnl_link_ops->kind, "erspan")) ipgre_link_update(dev, true); } - p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); - p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; - + p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); + p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); return 0; } @@ -924,10 +916,11 @@ static const struct net_device_ops ipgre_netdev_ops = { .ndo_stop = ipgre_close, #endif .ndo_start_xmit = ipgre_xmit, - .ndo_do_ioctl = ipgre_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipgre_tunnel_ctl, }; #define GRE_FEATURES (NETIF_F_SG | \ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8206047d70b6..84ec3703c909 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -560,6 +560,61 @@ out: return err; } +static void __ip_sock_set_tos(struct sock *sk, int val) +{ + if (sk->sk_type == SOCK_STREAM) { + val &= ~INET_ECN_MASK; + val |= inet_sk(sk)->tos & INET_ECN_MASK; + } + if (inet_sk(sk)->tos != val) { + inet_sk(sk)->tos = val; + sk->sk_priority = rt_tos2priority(val); + sk_dst_reset(sk); + } +} + +void ip_sock_set_tos(struct sock *sk, int val) +{ + lock_sock(sk); + __ip_sock_set_tos(sk, val); + release_sock(sk); +} +EXPORT_SYMBOL(ip_sock_set_tos); + +void ip_sock_set_freebind(struct sock *sk) +{ + lock_sock(sk); + inet_sk(sk)->freebind = true; + release_sock(sk); +} +EXPORT_SYMBOL(ip_sock_set_freebind); + +void ip_sock_set_recverr(struct sock *sk) +{ + lock_sock(sk); + inet_sk(sk)->recverr = true; + release_sock(sk); +} +EXPORT_SYMBOL(ip_sock_set_recverr); + +int ip_sock_set_mtu_discover(struct sock *sk, int val) +{ + if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) + return -EINVAL; + lock_sock(sk); + inet_sk(sk)->pmtudisc = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(ip_sock_set_mtu_discover); + +void ip_sock_set_pktinfo(struct sock *sk) +{ + lock_sock(sk); + inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO; + release_sock(sk); +} +EXPORT_SYMBOL(ip_sock_set_pktinfo); /* * Socket option code for IP. This is the end of the line after any @@ -587,6 +642,86 @@ static bool setsockopt_needs_rtnl(int optname) return false; } +static int set_mcast_msfilter(struct sock *sk, int ifindex, + int numsrc, int fmode, + struct sockaddr_storage *group, + struct sockaddr_storage *list) +{ + int msize = IP_MSFILTER_SIZE(numsrc); + struct ip_msfilter *msf; + struct sockaddr_in *psin; + int err, i; + + msf = kmalloc(msize, GFP_KERNEL); + if (!msf) + return -ENOBUFS; + + psin = (struct sockaddr_in *)group; + if (psin->sin_family != AF_INET) + goto Eaddrnotavail; + msf->imsf_multiaddr = psin->sin_addr.s_addr; + msf->imsf_interface = 0; + msf->imsf_fmode = fmode; + msf->imsf_numsrc = numsrc; + for (i = 0; i < numsrc; ++i) { + psin = (struct sockaddr_in *)&list[i]; + + if (psin->sin_family != AF_INET) + goto Eaddrnotavail; + msf->imsf_slist[i] = psin->sin_addr.s_addr; + } + err = ip_mc_msfilter(sk, msf, ifindex); + kfree(msf); + return err; + +Eaddrnotavail: + kfree(msf); + return -EADDRNOTAVAIL; +} + +static int do_mcast_group_source(struct sock *sk, int optname, + struct group_source_req *greqs) +{ + struct ip_mreq_source mreqs; + struct sockaddr_in *psin; + int omode, add, err; + + if (greqs->gsr_group.ss_family != AF_INET || + greqs->gsr_source.ss_family != AF_INET) + return -EADDRNOTAVAIL; + + psin = (struct sockaddr_in *)&greqs->gsr_group; + mreqs.imr_multiaddr = psin->sin_addr.s_addr; + psin = (struct sockaddr_in *)&greqs->gsr_source; + mreqs.imr_sourceaddr = psin->sin_addr.s_addr; + mreqs.imr_interface = 0; /* use index for mc_source */ + + if (optname == MCAST_BLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 1; + } else if (optname == MCAST_UNBLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 0; + } else if (optname == MCAST_JOIN_SOURCE_GROUP) { + struct ip_mreqn mreq; + + psin = (struct sockaddr_in *)&greqs->gsr_group; + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_address.s_addr = 0; + mreq.imr_ifindex = greqs->gsr_interface; + err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); + if (err && err != -EADDRINUSE) + return err; + greqs->gsr_interface = mreq.imr_ifindex; + omode = MCAST_INCLUDE; + add = 1; + } else /* MCAST_LEAVE_SOURCE_GROUP */ { + omode = MCAST_INCLUDE; + add = 0; + } + return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface); +} + static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -743,15 +878,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE; break; case IP_TOS: /* This sets both TOS and Precedence */ - if (sk->sk_type == SOCK_STREAM) { - val &= ~INET_ECN_MASK; - val |= inet->tos & INET_ECN_MASK; - } - if (inet->tos != val) { - inet->tos = val; - sk->sk_priority = rt_tos2priority(val); - sk_dst_reset(sk); - } + __ip_sock_set_tos(sk, val); break; case IP_TTL: if (optlen < 1) @@ -1029,9 +1156,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; - struct ip_mreq_source mreqs; - struct sockaddr_in *psin; - int omode, add; if (optlen != sizeof(struct group_source_req)) goto e_inval; @@ -1039,50 +1163,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EFAULT; break; } - if (greqs.gsr_group.ss_family != AF_INET || - greqs.gsr_source.ss_family != AF_INET) { - err = -EADDRNOTAVAIL; - break; - } - psin = (struct sockaddr_in *)&greqs.gsr_group; - mreqs.imr_multiaddr = psin->sin_addr.s_addr; - psin = (struct sockaddr_in *)&greqs.gsr_source; - mreqs.imr_sourceaddr = psin->sin_addr.s_addr; - mreqs.imr_interface = 0; /* use index for mc_source */ - - if (optname == MCAST_BLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 1; - } else if (optname == MCAST_UNBLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 0; - } else if (optname == MCAST_JOIN_SOURCE_GROUP) { - struct ip_mreqn mreq; - - psin = (struct sockaddr_in *)&greqs.gsr_group; - mreq.imr_multiaddr = psin->sin_addr; - mreq.imr_address.s_addr = 0; - mreq.imr_ifindex = greqs.gsr_interface; - err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); - if (err && err != -EADDRINUSE) - break; - greqs.gsr_interface = mreq.imr_ifindex; - omode = MCAST_INCLUDE; - add = 1; - } else /* MCAST_LEAVE_SOURCE_GROUP */ { - omode = MCAST_INCLUDE; - add = 0; - } - err = ip_mc_source(add, omode, sk, &mreqs, - greqs.gsr_interface); + err = do_mcast_group_source(sk, optname, &greqs); break; } case MCAST_MSFILTER: { - struct sockaddr_in *psin; - struct ip_msfilter *msf = NULL; struct group_filter *gsf = NULL; - int msize, i, ifindex; if (optlen < GROUP_FILTER_SIZE(0)) goto e_inval; @@ -1095,7 +1181,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = PTR_ERR(gsf); break; } - /* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffff || gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) { @@ -1106,36 +1191,10 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EINVAL; goto mc_msf_out; } - msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); - msf = kmalloc(msize, GFP_KERNEL); - if (!msf) { - err = -ENOBUFS; - goto mc_msf_out; - } - ifindex = gsf->gf_interface; - psin = (struct sockaddr_in *)&gsf->gf_group; - if (psin->sin_family != AF_INET) { - err = -EADDRNOTAVAIL; - goto mc_msf_out; - } - msf->imsf_multiaddr = psin->sin_addr.s_addr; - msf->imsf_interface = 0; - msf->imsf_fmode = gsf->gf_fmode; - msf->imsf_numsrc = gsf->gf_numsrc; - err = -EADDRNOTAVAIL; - for (i = 0; i < gsf->gf_numsrc; ++i) { - psin = (struct sockaddr_in *)&gsf->gf_slist[i]; - - if (psin->sin_family != AF_INET) - goto mc_msf_out; - msf->imsf_slist[i] = psin->sin_addr.s_addr; - } - kfree(gsf); - gsf = NULL; - - err = ip_mc_msfilter(sk, msf, ifindex); + err = set_mcast_msfilter(sk, gsf->gf_interface, + gsf->gf_numsrc, gsf->gf_fmode, + &gsf->gf_group, gsf->gf_slist); mc_msf_out: - kfree(msf); kfree(gsf); break; } @@ -1272,9 +1331,113 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_IP) return -ENOPROTOOPT; - if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER) - return compat_mc_setsockopt(sk, level, optname, optval, optlen, - ip_setsockopt); + switch (optname) { + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + { + struct compat_group_req __user *gr32 = (void __user *)optval; + struct group_req greq; + struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group; + struct ip_mreqn mreq; + + if (optlen < sizeof(struct compat_group_req)) + return -EINVAL; + + if (get_user(greq.gr_interface, &gr32->gr_interface) || + copy_from_user(&greq.gr_group, &gr32->gr_group, + sizeof(greq.gr_group))) + return -EFAULT; + + if (psin->sin_family != AF_INET) + return -EINVAL; + + memset(&mreq, 0, sizeof(mreq)); + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_ifindex = greq.gr_interface; + + rtnl_lock(); + lock_sock(sk); + if (optname == MCAST_JOIN_GROUP) + err = ip_mc_join_group(sk, &mreq); + else + err = ip_mc_leave_group(sk, &mreq); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + { + struct compat_group_source_req __user *gsr32 = (void __user *)optval; + struct group_source_req greqs; + + if (optlen != sizeof(struct compat_group_source_req)) + return -EINVAL; + + if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || + copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, + sizeof(greqs.gsr_group)) || + copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, + sizeof(greqs.gsr_source))) + return -EFAULT; + + rtnl_lock(); + lock_sock(sk); + err = do_mcast_group_source(sk, optname, &greqs); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_MSFILTER: + { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter *gf32; + unsigned int n; + void *p; + + if (optlen < size0) + return -EINVAL; + if (optlen > sysctl_optmem_max - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) + return -ENOMEM; + gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + if (copy_from_user(gf32, optval, optlen)) { + err = -EFAULT; + goto mc_msf_out; + } + + n = gf32->gf_numsrc; + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n >= 0x1ffffff) { + err = -ENOBUFS; + goto mc_msf_out; + } + if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { + err = -EINVAL; + goto mc_msf_out; + } + + rtnl_lock(); + lock_sock(sk); + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + err = -ENOBUFS; + else + err = set_mcast_msfilter(sk, gf32->gf_interface, + n, gf32->gf_fmode, + &gf32->gf_group, gf32->gf_slist); + release_sock(sk); + rtnl_unlock(); +mc_msf_out: + kfree(p); + return err; + } + } err = do_ip_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER @@ -1465,19 +1628,28 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, } case MCAST_MSFILTER: { + struct group_filter __user *p = (void __user *)optval; struct group_filter gsf; + const int size0 = offsetof(struct group_filter, gf_slist); + int num; - if (len < GROUP_FILTER_SIZE(0)) { + if (len < size0) { err = -EINVAL; goto out; } - if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { + if (copy_from_user(&gsf, p, size0)) { err = -EFAULT; goto out; } - err = ip_mc_gsfget(sk, &gsf, - (struct group_filter __user *)optval, - optlen); + num = gsf.gf_numsrc; + err = ip_mc_gsfget(sk, &gsf, p->gf_slist); + if (err) + goto out; + if (gsf.gf_numsrc < num) + num = gsf.gf_numsrc; + if (put_user(GROUP_FILTER_SIZE(num), optlen) || + copy_to_user(p, &gsf, size0)) + err = -EFAULT; goto out; } case IP_MULTICAST_ALL: @@ -1590,9 +1762,47 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, { int err; - if (optname == MCAST_MSFILTER) - return compat_mc_getsockopt(sk, level, optname, optval, optlen, - ip_getsockopt); + if (optname == MCAST_MSFILTER) { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter __user *p = (void __user *)optval; + struct compat_group_filter gf32; + struct group_filter gf; + int ulen, err; + int num; + + if (level != SOL_IP) + return -EOPNOTSUPP; + + if (get_user(ulen, optlen)) + return -EFAULT; + + if (ulen < size0) + return -EINVAL; + + if (copy_from_user(&gf32, p, size0)) + return -EFAULT; + + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + rtnl_lock(); + lock_sock(sk); + err = ip_mc_gsfget(sk, &gf, p->gf_slist); + release_sock(sk); + rtnl_unlock(); + if (err) + return err; + if (gf.gf_numsrc < num) + num = gf.gf_numsrc; + ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); + if (put_user(ulen, optlen) || + put_user(gf.gf_fmode, &p->gf_fmode) || + put_user(gf.gf_numsrc, &p->gf_numsrc)) + return -EFAULT; + return 0; + } err = do_ip_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cd4b84310d92..f4f1d11eab50 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -860,7 +860,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, netdev_state_change(dev); } -int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; struct ip_tunnel *t = netdev_priv(dev); @@ -960,6 +960,20 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) done: return err; } +EXPORT_SYMBOL_GPL(ip_tunnel_ctl); + +int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct ip_tunnel_parm p; + int err; + + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); + if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; + return err; +} EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1b4e6f298648..1d9c8cff5ac3 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -93,7 +93,28 @@ static int vti_rcv_proto(struct sk_buff *skb) static int vti_rcv_tunnel(struct sk_buff *skb) { - return vti_rcv(skb, ip_hdr(skb)->saddr, true); + struct ip_tunnel_net *itn = net_generic(dev_net(skb->dev), vti_net_id); + const struct iphdr *iph = ip_hdr(skb); + struct ip_tunnel *tunnel; + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + struct tnl_ptk_info tpi = { + .proto = htons(ETH_P_IP), + }; + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + if (iptunnel_pull_header(skb, 0, tpi.proto, false)) + goto drop; + return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, false); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; } static int vti_rcv_cb(struct sk_buff *skb, int err) @@ -378,38 +399,31 @@ static int vti4_err(struct sk_buff *skb, u32 info) } static int -vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; - struct ip_tunnel_parm p; - - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5) + if (p->iph.version != 4 || p->iph.protocol != IPPROTO_IPIP || + p->iph.ihl != 5) return -EINVAL; } - if (!(p.i_flags & GRE_KEY)) - p.i_key = 0; - if (!(p.o_flags & GRE_KEY)) - p.o_key = 0; + if (!(p->i_flags & GRE_KEY)) + p->i_key = 0; + if (!(p->o_flags & GRE_KEY)) + p->o_key = 0; - p.i_flags = VTI_ISVTI; + p->i_flags = VTI_ISVTI; - err = ip_tunnel_ioctl(dev, &p, cmd); + err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd != SIOCDELTUNNEL) { - p.i_flags |= GRE_KEY; - p.o_flags |= GRE_KEY; + p->i_flags |= GRE_KEY; + p->o_flags |= GRE_KEY; } - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; return 0; } @@ -417,10 +431,11 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_init = vti_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = vti_tunnel_xmit, - .ndo_do_ioctl = vti_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = vti_tunnel_ctl, }; static void vti_tunnel_setup(struct net_device *dev) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2f01cf6fa0de..40fea52c8277 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -327,41 +327,29 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto) } static int -ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { - int err = 0; - struct ip_tunnel_parm p; - - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; - if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || - !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + if (p->iph.version != 4 || + !ipip_tunnel_ioctl_verify_protocol(p->iph.protocol) || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) return -EINVAL; } - p.i_key = p.o_key = 0; - p.i_flags = p.o_flags = 0; - err = ip_tunnel_ioctl(dev, &p, cmd); - if (err) - return err; - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; - - return 0; + p->i_key = p->o_key = 0; + p->i_flags = p->o_flags = 0; + return ip_tunnel_ctl(dev, p, cmd); } static const struct net_device_ops ipip_netdev_ops = { .ndo_init = ipip_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = ipip_tunnel_xmit, - .ndo_do_ioctl = ipip_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipip_tunnel_ctl, }; #define IPIP_FEATURES (NETIF_F_SG | \ @@ -698,7 +686,7 @@ out: rtnl_link_failed: #if IS_ENABLED(CONFIG_MPLS) - xfrm4_tunnel_deregister(&mplsip_handler, AF_INET); + xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); xfrm_tunnel_mplsip_failed: #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5c218db2dede..f5c7a58844a4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -421,37 +421,6 @@ static void ipmr_free_table(struct mr_table *mrt) /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ -static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) -{ - struct net *net = dev_net(dev); - - dev_close(dev); - - dev = __dev_get_by_name(net, "tunl0"); - if (dev) { - const struct net_device_ops *ops = dev->netdev_ops; - struct ifreq ifr; - struct ip_tunnel_parm p; - - memset(&p, 0, sizeof(p)); - p.iph.daddr = v->vifc_rmt_addr.s_addr; - p.iph.saddr = v->vifc_lcl_addr.s_addr; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPIP; - sprintf(p.name, "dvmrp%d", v->vifc_vifi); - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); - - set_fs(KERNEL_DS); - ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); - set_fs(oldfs); - } - } -} - /* Initialize ipmr pimreg/tunnel in_device */ static bool ipmr_init_vif_indev(const struct net_device *dev) { @@ -471,51 +440,52 @@ static bool ipmr_init_vif_indev(const struct net_device *dev) static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { - struct net_device *dev; - - dev = __dev_get_by_name(net, "tunl0"); + struct net_device *tunnel_dev, *new_dev; + struct ip_tunnel_parm p = { }; + int err; - if (dev) { - const struct net_device_ops *ops = dev->netdev_ops; - int err; - struct ifreq ifr; - struct ip_tunnel_parm p; + tunnel_dev = __dev_get_by_name(net, "tunl0"); + if (!tunnel_dev) + goto out; - memset(&p, 0, sizeof(p)); - p.iph.daddr = v->vifc_rmt_addr.s_addr; - p.iph.saddr = v->vifc_lcl_addr.s_addr; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPIP; - sprintf(p.name, "dvmrp%d", v->vifc_vifi); - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; + p.iph.daddr = v->vifc_rmt_addr.s_addr; + p.iph.saddr = v->vifc_lcl_addr.s_addr; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPIP; + sprintf(p.name, "dvmrp%d", v->vifc_vifi); - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); + if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl) + goto out; + err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, + SIOCADDTUNNEL); + if (err) + goto out; - set_fs(KERNEL_DS); - err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); - } else { - err = -EOPNOTSUPP; - } - dev = NULL; - - if (err == 0 && - (dev = __dev_get_by_name(net, p.name)) != NULL) { - dev->flags |= IFF_MULTICAST; - if (!ipmr_init_vif_indev(dev)) - goto failure; - if (dev_open(dev, NULL)) - goto failure; - dev_hold(dev); - } - } - return dev; + new_dev = __dev_get_by_name(net, p.name); + if (!new_dev) + goto out; -failure: - unregister_netdevice(dev); - return NULL; + new_dev->flags |= IFF_MULTICAST; + if (!ipmr_init_vif_indev(new_dev)) + goto out_unregister; + if (dev_open(new_dev, NULL)) + goto out_unregister; + dev_hold(new_dev); + err = dev_set_allmulti(new_dev, 1); + if (err) { + dev_close(new_dev); + tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, + SIOCDELTUNNEL); + dev_put(new_dev); + new_dev = ERR_PTR(err); + } + return new_dev; + +out_unregister: + unregister_netdevice(new_dev); +out: + return ERR_PTR(-ENOBUFS); } #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) @@ -867,14 +837,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, break; case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); - if (!dev) - return -ENOBUFS; - err = dev_set_allmulti(dev, 1); - if (err) { - ipmr_del_tunnel(dev, vifc); - dev_put(dev); - return err; - } + if (IS_ERR(dev)) + return PTR_ERR(dev); break; case VIFF_USE_IFINDEX: case 0: @@ -2613,7 +2577,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { - if (filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) return skb->len; NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 3c25a467b3ef..7afde8828b4c 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -166,8 +166,7 @@ pptp_outbound_pkt(struct sk_buff *skb, break; default: pr_debug("unknown outbound packet 0x%04x:%s\n", msg, - msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : - pptp_msg_name[0]); + pptp_msg_name(msg)); fallthrough; case PPTP_SET_LINK_INFO: /* only need to NAT in case PAC is behind NAT box */ @@ -268,9 +267,7 @@ pptp_inbound_pkt(struct sk_buff *skb, pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID); break; default: - pr_debug("unknown inbound packet %s\n", - msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : - pptp_msg_name[0]); + pr_debug("unknown inbound packet %s\n", pptp_msg_name(msg)); fallthrough; case PPTP_START_SESSION_REQUEST: case PPTP_START_SESSION_REPLY: diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 3957364d556c..ebafa5ed91ac 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -33,8 +33,20 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_ENCAP] = { .type = NLA_NESTED }, [NHA_GROUPS] = { .type = NLA_FLAG }, [NHA_MASTER] = { .type = NLA_U32 }, + [NHA_FDB] = { .type = NLA_FLAG }, }; +static int call_nexthop_notifiers(struct net *net, + enum nexthop_event_type event_type, + struct nexthop *nh) +{ + int err; + + err = atomic_notifier_call_chain(&net->nexthop.notifier_chain, + event_type, nh); + return notifier_to_errno(err); +} + static unsigned int nh_dev_hashfn(unsigned int val) { unsigned int mask = NH_DEV_HASHSIZE - 1; @@ -63,9 +75,16 @@ static void nexthop_free_mpath(struct nexthop *nh) int i; nhg = rcu_dereference_raw(nh->nh_grp); - for (i = 0; i < nhg->num_nh; ++i) - WARN_ON(nhg->nh_entries[i].nh); + for (i = 0; i < nhg->num_nh; ++i) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + + WARN_ON(!list_empty(&nhge->nh_list)); + nexthop_put(nhge->nh); + } + + WARN_ON(nhg->spare == nhg); + kfree(nhg->spare); kfree(nhg); } @@ -107,6 +126,7 @@ static struct nexthop *nexthop_alloc(void) INIT_LIST_HEAD(&nh->fi_list); INIT_LIST_HEAD(&nh->f6i_list); INIT_LIST_HEAD(&nh->grp_list); + INIT_LIST_HEAD(&nh->fdb_list); } return nh; } @@ -227,6 +247,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_u32(skb, NHA_ID, nh->id)) goto nla_put_failure; + if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB)) + goto nla_put_failure; + if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); @@ -241,7 +264,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_flag(skb, NHA_BLACKHOLE)) goto nla_put_failure; goto out; - } else { + } else if (!nh->is_fdb_nh) { const struct net_device *dev; dev = nhi->fib_nhc.nhc_dev; @@ -276,6 +299,7 @@ out: return 0; nla_put_failure: + nlmsg_cancel(skb, nlh); return -EMSGSIZE; } @@ -387,12 +411,35 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, return true; } +static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, + struct netlink_ext_ack *extack) +{ + struct nh_info *nhi; + + if (!nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); + return -EINVAL; + } + + nhi = rtnl_dereference(nh->nh_info); + if (*nh_family == AF_UNSPEC) { + *nh_family = nhi->family; + } else if (*nh_family != nhi->family) { + NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops"); + return -EINVAL; + } + + return 0; +} + static int nh_check_attr_group(struct net *net, struct nlattr *tb[], struct netlink_ext_ack *extack) { unsigned int len = nla_len(tb[NHA_GROUP]); + u8 nh_family = AF_UNSPEC; struct nexthop_grp *nhg; unsigned int i, j; + u8 nhg_fdb = 0; if (len & (sizeof(struct nexthop_grp) - 1)) { NL_SET_ERR_MSG(extack, @@ -421,6 +468,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], } } + if (tb[NHA_FDB]) + nhg_fdb = 1; nhg = nla_data(tb[NHA_GROUP]); for (i = 0; i < len; ++i) { struct nexthop *nh; @@ -432,11 +481,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], } if (!valid_group_nh(nh, len, extack)) return -EINVAL; + + if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) + return -EINVAL; + + if (!nhg_fdb && nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); + return -EINVAL; + } } - for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { + for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; - + if (tb[NHA_FDB]) + continue; NL_SET_ERR_MSG(extack, "No other attributes can be set in nexthop groups"); return -EINVAL; @@ -495,6 +553,9 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) if (hash > atomic_read(&nhge->upper_bound)) continue; + if (nhge->nh->is_fdb_nh) + return nhge->nh; + /* nexthops always check if it is good and does * not rely on a sysctl for this behavior */ @@ -564,6 +625,11 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, { struct nh_info *nhi; + if (nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + return -EINVAL; + } + /* fib6_src is unique to a fib6_info and limits the ability to cache * routes in fib6_nh within a nexthop that is potentially shared * across multiple fib entries. If the config wants to use source @@ -640,6 +706,12 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope, { int err = 0; + if (nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + err = -EINVAL; + goto out; + } + if (nh->is_group) { struct nh_group *nhg; @@ -693,41 +765,56 @@ static void nh_group_rebalance(struct nh_group *nhg) } } -static void remove_nh_grp_entry(struct nh_grp_entry *nhge, - struct nh_group *nhg, +static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge, struct nl_info *nlinfo) { + struct nh_grp_entry *nhges, *new_nhges; + struct nexthop *nhp = nhge->nh_parent; struct nexthop *nh = nhge->nh; - struct nh_grp_entry *nhges; - bool found = false; - int i; + struct nh_group *nhg, *newg; + int i, j; WARN_ON(!nh); - nhges = nhg->nh_entries; - for (i = 0; i < nhg->num_nh; ++i) { - if (found) { - nhges[i-1].nh = nhges[i].nh; - nhges[i-1].weight = nhges[i].weight; - list_del(&nhges[i].nh_list); - list_add(&nhges[i-1].nh_list, &nhges[i-1].nh->grp_list); - } else if (nhg->nh_entries[i].nh == nh) { - found = true; - } - } + nhg = rtnl_dereference(nhp->nh_grp); + newg = nhg->spare; - if (WARN_ON(!found)) + /* last entry, keep it visible and remove the parent */ + if (nhg->num_nh == 1) { + remove_nexthop(net, nhp, nlinfo); return; + } + + newg->has_v4 = nhg->has_v4; + newg->mpath = nhg->mpath; + newg->num_nh = nhg->num_nh; + + /* copy old entries to new except the one getting removed */ + nhges = nhg->nh_entries; + new_nhges = newg->nh_entries; + for (i = 0, j = 0; i < nhg->num_nh; ++i) { + /* current nexthop getting removed */ + if (nhg->nh_entries[i].nh == nh) { + newg->num_nh--; + continue; + } - nhg->num_nh--; - nhg->nh_entries[nhg->num_nh].nh = NULL; + list_del(&nhges[i].nh_list); + new_nhges[j].nh_parent = nhges[i].nh_parent; + new_nhges[j].nh = nhges[i].nh; + new_nhges[j].weight = nhges[i].weight; + list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list); + j++; + } - nh_group_rebalance(nhg); + nh_group_rebalance(newg); + rcu_assign_pointer(nhp->nh_grp, newg); - nexthop_put(nh); + list_del(&nhge->nh_list); + nexthop_put(nhge->nh); if (nlinfo) - nexthop_notify(RTM_NEWNEXTHOP, nhge->nh_parent, nlinfo); + nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo); } static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, @@ -735,17 +822,11 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, { struct nh_grp_entry *nhge, *tmp; - list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) { - struct nh_group *nhg; - - list_del(&nhge->nh_list); - nhg = rtnl_dereference(nhge->nh_parent->nh_grp); - remove_nh_grp_entry(nhge, nhg, nlinfo); + list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) + remove_nh_grp_entry(net, nhge, nlinfo); - /* if this group has no more entries then remove it */ - if (!nhg->num_nh) - remove_nexthop(net, nhge->nh_parent, nlinfo); - } + /* make sure all see the newly published array before releasing rtnl */ + synchronize_rcu(); } static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) @@ -759,10 +840,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) if (WARN_ON(!nhge->nh)) continue; - list_del(&nhge->nh_list); - nexthop_put(nhge->nh); - nhge->nh = NULL; - nhg->num_nh--; + list_del_init(&nhge->nh_list); } } @@ -773,6 +851,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) bool do_flush = false; struct fib_info *fi; + call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh); + list_for_each_entry(fi, &nh->fi_list, nh_list) { fi->fib_flags |= RTNH_F_DEAD; do_flush = true; @@ -1086,6 +1166,7 @@ static struct nexthop *nexthop_create_group(struct net *net, { struct nlattr *grps_attr = cfg->nh_grp; struct nexthop_grp *entry = nla_data(grps_attr); + u16 num_nh = nla_len(grps_attr) / sizeof(*entry); struct nh_group *nhg; struct nexthop *nh; int i; @@ -1096,12 +1177,21 @@ static struct nexthop *nexthop_create_group(struct net *net, nh->is_group = 1; - nhg = nexthop_grp_alloc(nla_len(grps_attr) / sizeof(*entry)); + nhg = nexthop_grp_alloc(num_nh); if (!nhg) { kfree(nh); return ERR_PTR(-ENOMEM); } + /* spare group used for removals */ + nhg->spare = nexthop_grp_alloc(num_nh); + if (!nhg) { + kfree(nhg); + kfree(nh); + return NULL; + } + nhg->spare->spare = nhg; + for (i = 0; i < nhg->num_nh; ++i) { struct nexthop *nhe; struct nh_info *nhi; @@ -1125,6 +1215,9 @@ static struct nexthop *nexthop_create_group(struct net *net, nh_group_rebalance(nhg); } + if (cfg->nh_fdb) + nh->is_fdb_nh = 1; + rcu_assign_pointer(nh->nh_grp, nhg); return nh; @@ -1133,6 +1226,7 @@ out_no_nh: for (; i >= 0; --i) nexthop_put(nhg->nh_entries[i].nh); + kfree(nhg->spare); kfree(nhg); kfree(nh); @@ -1152,7 +1246,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, }; - u32 tb_id = l3mdev_fib_table(cfg->dev); + u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN); int err; err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); @@ -1161,6 +1255,9 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, goto out; } + if (nh->is_fdb_nh) + goto out; + /* sets nh_dev if successful */ err = fib_check_nh(net, fib_nh, tb_id, 0, extack); if (!err) { @@ -1186,6 +1283,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, .fc_flags = cfg->nh_flags, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, + .fc_is_fdb = cfg->nh_fdb, }; int err; @@ -1227,6 +1325,9 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, nhi->family = cfg->nh_family; nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; + if (cfg->nh_fdb) + nh->is_fdb_nh = 1; + if (cfg->nh_blackhole) { nhi->reject_nh = 1; cfg->nh_ifindex = net->loopback_dev->ifindex; @@ -1248,7 +1349,8 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, } /* add the entry to the device based hash */ - nexthop_devhash_add(net, nhi); + if (!nh->is_fdb_nh) + nexthop_devhash_add(net, nhi); rcu_assign_pointer(nh->nh_info, nhi); @@ -1352,6 +1454,19 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, if (tb[NHA_ID]) cfg->nh_id = nla_get_u32(tb[NHA_ID]); + if (tb[NHA_FDB]) { + if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] || + tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { + NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole"); + goto out; + } + if (nhm->nh_flags) { + NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header"); + goto out; + } + cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); + } + if (tb[NHA_GROUP]) { if (nhm->nh_family != AF_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid family for group"); @@ -1375,8 +1490,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, if (tb[NHA_BLACKHOLE]) { if (tb[NHA_GATEWAY] || tb[NHA_OIF] || - tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { - NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif"); + tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) { + NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb"); goto out; } @@ -1385,26 +1500,28 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, goto out; } - if (!tb[NHA_OIF]) { - NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops"); + if (!cfg->nh_fdb && !tb[NHA_OIF]) { + NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); goto out; } - cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); - if (cfg->nh_ifindex) - cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); + if (!cfg->nh_fdb && tb[NHA_OIF]) { + cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); + if (cfg->nh_ifindex) + cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); - if (!cfg->dev) { - NL_SET_ERR_MSG(extack, "Invalid device index"); - goto out; - } else if (!(cfg->dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } else if (!netif_carrier_ok(cfg->dev)) { - NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); - err = -ENETDOWN; - goto out; + if (!cfg->dev) { + NL_SET_ERR_MSG(extack, "Invalid device index"); + goto out; + } else if (!(cfg->dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } else if (!netif_carrier_ok(cfg->dev)) { + NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); + err = -ENETDOWN; + goto out; + } } err = -EINVAL; @@ -1633,7 +1750,7 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, int *master_idx, bool *group_filter, - struct netlink_callback *cb) + bool *fdb_filter, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[NHA_MAX + 1]; @@ -1670,6 +1787,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, case NHA_GROUPS: *group_filter = true; break; + case NHA_FDB: + *fdb_filter = true; + break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); return -EINVAL; @@ -1688,17 +1808,17 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { + bool group_filter = false, fdb_filter = false; struct nhmsg *nhm = nlmsg_data(cb->nlh); int dev_filter_idx = 0, master_idx = 0; struct net *net = sock_net(skb->sk); struct rb_root *root = &net->nexthop.rb_root; - bool group_filter = false; struct rb_node *node; int idx = 0, s_idx; int err; err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, - &group_filter, cb); + &group_filter, &fdb_filter, cb); if (err < 0) return err; @@ -1783,6 +1903,19 @@ static struct notifier_block nh_netdev_notifier = { .notifier_call = nh_netdev_event, }; +int register_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&net->nexthop.notifier_chain, nb); +} +EXPORT_SYMBOL(register_nexthop_notifier); + +int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&net->nexthop.notifier_chain, + nb); +} +EXPORT_SYMBOL(unregister_nexthop_notifier); + static void __net_exit nexthop_net_exit(struct net *net) { rtnl_lock(); @@ -1799,6 +1932,7 @@ static int __net_init nexthop_net_init(struct net *net) net->nexthop.devhash = kzalloc(sz, GFP_KERNEL); if (!net->nexthop.devhash) return -ENOMEM; + ATOMIC_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain); return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2058b94bfa00..1d7076b78e63 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -491,18 +491,16 @@ u32 ip_idents_reserve(u32 hash, int segs) atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; u32 old = READ_ONCE(*p_tstamp); u32 now = (u32)jiffies; - u32 new, delta = 0; + u32 delta = 0; if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = prandom_u32_max(now - old); - /* Do not use atomic_add_return() as it makes UBSAN unhappy */ - do { - old = (u32)atomic_read(p_id); - new = old + delta + segs; - } while (atomic_cmpxchg(p_id, old, new) != old); - - return new - segs; + /* If UBSAN reports an error there, please make sure your compiler + * supports -fno-strict-overflow before reporting it that was a bug + * in UBSAN, and it has been fixed in GCC-8. + */ + return atomic_add_return(segs + delta, p_id) - segs; } EXPORT_SYMBOL(ip_idents_reserve); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 970064996377..15d47d5e7951 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2801,6 +2801,163 @@ static void tcp_enable_tx_delay(void) } } +/* When set indicates to always queue non-full frames. Later the user clears + * this option and we transmit any pending partial frames in the queue. This is + * meant to be used alongside sendfile() to get properly filled frames when the + * user (for example) must write out headers with a write() call first and then + * use sendfile to send out the data parts. + * + * TCP_CORK can be set together with TCP_NODELAY and it is stronger than + * TCP_NODELAY. + */ +static void __tcp_sock_set_cork(struct sock *sk, bool on) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (on) { + tp->nonagle |= TCP_NAGLE_CORK; + } else { + tp->nonagle &= ~TCP_NAGLE_CORK; + if (tp->nonagle & TCP_NAGLE_OFF) + tp->nonagle |= TCP_NAGLE_PUSH; + tcp_push_pending_frames(sk); + } +} + +void tcp_sock_set_cork(struct sock *sk, bool on) +{ + lock_sock(sk); + __tcp_sock_set_cork(sk, on); + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_cork); + +/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is + * remembered, but it is not activated until cork is cleared. + * + * However, when TCP_NODELAY is set we make an explicit push, which overrides + * even TCP_CORK for currently queued segments. + */ +static void __tcp_sock_set_nodelay(struct sock *sk, bool on) +{ + if (on) { + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; + tcp_push_pending_frames(sk); + } else { + tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF; + } +} + +void tcp_sock_set_nodelay(struct sock *sk) +{ + lock_sock(sk); + __tcp_sock_set_nodelay(sk, true); + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_nodelay); + +static void __tcp_sock_set_quickack(struct sock *sk, int val) +{ + if (!val) { + inet_csk_enter_pingpong_mode(sk); + return; + } + + inet_csk_exit_pingpong_mode(sk); + if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && + inet_csk_ack_scheduled(sk)) { + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED; + tcp_cleanup_rbuf(sk, 1); + if (!(val & 1)) + inet_csk_enter_pingpong_mode(sk); + } +} + +void tcp_sock_set_quickack(struct sock *sk, int val) +{ + lock_sock(sk); + __tcp_sock_set_quickack(sk, val); + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_quickack); + +int tcp_sock_set_syncnt(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_SYNCNT) + return -EINVAL; + + lock_sock(sk); + inet_csk(sk)->icsk_syn_retries = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_sock_set_syncnt); + +void tcp_sock_set_user_timeout(struct sock *sk, u32 val) +{ + lock_sock(sk); + inet_csk(sk)->icsk_user_timeout = val; + release_sock(sk); +} +EXPORT_SYMBOL(tcp_sock_set_user_timeout); + +static int __tcp_sock_set_keepidle(struct sock *sk, int val) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (val < 1 || val > MAX_TCP_KEEPIDLE) + return -EINVAL; + + tp->keepalive_time = val * HZ; + if (sock_flag(sk, SOCK_KEEPOPEN) && + !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { + u32 elapsed = keepalive_time_elapsed(tp); + + if (tp->keepalive_time > elapsed) + elapsed = tp->keepalive_time - elapsed; + else + elapsed = 0; + inet_csk_reset_keepalive_timer(sk, elapsed); + } + + return 0; +} + +int tcp_sock_set_keepidle(struct sock *sk, int val) +{ + int err; + + lock_sock(sk); + err = __tcp_sock_set_keepidle(sk, val); + release_sock(sk); + return err; +} +EXPORT_SYMBOL(tcp_sock_set_keepidle); + +int tcp_sock_set_keepintvl(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_KEEPINTVL) + return -EINVAL; + + lock_sock(sk); + tcp_sk(sk)->keepalive_intvl = val * HZ; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_sock_set_keepintvl); + +int tcp_sock_set_keepcnt(struct sock *sk, int val) +{ + if (val < 1 || val > MAX_TCP_KEEPCNT) + return -EINVAL; + + lock_sock(sk); + tcp_sk(sk)->keepalive_probes = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(tcp_sock_set_keepcnt); + /* * Socket option code for TCP. */ @@ -2898,20 +3055,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_NODELAY: - if (val) { - /* TCP_NODELAY is weaker than TCP_CORK, so that - * this option on corked socket is remembered, but - * it is not activated until cork is cleared. - * - * However, when TCP_NODELAY is set we make - * an explicit push, which overrides even TCP_CORK - * for currently queued segments. - */ - tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; - tcp_push_pending_frames(sk); - } else { - tp->nonagle &= ~TCP_NAGLE_OFF; - } + __tcp_sock_set_nodelay(sk, val); break; case TCP_THIN_LINEAR_TIMEOUTS: @@ -2979,43 +3123,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_CORK: - /* When set indicates to always queue non-full frames. - * Later the user clears this option and we transmit - * any pending partial frames in the queue. This is - * meant to be used alongside sendfile() to get properly - * filled frames when the user (for example) must write - * out headers with a write() call first and then use - * sendfile to send out the data parts. - * - * TCP_CORK can be set together with TCP_NODELAY and it is - * stronger than TCP_NODELAY. - */ - if (val) { - tp->nonagle |= TCP_NAGLE_CORK; - } else { - tp->nonagle &= ~TCP_NAGLE_CORK; - if (tp->nonagle&TCP_NAGLE_OFF) - tp->nonagle |= TCP_NAGLE_PUSH; - tcp_push_pending_frames(sk); - } + __tcp_sock_set_cork(sk, val); break; case TCP_KEEPIDLE: - if (val < 1 || val > MAX_TCP_KEEPIDLE) - err = -EINVAL; - else { - tp->keepalive_time = val * HZ; - if (sock_flag(sk, SOCK_KEEPOPEN) && - !((1 << sk->sk_state) & - (TCPF_CLOSE | TCPF_LISTEN))) { - u32 elapsed = keepalive_time_elapsed(tp); - if (tp->keepalive_time > elapsed) - elapsed = tp->keepalive_time - elapsed; - else - elapsed = 0; - inet_csk_reset_keepalive_timer(sk, elapsed); - } - } + err = __tcp_sock_set_keepidle(sk, val); break; case TCP_KEEPINTVL: if (val < 1 || val > MAX_TCP_KEEPINTVL) @@ -3072,19 +3184,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_QUICKACK: - if (!val) { - inet_csk_enter_pingpong_mode(sk); - } else { - inet_csk_exit_pingpong_mode(sk); - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && - inet_csk_ack_scheduled(sk)) { - icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; - tcp_cleanup_rbuf(sk, 1); - if (!(val & 1)) - inet_csk_enter_pingpong_mode(sk); - } - } + __tcp_sock_set_quickack(sk, val); break; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ad90102f5dfb..83330a6cb242 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -437,7 +437,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* 3. Try to fixup all. It is made immediately after connection enters * established state. */ -void tcp_init_buffer_space(struct sock *sk) +static void tcp_init_buffer_space(struct sock *sk) { int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6c05f1ceb538..ad6435ba6d72 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -403,6 +403,46 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) } EXPORT_SYMBOL(tcp_req_err); +/* TCP-LD (RFC 6069) logic */ +void tcp_ld_RTO_revert(struct sock *sk, u32 seq) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + s32 remaining; + u32 delta_us; + + if (sock_owned_by_user(sk)) + return; + + if (seq != tp->snd_una || !icsk->icsk_retransmits || + !icsk->icsk_backoff) + return; + + skb = tcp_rtx_queue_head(sk); + if (WARN_ON_ONCE(!skb)) + return; + + icsk->icsk_backoff--; + icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT; + icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); + + tcp_mstamp_refresh(tp); + delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb)); + remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us); + + if (remaining > 0) { + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + remaining, TCP_RTO_MAX); + } else { + /* RTO revert clocked out retransmission. + * Will retransmit now. + */ + tcp_retransmit_timer(sk); + } +} +EXPORT_SYMBOL(tcp_ld_RTO_revert); + /* * This routine is called by the ICMP module when it gets some * sort of error condition. If err < 0 then the socket should @@ -419,27 +459,23 @@ EXPORT_SYMBOL(tcp_req_err); * */ -int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) +int tcp_v4_err(struct sk_buff *skb, u32 info) { - const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; - struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); - struct inet_connection_sock *icsk; + const struct iphdr *iph = (const struct iphdr *)skb->data; + struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); struct tcp_sock *tp; struct inet_sock *inet; - const int type = icmp_hdr(icmp_skb)->type; - const int code = icmp_hdr(icmp_skb)->code; + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; struct sock *sk; - struct sk_buff *skb; struct request_sock *fastopen; u32 seq, snd_una; - s32 remaining; - u32 delta_us; int err; - struct net *net = dev_net(icmp_skb->dev); + struct net *net = dev_net(skb->dev); sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, th->dest, iph->saddr, ntohs(th->source), - inet_iif(icmp_skb), 0); + inet_iif(skb), 0); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return -ENOENT; @@ -476,7 +512,6 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; } - icsk = inet_csk(sk); tp = tcp_sk(sk); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = rcu_dereference(tp->fastopen_rsk); @@ -490,7 +525,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: if (!sock_owned_by_user(sk)) - do_redirect(icmp_skb, sk); + do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ @@ -521,41 +556,12 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } err = icmp_err_convert[code].errno; - /* check if icmp_skb allows revert of backoff - * (see draft-zimmermann-tcp-lcd) */ - if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) - break; - if (seq != tp->snd_una || !icsk->icsk_retransmits || - !icsk->icsk_backoff || fastopen) - break; - - if (sock_owned_by_user(sk)) - break; - - skb = tcp_rtx_queue_head(sk); - if (WARN_ON_ONCE(!skb)) - break; - - icsk->icsk_backoff--; - icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : - TCP_TIMEOUT_INIT; - icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); - - - tcp_mstamp_refresh(tp); - delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb)); - remaining = icsk->icsk_rto - - usecs_to_jiffies(delta_us); - - if (remaining > 0) { - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - remaining, TCP_RTO_MAX); - } else { - /* RTO revert clocked out retransmission. - * Will retransmit now */ - tcp_retransmit_timer(sk); - } - + /* check if this ICMP message allows revert of backoff. + * (see RFC 6069) + */ + if (!fastopen && + (code == ICMP_NET_UNREACH || code == ICMP_HOST_UNREACH)) + tcp_ld_RTO_revert(sk, seq); break; case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; @@ -573,6 +579,8 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (fastopen && !fastopen->sk) break; + ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); + if (!sock_owned_by_user(sk)) { sk->sk_err = err; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32564b350823..1b7ebbcae497 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -112,6 +112,9 @@ #include <net/sock_reuseport.h> #include <net/addrconf.h> #include <net/udp_tunnel.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6_stubs.h> +#endif struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -2563,7 +2566,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: case UDP_ENCAP_ESPINUDP_NON_IKE: - up->encap_rcv = xfrm4_udp_encap_rcv; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; + else +#endif + up->encap_rcv = xfrm4_udp_encap_rcv; #endif fallthrough; case UDP_ENCAP_L2TPINUDP: diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 150e6f0fdbf5..2158e8bddf41 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -22,9 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->bind_ifindex) { - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, - (void *)&cfg->bind_ifindex, - sizeof(cfg->bind_ifindex)); + err = sock_bindtoindex(sock->sk, cfg->bind_ifindex); if (err < 0) goto error; } diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index f8de2482a529..ad2afeef4f10 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -18,11 +18,6 @@ #include <net/ip.h> #include <net/xfrm.h> -int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return xfrm4_extract_header(skb); -} - static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 30ddb9dc9398..3cff51ba72bb 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -14,77 +14,18 @@ #include <net/xfrm.h> #include <net/icmp.h> -static int xfrm4_tunnel_check_size(struct sk_buff *skb) -{ - int mtu, ret = 0; - - if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) - goto out; - - if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) - goto out; - - mtu = dst_mtu(skb_dst(skb)); - if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && - !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { - skb->protocol = htons(ETH_P_IP); - - if (skb->sk) - xfrm_local_error(skb, mtu); - else - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); - ret = -EMSGSIZE; - } -out: - return ret; -} - -int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm4_tunnel_check_size(skb); - if (err) - return err; - - XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; - - return xfrm4_extract_header(skb); -} - -int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) -{ - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - - IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; - - return xfrm_output(sk, skb); -} - static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { +#ifdef CONFIG_NETFILTER struct xfrm_state *x = skb_dst(skb)->xfrm; - const struct xfrm_state_afinfo *afinfo; - int ret = -EAFNOSUPPORT; -#ifdef CONFIG_NETFILTER if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; return dst_output(net, sk, skb); } #endif - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - ret = afinfo->output_finish(sk, skb); - else - kfree_skb(skb); - rcu_read_unlock(); - - return ret; + return xfrm_output(sk, skb); } int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index f8ed3c3bb928..87d4db591488 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -8,36 +8,12 @@ * */ -#include <net/ip.h> #include <net/xfrm.h> -#include <linux/pfkeyv2.h> -#include <linux/ipsec.h> -#include <linux/netfilter_ipv4.h> -#include <linux/export.h> - -int xfrm4_extract_header(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - - XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); - XFRM_MODE_SKB_CB(skb)->id = iph->id; - XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; - XFRM_MODE_SKB_CB(skb)->tos = iph->tos; - XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; - XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); - memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, - sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); - - return 0; -} static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .proto = IPPROTO_IPIP, .output = xfrm4_output, - .output_finish = xfrm4_output_finish, - .extract_input = xfrm4_extract_input, - .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, .local_error = xfrm4_local_error, }; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 5a6111da26c4..4f03aece2980 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -88,6 +88,18 @@ config INET6_ESP_OFFLOAD If unsure, say N. +config INET6_ESPINTCP + bool "IPv6: ESP in TCP encapsulation (RFC 8229)" + depends on XFRM && INET6_ESP + select STREAM_PARSER + select NET_SOCK_MSG + select XFRM_ESPINTCP + help + Support for RFC 8229 encapsulation of ESP and IKE over + TCP/IPv6 sockets. + + If unsure, say N. + config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ab7e839753ae..09cfbf5dd7ce 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2783,6 +2783,33 @@ put: in6_dev_put(in6_dev); } +static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev, + struct in6_ifreq *ireq) +{ + struct ip_tunnel_parm p = { }; + int err; + + if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4)) + return -EADDRNOTAVAIL; + + p.iph.daddr = ireq->ifr6_addr.s6_addr32[3]; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPV6; + p.iph.ttl = 64; + + if (!dev->netdev_ops->ndo_tunnel_ctl) + return -EOPNOTSUPP; + err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, SIOCADDTUNNEL); + if (err) + return err; + + dev = __dev_get_by_name(net, p.name); + if (!dev) + return -ENOBUFS; + return dev_open(dev, NULL); +} + /* * Set destination address. * Special case for SIT interfaces where we create a new "virtual" @@ -2790,61 +2817,19 @@ put: */ int addrconf_set_dstaddr(struct net *net, void __user *arg) { - struct in6_ifreq ireq; struct net_device *dev; - int err = -EINVAL; - - rtnl_lock(); + struct in6_ifreq ireq; + int err = -ENODEV; - err = -EFAULT; + if (!IS_ENABLED(CONFIG_IPV6_SIT)) + return -ENODEV; if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) - goto err_exit; + return -EFAULT; + rtnl_lock(); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); - - err = -ENODEV; - if (!dev) - goto err_exit; - -#if IS_ENABLED(CONFIG_IPV6_SIT) - if (dev->type == ARPHRD_SIT) { - const struct net_device_ops *ops = dev->netdev_ops; - struct ifreq ifr; - struct ip_tunnel_parm p; - - err = -EADDRNOTAVAIL; - if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4)) - goto err_exit; - - memset(&p, 0, sizeof(p)); - p.iph.daddr = ireq.ifr6_addr.s6_addr32[3]; - p.iph.saddr = 0; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPV6; - p.iph.ttl = 64; - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); - - set_fs(KERNEL_DS); - err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); - } else - err = -EOPNOTSUPP; - - if (err == 0) { - err = -ENOBUFS; - dev = __dev_get_by_name(net, p.name); - if (!dev) - goto err_exit; - err = dev_open(dev, NULL); - } - } -#endif - -err_exit: + if (dev && dev->type == ARPHRD_SIT) + err = addrconf_set_sit_dstaddr(net, dev, &ireq); rtnl_unlock(); return err; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 771a462a8322..b304b882e031 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -60,6 +60,8 @@ #include <net/calipso.h> #include <net/seg6.h> #include <net/rpl.h> +#include <net/compat.h> +#include <net/xfrm.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -504,9 +506,8 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock); /* * This does both peername and sockname. */ - int inet6_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) + int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; @@ -531,9 +532,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_addr = np->saddr; else sin->sin6_addr = sk->sk_v6_rcv_saddr; - sin->sin6_port = inet->inet_sport; } + if (cgroup_bpf_enabled) + BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin, + peer ? BPF_CGROUP_INET6_GETPEERNAME : + BPF_CGROUP_INET6_GETSOCKNAME, + NULL); sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); return sizeof(*sin); @@ -542,21 +547,25 @@ EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); switch (cmd) { case SIOCADDRT: - case SIOCDELRT: - - return ipv6_route_ioctl(net, cmd, (void __user *)arg); + case SIOCDELRT: { + struct in6_rtmsg rtmsg; + if (copy_from_user(&rtmsg, argp, sizeof(rtmsg))) + return -EFAULT; + return ipv6_route_ioctl(net, cmd, &rtmsg); + } case SIOCSIFADDR: - return addrconf_add_ifaddr(net, (void __user *) arg); + return addrconf_add_ifaddr(net, argp); case SIOCDIFADDR: - return addrconf_del_ifaddr(net, (void __user *) arg); + return addrconf_del_ifaddr(net, argp); case SIOCSIFDSTADDR: - return addrconf_set_dstaddr(net, (void __user *) arg); + return addrconf_set_dstaddr(net, argp); default: if (!sk->sk_prot->ioctl) return -ENOIOCTLCMD; @@ -567,6 +576,56 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(inet6_ioctl); +#ifdef CONFIG_COMPAT +struct compat_in6_rtmsg { + struct in6_addr rtmsg_dst; + struct in6_addr rtmsg_src; + struct in6_addr rtmsg_gateway; + u32 rtmsg_type; + u16 rtmsg_dst_len; + u16 rtmsg_src_len; + u32 rtmsg_metric; + u32 rtmsg_info; + u32 rtmsg_flags; + s32 rtmsg_ifindex; +}; + +static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_in6_rtmsg __user *ur) +{ + struct in6_rtmsg rt; + + if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst, + 3 * sizeof(struct in6_addr)) || + get_user(rt.rtmsg_type, &ur->rtmsg_type) || + get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) || + get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) || + get_user(rt.rtmsg_metric, &ur->rtmsg_metric) || + get_user(rt.rtmsg_info, &ur->rtmsg_info) || + get_user(rt.rtmsg_flags, &ur->rtmsg_flags) || + get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex)) + return -EFAULT; + + + return ipv6_route_ioctl(sock_net(sk), cmd, &rt); +} + +int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + void __user *argp = compat_ptr(arg); + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return inet6_compat_routing_ioctl(sk, cmd, argp); + default: + return -ENOIOCTLCMD; + } +} +EXPORT_SYMBOL_GPL(inet6_compat_ioctl); +#endif /* CONFIG_COMPAT */ + INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, size_t)); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -628,6 +687,7 @@ const struct proto_ops inet6_stream_ops = { .read_sock = tcp_read_sock, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif @@ -656,6 +716,7 @@ const struct proto_ops inet6_dgram_ops = { .sendpage = sock_no_sendpage, .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif @@ -963,6 +1024,11 @@ static const struct ipv6_stub ipv6_stub_impl = { .ip6_del_rt = ip6_del_rt, .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, +#if IS_ENABLED(CONFIG_XFRM) + .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, + .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, + .xfrm6_rcv_encap = xfrm6_rcv_encap, +#endif .nd_tbl = &nd_tbl, }; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 45e2adc56610..d88d97617f7e 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -767,6 +767,7 @@ static const struct xfrm_type ah6_type = { static struct xfrm6_protocol ah6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = ah6_rcv_cb, .err_handler = ah6_err, .priority = 0, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 11143d039f16..c43592771126 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -26,11 +26,16 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <net/ip6_checksum.h> #include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> +#include <net/udp.h> #include <linux/icmpv6.h> +#include <net/tcp.h> +#include <net/espintcp.h> +#include <net/inet6_hashtables.h> #include <linux/highmem.h> @@ -39,6 +44,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) /* @@ -72,9 +82,9 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) @@ -104,16 +114,17 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, static void esp_ssg_unref(struct xfrm_state *x, void *tmp) { + struct esp_output_extra *extra = esp_tmp_extra(tmp); struct crypto_aead *aead = x->data; - int seqhilen = 0; + int extralen = 0; u8 *iv; struct aead_request *req; struct scatterlist *sg; if (x->props.flags & XFRM_STATE_ESN) - seqhilen += sizeof(__be32); + extralen += sizeof(*extra); - iv = esp_tmp_iv(aead, tmp, seqhilen); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); /* Unref skb_frag_pages in the src scatterlist if necessary. @@ -124,6 +135,149 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) put_page(sg_page(sg)); } +#ifdef CONFIG_INET6_ESPINTCP +struct esp_tcp_sk { + struct sock *sk; + struct rcu_head rcu; +}; + +static void esp_free_tcp_sk(struct rcu_head *head) +{ + struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); + + sock_put(esk->sk); + kfree(esk); +} + +static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct esp_tcp_sk *esk; + __be16 sport, dport; + struct sock *nsk; + struct sock *sk; + + sk = rcu_dereference(x->encap_sk); + if (sk && sk->sk_state == TCP_ESTABLISHED) + return sk; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (sk && sk == nsk) { + esk = kmalloc(sizeof(*esk), GFP_ATOMIC); + if (!esk) { + spin_unlock_bh(&x->lock); + return ERR_PTR(-ENOMEM); + } + RCU_INIT_POINTER(x->encap_sk, NULL); + esk->sk = sk; + call_rcu(&esk->rcu, esp_free_tcp_sk); + } + spin_unlock_bh(&x->lock); + + sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6, + dport, &x->props.saddr.in6, ntohs(sport), 0, 0); + if (!sk) + return ERR_PTR(-ENOENT); + + if (!tcp_is_ulp_esp(sk)) { + sock_put(sk); + return ERR_PTR(-EINVAL); + } + + spin_lock_bh(&x->lock); + nsk = rcu_dereference_protected(x->encap_sk, + lockdep_is_held(&x->lock)); + if (encap->encap_sport != sport || + encap->encap_dport != dport) { + sock_put(sk); + sk = nsk ?: ERR_PTR(-EREMCHG); + } else if (sk == nsk) { + sock_put(sk); + } else { + rcu_assign_pointer(x->encap_sk, sk); + } + spin_unlock_bh(&x->lock); + + return sk; +} + +static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) +{ + struct sock *sk; + int err; + + rcu_read_lock(); + + sk = esp6_find_tcp_sk(x); + err = PTR_ERR_OR_ZERO(sk); + if (err) + goto out; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) + err = espintcp_queue_out(sk, skb); + else + err = espintcp_push_skb(sk, skb); + bh_unlock_sock(sk); + +out: + rcu_read_unlock(); + return err; +} + +static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + return esp_output_tcp_finish(x, skb); +} + +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + local_bh_disable(); + err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb); + local_bh_enable(); + + /* EINPROGRESS just happens to do the right thing. It + * actually means that the skb has been consumed and + * isn't coming back. + */ + return err ?: -EINPROGRESS; +} +#else +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb) +{ + kfree_skb(skb); + + return -EOPNOTSUPP; +} +#endif + +static void esp_output_encap_csum(struct sk_buff *skb) +{ + /* UDP encap with IPv6 requires a valid checksum */ + if (*skb_mac_header(skb) == IPPROTO_UDP) { + struct udphdr *uh = udp_hdr(skb); + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int len = ntohs(uh->len); + unsigned int offset = skb_transport_offset(skb); + __wsum csum = skb_checksum(skb, offset, skb->len - offset, 0); + + uh->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } +} + static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -143,6 +297,8 @@ static void esp_output_done(struct crypto_async_request *base, int err) esp_ssg_unref(x, tmp); kfree(tmp); + esp_output_encap_csum(skb); + if (xo && (xo->flags & XFRM_DEV_RESUME)) { if (err) { XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); @@ -154,7 +310,11 @@ static void esp_output_done(struct crypto_async_request *base, int err) secpath_reset(skb); xfrm_dev_resume(skb); } else { - xfrm_output_resume(skb, err); + if (!err && + x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + esp_output_tail_tcp(x, skb); + else + xfrm_output_resume(skb, err); } } @@ -163,7 +323,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -171,27 +331,36 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb, struct xfrm_state *x, struct ip_esp_hdr *esph, - __be32 *seqhi) + struct esp_output_extra *extra) { /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. We will move it back after * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { + __u32 seqhi; struct xfrm_offload *xo = xfrm_offload(skb); - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; if (xo) - esph->seq_no = htonl(xo->seq.hi); + seqhi = xo->seq.hi; else - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + seqhi = XFRM_SKB_CB(skb)->seq.output.hi; + + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; + esph->seq_no = htonl(seqhi); } esph->spi = x->id.spi; @@ -207,15 +376,122 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } +static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb, + int encap_type, + struct esp_info *esp, + __be16 sport, + __be16 dport) +{ + struct udphdr *uh; + __be32 *udpdata32; + unsigned int len; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > U16_MAX) + return ERR_PTR(-EMSGSIZE); + + uh = (struct udphdr *)esp->esph; + uh->source = sport; + uh->dest = dport; + uh->len = htons(len); + uh->check = 0; + + *skb_mac_header(skb) = IPPROTO_UDP; + + if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) { + udpdata32 = (__be32 *)(uh + 1); + udpdata32[0] = udpdata32[1] = 0; + return (struct ip_esp_hdr *)(udpdata32 + 2); + } + + return (struct ip_esp_hdr *)(uh + 1); +} + +#ifdef CONFIG_INET6_ESPINTCP +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + __be16 *lenp = (void *)esp->esph; + struct ip_esp_hdr *esph; + unsigned int len; + struct sock *sk; + + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len > IP_MAX_MTU) + return ERR_PTR(-EMSGSIZE); + + rcu_read_lock(); + sk = esp6_find_tcp_sk(x); + rcu_read_unlock(); + + if (IS_ERR(sk)) + return ERR_CAST(sk); + + *lenp = htons(len); + esph = (struct ip_esp_hdr *)(lenp + 1); + + return esph; +} +#else +static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, + struct sk_buff *skb, + struct esp_info *esp) +{ + return ERR_PTR(-EOPNOTSUPP); +} +#endif + +static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, + struct esp_info *esp) +{ + struct xfrm_encap_tmpl *encap = x->encap; + struct ip_esp_hdr *esph; + __be16 sport, dport; + int encap_type; + + spin_lock_bh(&x->lock); + sport = encap->encap_sport; + dport = encap->encap_dport; + encap_type = encap->encap_type; + spin_unlock_bh(&x->lock); + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport); + break; + case TCP_ENCAP_ESPINTCP: + esph = esp6_output_tcp_encap(x, skb, esp); + break; + } + + if (IS_ERR(esph)) + return PTR_ERR(esph); + + esp->esph = esph; + + return 0; +} + int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; u8 *vaddr; int nfrags; + int esph_offset; struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + if (x->encap) { + int err = esp6_output_encap(x, skb, esp); + + if (err < 0) + return err; + } + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; @@ -274,10 +550,13 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info } cow: + esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb); + nfrags = skb_cow_data(skb, tailen, &trailer); if (nfrags < 0) goto out; tail = skb_tail_pointer(trailer); + esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset); skip_cow: esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); @@ -295,20 +574,20 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info void *tmp; int ivlen; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; struct page *page; struct ip_esp_hdr *esph; struct aead_request *req; struct crypto_aead *aead; struct scatterlist *sg, *dsg; + struct esp_output_extra *extra; int err = -ENOMEM; assoclen = sizeof(struct ip_esp_hdr); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); + extralen += sizeof(*extra); assoclen += sizeof(__be32); } @@ -316,12 +595,12 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info alen = crypto_aead_authsize(aead); ivlen = crypto_aead_ivsize(aead); - tmp = esp_alloc_tmp(aead, esp->nfrags + 2, seqhilen); + tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen); if (!tmp) goto error; - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -330,7 +609,8 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info else dsg = &sg[esp->nfrags]; - esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi); + esph = esp_output_set_esn(skb, x, esp->esph, extra); + esp->esph = esph; sg_init_table(sg, esp->nfrags); err = skb_to_sgvec(skb, sg, @@ -394,11 +674,15 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info case 0: if ((x->props.flags & XFRM_STATE_ESN)) esp_output_restore_header(skb); + esp_output_encap_csum(skb); } if (sg != dsg) esp_ssg_unref(x, tmp); + if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) + err = esp_output_tail_tcp(x, skb); + error_free: kfree(tmp); error: @@ -438,11 +722,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esp.plen = esp.clen - skb->len - esp.tfclen; esp.tailen = esp.tfclen + esp.plen + alen; + esp.esph = ip_esp_hdr(skb); + esp.nfrags = esp6_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; - esph = ip_esp_hdr(skb); + esph = esp.esph; esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); @@ -517,6 +803,60 @@ int esp6_input_done2(struct sk_buff *skb, int err) if (unlikely(err < 0)) goto out; + if (x->encap) { + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct xfrm_encap_tmpl *encap = x->encap; + struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len); + struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len); + __be16 source; + + switch (x->encap->encap_type) { + case TCP_ENCAP_ESPINTCP: + source = th->source; + break; + case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: + source = uh->source; + break; + default: + WARN_ON_ONCE(1); + err = -EINVAL; + goto out; + } + + /* + * 1) if the NAT-T peer's IP or port changed then + * advertize the change to the keying daemon. + * This is an inbound SA, so just compare + * SRC ports. + */ + if (!ipv6_addr_equal(&ip6h->saddr, &x->props.saddr.in6) || + source != encap->encap_sport) { + xfrm_address_t ipaddr; + + memcpy(&ipaddr.a6, &ip6h->saddr.s6_addr, sizeof(ipaddr.a6)); + km_new_mapping(x, &ipaddr, source); + + /* XXX: perhaps add an extra + * policy check here, to see + * if we should allow or + * reject a packet from a + * different source + * address/port. + */ + } + + /* + * 2) ignore UDP/TCP checksums in case + * of NAT-T in Transport Mode, or + * perform other post-processing fixes + * as per draft-ietf-ipsec-udp-encaps-06, + * section 3.1.2 + */ + if (x->props.mode == XFRM_MODE_TRANSPORT) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull_rcsum(skb, hlen); @@ -632,7 +972,7 @@ skip_cow: goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -836,9 +1176,6 @@ static int esp6_init_state(struct xfrm_state *x) u32 align; int err; - if (x->encap) - return -EINVAL; - x->data = NULL; if (x->aead) @@ -867,6 +1204,30 @@ static int esp6_init_state(struct xfrm_state *x) break; } + if (x->encap) { + struct xfrm_encap_tmpl *encap = x->encap; + + switch (encap->encap_type) { + default: + err = -EINVAL; + goto error; + case UDP_ENCAP_ESPINUDP: + x->props.header_len += sizeof(struct udphdr); + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); + break; +#ifdef CONFIG_INET6_ESPINTCP + case TCP_ENCAP_ESPINTCP: + /* only the length field, TCP encap is done by + * the socket + */ + x->props.header_len += 2; + break; +#endif + } + } + align = ALIGN(crypto_aead_blocksize(aead), 4); x->props.trailer_len = align + 1 + crypto_aead_authsize(aead); @@ -893,6 +1254,7 @@ static const struct xfrm_type esp6_type = { static struct xfrm6_protocol esp6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = esp6_rcv_cb, .err_handler = esp6_err, .priority = 0, diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 8eab2c869d61..55addea1948f 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -85,10 +85,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, sp->olen++; xo = xfrm_offload(skb); - if (!xo) { - xfrm_state_put(x); + if (!xo) goto out_reset; - } } xo->flags |= XFRM_GRO; @@ -123,9 +121,16 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb) struct ip_esp_hdr *esph; struct ipv6hdr *iph = ipv6_hdr(skb); struct xfrm_offload *xo = xfrm_offload(skb); - int proto = iph->nexthdr; + u8 proto = iph->nexthdr; skb_push(skb, -skb_network_offset(skb)); + + if (x->outer_mode.encap == XFRM_MODE_TRANSPORT) { + __be16 frag; + + ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &proto, &frag); + } + esph = ip_esp_hdr(skb); *skb_mac_header(skb) = IPPROTO_ESP; @@ -166,23 +171,31 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x, struct xfrm_offload *xo = xfrm_offload(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; - int proto = xo->proto; + u8 proto = xo->proto; skb->transport_header += x->props.header_len; - if (proto == IPPROTO_BEETPH) { - struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data; - - skb->transport_header += ph->hdrlen * 8; - proto = ph->nexthdr; - } - if (x->sel.family != AF_INET6) { skb->transport_header -= (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); + if (proto == IPPROTO_BEETPH) { + struct ip_beet_phdr *ph = + (struct ip_beet_phdr *)skb->data; + + skb->transport_header += ph->hdrlen * 8; + proto = ph->nexthdr; + } else { + skb->transport_header -= IPV4_BEET_PHMAXLEN; + } + if (proto == IPPROTO_TCP) skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + } else { + __be16 frag; + + skb->transport_header += + ipv6_skip_exthdr(skb, 0, &proto, &frag); } __skb_pull(skb, skb_transport_offset(skb)); @@ -271,7 +284,6 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features int alen; int blksize; struct xfrm_offload *xo; - struct ip_esp_hdr *esph; struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; @@ -312,13 +324,13 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features seq = xo->seq.low; - esph = ip_esp_hdr(skb); - esph->spi = x->id.spi; + esp.esph = ip_esp_hdr(skb); + esp.esph->spi = x->id.spi; skb_push(skb, -skb_network_offset(skb)); if (xo->flags & XFRM_GSO_SEGMENT) { - esph->seq_no = htonl(seq); + esp.esph->seq_no = htonl(seq); if (!skb_is_gso(skb)) xo->seq.low++; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 250ff52c674e..49ee89bbcba0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -664,7 +664,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (arg.filter.table_id) { tb = fib6_get_table(net, arg.filter.table_id); if (!tb) { - if (arg.filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != PF_INET6) goto out; NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4703b09808d0..821d96c720b9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -89,6 +89,11 @@ struct ip6_tnl_net { struct ip6_tnl __rcu *collect_md_tun; }; +static inline int ip6_tnl_mpls_supported(void) +{ + return IS_ENABLED(CONFIG_MPLS); +} + static struct net_device_stats *ip6_get_stats(struct net_device *dev) { struct pcpu_sw_netstats tmp, sum = { 0 }; @@ -718,6 +723,20 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } +static int +mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + __u32 rel_info = ntohl(info); + int err, rel_msg = 0; + u8 rel_type = type; + u8 rel_code = code; + + err = ip6_tnl_err(skb, IPPROTO_MPLS, opt, &rel_type, &rel_code, + &rel_msg, &rel_info, offset); + return err; +} + static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) @@ -740,6 +759,14 @@ static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, return IP6_ECN_decapsulate(ipv6h, skb); } +static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct sk_buff *skb) +{ + /* ECN is not supported in AF_MPLS */ + return 0; +} + __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) @@ -901,6 +928,11 @@ static const struct tnl_ptk_info tpi_v4 = { .proto = htons(ETH_P_IP), }; +static const struct tnl_ptk_info tpi_mpls = { + /* no tunnel info required for mplsip6. */ + .proto = htons(ETH_P_MPLS_UC), +}; + static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, const struct tnl_ptk_info *tpi, int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, @@ -958,6 +990,12 @@ static int ip6ip6_rcv(struct sk_buff *skb) ip6ip6_dscp_ecn_decapsulate); } +static int mplsip6_rcv(struct sk_buff *skb) +{ + return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls, + mplsip6_dscp_ecn_decapsulate); +} + struct ipv6_tel_txoption { struct ipv6_txoptions ops; __u8 dst_opt[8]; @@ -1232,6 +1270,8 @@ route_lookup: ipv6_push_frag_opts(skb, &opt.ops, &proto); } + skb_set_inner_ipproto(skb, proto); + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1253,22 +1293,22 @@ tx_err_dst_release: EXPORT_SYMBOL(ip6_tnl_xmit); static inline int -ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, + u8 protocol) { struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h; const struct iphdr *iph; int encap_limit = -1; + __u16 offset; struct flowi6 fl6; - __u8 dsfield; + __u8 dsfield, orig_dsfield; __u32 mtu; u8 tproto; int err; - iph = ip_hdr(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - tproto = READ_ONCE(t->parms.proto); - if (tproto != IPPROTO_IPIP && tproto != 0) + if (tproto != protocol && tproto != 0) return -1; if (t->parms.collect_md) { @@ -1281,129 +1321,100 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) return -1; key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; + fl6.flowi6_proto = protocol; fl6.saddr = key->u.ipv6.src; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; dsfield = key->tos; + switch (protocol) { + case IPPROTO_IPIP: + iph = ip_hdr(skb); + orig_dsfield = ipv4_get_dsfield(iph); + break; + case IPPROTO_IPV6: + ipv6h = ipv6_hdr(skb); + orig_dsfield = ipv6_get_dsfield(ipv6h); + break; + default: + orig_dsfield = dsfield; + break; + } } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; + if (protocol == IPPROTO_IPV6) { + offset = ip6_tnl_parse_tlv_enc_lim(skb, + skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have + * reallocated skb->head + */ + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv4_get_dsfield(iph); - else - dsfield = ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; - else - fl6.flowi6_mark = t->parms.fwmark; - } - - fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); - dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); - - if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) - return -1; - - skb_set_inner_ipproto(skb, IPPROTO_IPIP); - - err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, - IPPROTO_IPIP); - if (err != 0) { - /* XXX: send ICMP error even if DF is not set. */ - if (err == -EMSGSIZE) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); - return -1; - } - - return 0; -} - -static inline int -ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h; - int encap_limit = -1; - __u16 offset; - struct flowi6 fl6; - __u8 dsfield; - __u32 mtu; - u8 tproto; - int err; - - ipv6h = ipv6_hdr(skb); - tproto = READ_ONCE(t->parms.proto); - if ((tproto != IPPROTO_IPV6 && tproto != 0) || - ip6_tnl_addr_conflict(t, ipv6h)) - return -1; - - if (t->parms.collect_md) { - struct ip_tunnel_info *tun_info; - const struct ip_tunnel_key *key; - - tun_info = skb_tunnel_info(skb); - if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || - ip_tunnel_info_af(tun_info) != AF_INET6)) - return -1; - key = &tun_info->key; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; - fl6.saddr = key->u.ipv6.src; - fl6.daddr = key->u.ipv6.dst; - fl6.flowlabel = key->label; - dsfield = key->tos; - } else { - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ - ipv6h = ipv6_hdr(skb); - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - - tel = (void *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); - return -1; + tel = (void *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { - encap_limit = t->parms.encap_limit; } memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; + fl6.flowi6_proto = protocol; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv6_get_dsfield(ipv6h); - else - dsfield = ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; else fl6.flowi6_mark = t->parms.fwmark; + switch (protocol) { + case IPPROTO_IPIP: + iph = ip_hdr(skb); + orig_dsfield = ipv4_get_dsfield(iph); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = orig_dsfield; + else + dsfield = ip6_tclass(t->parms.flowinfo); + break; + case IPPROTO_IPV6: + ipv6h = ipv6_hdr(skb); + orig_dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = orig_dsfield; + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + break; + default: + orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo); + break; + } } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); - dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, - IPPROTO_IPV6); + protocol); if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + switch (protocol) { + case IPPROTO_IPIP: + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + break; + case IPPROTO_IPV6: + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + break; + default: + break; + } return -1; } @@ -1415,6 +1426,7 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; + u8 ipproto; int ret; if (!pskb_inet_may_pull(skb)) @@ -1422,15 +1434,21 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IP): - ret = ip4ip6_tnl_xmit(skb, dev); + ipproto = IPPROTO_IPIP; break; case htons(ETH_P_IPV6): - ret = ip6ip6_tnl_xmit(skb, dev); + if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb))) + goto tx_err; + ipproto = IPPROTO_IPV6; + break; + case htons(ETH_P_MPLS_UC): + ipproto = IPPROTO_MPLS; break; default: goto tx_err; } + ret = ipxip6_tnl_xmit(skb, dev, ipproto); if (ret < 0) goto tx_err; @@ -2218,6 +2236,12 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { .priority = 1, }; +static struct xfrm6_tunnel mplsip6_handler __read_mostly = { + .handler = mplsip6_rcv, + .err_handler = mplsip6_err, + .priority = 1, +}; + static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -2332,6 +2356,15 @@ static int __init ip6_tunnel_init(void) pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } + + if (ip6_tnl_mpls_supported()) { + err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS); + if (err < 0) { + pr_err("%s: can't register mplsip6\n", __func__); + goto out_mplsip6; + } + } + err = rtnl_link_register(&ip6_link_ops); if (err < 0) goto rtnl_link_failed; @@ -2339,6 +2372,9 @@ static int __init ip6_tunnel_init(void) return 0; rtnl_link_failed: + if (ip6_tnl_mpls_supported()) + xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS); +out_mplsip6: xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); @@ -2361,6 +2397,9 @@ static void __exit ip6_tunnel_cleanup(void) if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) pr_info("%s: can't deregister ip6ip6\n", __func__); + if (ip6_tnl_mpls_supported() && + xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS)) + pr_info("%s: can't deregister mplsip6\n", __func__); unregister_pernet_device(&ip6_tnl_net_ops); } diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 58956a6b66a2..2e0ad1bc84a8 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -25,17 +25,12 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, goto error; if (cfg->ipv6_v6only) { - int val = 1; - - err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, - (char *) &val, sizeof(val)); + err = ip6_sock_set_v6only(sock->sk); if (err < 0) goto error; } if (cfg->bind_ifindex) { - err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, - (void *)&cfg->bind_ifindex, - sizeof(cfg->bind_ifindex)); + err = sock_bindtoindex(sock->sk, cfg->bind_ifindex); if (err < 0) goto error; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index cc6180e08a4f..1147f647b9a0 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -296,7 +296,8 @@ static void vti6_dev_uninit(struct net_device *dev) dev_put(dev); } -static int vti6_rcv(struct sk_buff *skb) +static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -323,7 +324,10 @@ static int vti6_rcv(struct sk_buff *skb) rcu_read_unlock(); - return xfrm6_rcv_tnl(skb, t); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + return xfrm_input(skb, nexthdr, spi, encap_type); } rcu_read_unlock(); return -EINVAL; @@ -332,6 +336,13 @@ discard: return 0; } +static int vti6_rcv(struct sk_buff *skb) +{ + int nexthdr = skb_network_header(skb)[IP6CB(skb)->nhoff]; + + return vti6_input_proto(skb, nexthdr, 0, 0); +} + static int vti6_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -1185,6 +1196,7 @@ static struct pernet_operations vti6_net_ops = { static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, @@ -1192,6 +1204,7 @@ static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, @@ -1199,6 +1212,7 @@ static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .handler = vti6_rcv, + .input_handler = vti6_input_proto, .cb_handler = vti6_rcv_cb, .err_handler = vti6_err, .priority = 100, diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 65a54d74acc1..1f4d20e97c07 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -98,7 +98,8 @@ static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES #define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ - lockdep_rtnl_is_held()) + lockdep_rtnl_is_held() || \ + list_empty(&net->ipv6.mr6_tables)) static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) @@ -2502,7 +2503,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { - if (filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) return skb->len; NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 3752bd3e92ce..99668bfebd85 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -183,6 +183,7 @@ static const struct xfrm_type ipcomp6_type = { static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, .priority = 0, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a0e50cc57e54..adbfed6adf11 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -136,6 +136,41 @@ static bool setsockopt_needs_rtnl(int optname) return false; } +static int do_ipv6_mcast_group_source(struct sock *sk, int optname, + struct group_source_req *greqs) +{ + int omode, add; + + if (greqs->gsr_group.ss_family != AF_INET6 || + greqs->gsr_source.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + + if (optname == MCAST_BLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 1; + } else if (optname == MCAST_UNBLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 0; + } else if (optname == MCAST_JOIN_SOURCE_GROUP) { + struct sockaddr_in6 *psin6; + int retv; + + psin6 = (struct sockaddr_in6 *)&greqs->gsr_group; + retv = ipv6_sock_mc_join_ssm(sk, greqs->gsr_interface, + &psin6->sin6_addr, + MCAST_INCLUDE); + /* prior join w/ different source is ok */ + if (retv && retv != -EADDRINUSE) + return retv; + omode = MCAST_INCLUDE; + add = 1; + } else /* MCAST_LEAVE_SOURCE_GROUP */ { + omode = MCAST_INCLUDE; + add = 0; + } + return ip6_mc_source(add, omode, sk, greqs); +} + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -715,7 +750,6 @@ done: case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; - int omode, add; if (optlen < sizeof(struct group_source_req)) goto e_inval; @@ -723,34 +757,7 @@ done: retv = -EFAULT; break; } - if (greqs.gsr_group.ss_family != AF_INET6 || - greqs.gsr_source.ss_family != AF_INET6) { - retv = -EADDRNOTAVAIL; - break; - } - if (optname == MCAST_BLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 1; - } else if (optname == MCAST_UNBLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 0; - } else if (optname == MCAST_JOIN_SOURCE_GROUP) { - struct sockaddr_in6 *psin6; - - psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; - retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface, - &psin6->sin6_addr, - MCAST_INCLUDE); - /* prior join w/ different source is ok */ - if (retv && retv != -EADDRINUSE) - break; - omode = MCAST_INCLUDE; - add = 1; - } else /* MCAST_LEAVE_SOURCE_GROUP */ { - omode = MCAST_INCLUDE; - add = 0; - } - retv = ip6_mc_source(add, omode, sk, &greqs); + retv = do_ipv6_mcast_group_source(sk, optname, &greqs); break; } case MCAST_MSFILTER: @@ -780,7 +787,7 @@ done: retv = -EINVAL; break; } - retv = ip6_mc_msfilter(sk, gsf); + retv = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); kfree(gsf); break; @@ -838,67 +845,10 @@ done: break; case IPV6_ADDR_PREFERENCES: - { - unsigned int pref = 0; - unsigned int prefmask = ~0; - if (optlen < sizeof(int)) goto e_inval; - - retv = -EINVAL; - - /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ - switch (val & (IPV6_PREFER_SRC_PUBLIC| - IPV6_PREFER_SRC_TMP| - IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { - case IPV6_PREFER_SRC_PUBLIC: - pref |= IPV6_PREFER_SRC_PUBLIC; - break; - case IPV6_PREFER_SRC_TMP: - pref |= IPV6_PREFER_SRC_TMP; - break; - case IPV6_PREFER_SRC_PUBTMP_DEFAULT: - break; - case 0: - goto pref_skip_pubtmp; - default: - goto e_inval; - } - - prefmask &= ~(IPV6_PREFER_SRC_PUBLIC| - IPV6_PREFER_SRC_TMP); -pref_skip_pubtmp: - - /* check HOME/COA conflicts */ - switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) { - case IPV6_PREFER_SRC_HOME: - break; - case IPV6_PREFER_SRC_COA: - pref |= IPV6_PREFER_SRC_COA; - case 0: - goto pref_skip_coa; - default: - goto e_inval; - } - - prefmask &= ~IPV6_PREFER_SRC_COA; -pref_skip_coa: - - /* check CGA/NONCGA conflicts */ - switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { - case IPV6_PREFER_SRC_CGA: - case IPV6_PREFER_SRC_NONCGA: - case 0: - break; - default: - goto e_inval; - } - - np->srcprefs = (np->srcprefs & prefmask) | pref; - retv = 0; - + retv = __ip6_sock_set_addr_preferences(sk, val); break; - } case IPV6_MINHOPCOUNT: if (optlen < sizeof(int)) goto e_inval; @@ -973,9 +923,110 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER) - return compat_mc_setsockopt(sk, level, optname, optval, optlen, - ipv6_setsockopt); + switch (optname) { + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + { + struct compat_group_req __user *gr32 = (void __user *)optval; + struct group_req greq; + struct sockaddr_in6 *psin6 = (struct sockaddr_in6 *)&greq.gr_group; + + if (optlen < sizeof(struct compat_group_req)) + return -EINVAL; + + if (get_user(greq.gr_interface, &gr32->gr_interface) || + copy_from_user(&greq.gr_group, &gr32->gr_group, + sizeof(greq.gr_group))) + return -EFAULT; + + if (greq.gr_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + + rtnl_lock(); + lock_sock(sk); + if (optname == MCAST_JOIN_GROUP) + err = ipv6_sock_mc_join(sk, greq.gr_interface, + &psin6->sin6_addr); + else + err = ipv6_sock_mc_drop(sk, greq.gr_interface, + &psin6->sin6_addr); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + { + struct compat_group_source_req __user *gsr32 = (void __user *)optval; + struct group_source_req greqs; + + if (optlen < sizeof(struct compat_group_source_req)) + return -EINVAL; + + if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || + copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, + sizeof(greqs.gsr_group)) || + copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, + sizeof(greqs.gsr_source))) + return -EFAULT; + + rtnl_lock(); + lock_sock(sk); + err = do_ipv6_mcast_group_source(sk, optname, &greqs); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_MSFILTER: + { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter *gf32; + void *p; + int n; + + if (optlen < size0) + return -EINVAL; + if (optlen > sysctl_optmem_max - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) + return -ENOMEM; + + gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + if (copy_from_user(gf32, optval, optlen)) { + err = -EFAULT; + goto mc_msf_out; + } + + n = gf32->gf_numsrc; + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n >= 0x1ffffffU || + n > sysctl_mld_max_msf) { + err = -ENOBUFS; + goto mc_msf_out; + } + if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { + err = -EINVAL; + goto mc_msf_out; + } + + rtnl_lock(); + lock_sock(sk); + err = ip6_mc_msfilter(sk, &(struct group_filter){ + .gf_interface = gf32->gf_interface, + .gf_group = gf32->gf_group, + .gf_fmode = gf32->gf_fmode, + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + release_sock(sk); + rtnl_unlock(); +mc_msf_out: + kfree(p); + return err; + } + } err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER @@ -1048,18 +1099,28 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case MCAST_MSFILTER: { + struct group_filter __user *p = (void __user *)optval; struct group_filter gsf; + const int size0 = offsetof(struct group_filter, gf_slist); + int num; int err; - if (len < GROUP_FILTER_SIZE(0)) + if (len < size0) return -EINVAL; - if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) + if (copy_from_user(&gsf, p, size0)) return -EFAULT; if (gsf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; + num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, - (struct group_filter __user *)optval, optlen); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + if (!err) { + if (num > gsf.gf_numsrc) + num = gsf.gf_numsrc; + if (put_user(GROUP_FILTER_SIZE(num), optlen) || + copy_to_user(p, &gsf, size0)) + err = -EFAULT; + } release_sock(sk); return err; } @@ -1428,9 +1489,44 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - if (optname == MCAST_MSFILTER) - return compat_mc_getsockopt(sk, level, optname, optval, optlen, - ipv6_getsockopt); + if (optname == MCAST_MSFILTER) { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter __user *p = (void __user *)optval; + struct compat_group_filter gf32; + struct group_filter gf; + int ulen, err; + int num; + + if (get_user(ulen, optlen)) + return -EFAULT; + + if (ulen < size0) + return -EINVAL; + + if (copy_from_user(&gf32, p, size0)) + return -EFAULT; + + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + if (gf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + lock_sock(sk); + err = ip6_mc_msfget(sk, &gf, p->gf_slist); + release_sock(sk); + if (err) + return err; + if (num > gf.gf_numsrc) + num = gf.gf_numsrc; + ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32)); + if (put_user(ulen, optlen) || + put_user(gf.gf_fmode, &p->gf_fmode) || + put_user(gf.gf_numsrc, &p->gf_numsrc)) + return -EFAULT; + return 0; + } err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index eaa4c2cc2fbb..7e12d2114158 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -457,7 +457,8 @@ done: return err; } -int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) +int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, + struct sockaddr_storage *list) { const struct in6_addr *group; struct ipv6_mc_socklist *pmc; @@ -509,10 +510,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) goto done; } newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc; - for (i = 0; i < newpsl->sl_count; ++i) { + for (i = 0; i < newpsl->sl_count; ++i, ++list) { struct sockaddr_in6 *psin6; - psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i]; + psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } err = ip6_mc_add_src(idev, group, gsf->gf_fmode, @@ -547,7 +548,7 @@ done: } int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen) + struct sockaddr_storage *p) { int err, i, count, copycount; const struct in6_addr *group; @@ -592,14 +593,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - if (put_user(GROUP_FILTER_SIZE(copycount), optlen) || - copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { - return -EFAULT; - } /* changes to psl require the socket lock, and a write lock * on pmc->sflock. We have the socket lock so reading here is safe. */ - for (i = 0; i < copycount; i++) { + for (i = 0; i < copycount; i++, p++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -607,7 +604,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, memset(&ss, 0, sizeof(ss)); psin6->sin6_family = AF_INET6; psin6->sin6_addr = psl->sl_addr[i]; - if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss))) + if (copy_to_user(p, &ss, sizeof(ss))) return -EFAULT; } return 0; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0028aa1d7869..8ef5a7b30524 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1377,6 +1377,7 @@ const struct proto_ops inet6_sockraw_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a8b4add0b545..82cbb46a2a4f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3421,6 +3421,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, #ifdef CONFIG_IPV6_ROUTER_PREF fib6_nh->last_probe = jiffies; #endif + if (cfg->fc_is_fdb) { + fib6_nh->fib_nh_gw6 = cfg->fc_gateway; + fib6_nh->fib_nh_gw_family = AF_INET6; + return 0; + } err = -ENODEV; if (cfg->fc_ifindex) { @@ -4336,41 +4341,29 @@ static void rtmsg_to_fib6_config(struct net *net, }; } -int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) { struct fib6_config cfg; - struct in6_rtmsg rtmsg; int err; - switch (cmd) { - case SIOCADDRT: /* Add a route */ - case SIOCDELRT: /* Delete a route */ - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - err = copy_from_user(&rtmsg, arg, - sizeof(struct in6_rtmsg)); - if (err) - return -EFAULT; + if (cmd != SIOCADDRT && cmd != SIOCDELRT) + return -EINVAL; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; - rtmsg_to_fib6_config(net, &rtmsg, &cfg); + rtmsg_to_fib6_config(net, rtmsg, &cfg); - rtnl_lock(); - switch (cmd) { - case SIOCADDRT: - err = ip6_route_add(&cfg, GFP_KERNEL, NULL); - break; - case SIOCDELRT: - err = ip6_route_del(&cfg, NULL); - break; - default: - err = -EINVAL; - } - rtnl_unlock(); - - return err; + rtnl_lock(); + switch (cmd) { + case SIOCADDRT: + err = ip6_route_add(&cfg, GFP_KERNEL, NULL); + break; + case SIOCDELRT: + err = ip6_route_del(&cfg, NULL); + break; } - - return -EINVAL; + rtnl_unlock(); + return err; } /* diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 98954830c40b..1fbb4dfbb191 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -83,6 +83,13 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; +static inline struct sit_net *dev_to_sit_net(struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + + return net_generic(t->net, sit_net_id); +} + /* * Must be invoked with rcu_read_lock */ @@ -291,14 +298,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct ip_tunnel *t, - struct ip_tunnel_prl __user *a) +static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) { + struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data; + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + return -EINVAL; + if (copy_from_user(&kprl, a, sizeof(kprl))) return -EFAULT; cmax = kprl.datalen / sizeof(kprl); @@ -441,6 +452,35 @@ out: return err; } +static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_prl prl; + int err; + + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + return -EINVAL; + + if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) + return -EFAULT; + + switch (cmd) { + case SIOCDELPRL: + err = ipip6_tunnel_del_prl(t, &prl); + break; + case SIOCADDPRL: + case SIOCCHGPRL: + err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); + break; + } + dst_cache_reset(&t->dst_cache); + netdev_state_change(dev); + return err; +} + static int isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) { @@ -1151,7 +1191,53 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, netdev_state_change(t->dev); return 0; } -#endif + +static int +ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_6rd ip6rd; + struct ip_tunnel_parm p; + + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + t = ipip6_tunnel_locate(t->net, &p, 0); + } + if (!t) + t = netdev_priv(dev); + + ip6rd.prefix = t->ip6rd.prefix; + ip6rd.relay_prefix = t->ip6rd.relay_prefix; + ip6rd.prefixlen = t->ip6rd.prefixlen; + ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; + if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd))) + return -EFAULT; + return 0; +} + +static int +ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_6rd ip6rd; + int err; + + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd))) + return -EFAULT; + + if (cmd != SIOCDEL6RD) { + err = ipip6_tunnel_update_6rd(t, &ip6rd); + if (err < 0) + return err; + } else + ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev)); + return 0; +} + +#endif /* CONFIG_IPV6_SIT_6RD */ static bool ipip6_valid_ip_proto(u8 ipproto) { @@ -1164,185 +1250,145 @@ static bool ipip6_valid_ip_proto(u8 ipproto) } static int -ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p) +{ + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!ipip6_valid_ip_proto(p->iph.protocol)) + return -EINVAL; + if (p->iph.version != 4 || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) + return -EINVAL; + + if (p->iph.ttl) + p->iph.frag_off |= htons(IP_DF); + return 0; +} + +static int +ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p) { - int err = 0; - struct ip_tunnel_parm p; - struct ip_tunnel_prl prl; struct ip_tunnel *t = netdev_priv(dev); - struct net *net = t->net; - struct sit_net *sitn = net_generic(net, sit_net_id); -#ifdef CONFIG_IPV6_SIT_6RD - struct ip_tunnel_6rd ip6rd; -#endif - switch (cmd) { - case SIOCGETTUNNEL: -#ifdef CONFIG_IPV6_SIT_6RD - case SIOCGET6RD: -#endif - if (dev == sitn->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipip6_tunnel_locate(net, &p, 0); - if (!t) - t = netdev_priv(dev); - } + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + t = ipip6_tunnel_locate(t->net, p, 0); + if (!t) + t = netdev_priv(dev); + memcpy(p, &t->parms, sizeof(*p)); + return 0; +} - err = -EFAULT; - if (cmd == SIOCGETTUNNEL) { - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, - sizeof(p))) - goto done; -#ifdef CONFIG_IPV6_SIT_6RD +static int +ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = __ipip6_tunnel_ioctl_validate(t->net, p); + if (err) + return err; + + t = ipip6_tunnel_locate(t->net, p, 1); + if (!t) + return -ENOBUFS; + return 0; +} + +static int +ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = __ipip6_tunnel_ioctl_validate(t->net, p); + if (err) + return err; + + t = ipip6_tunnel_locate(t->net, p, 0); + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + if (!t) + return -ENOENT; + } else { + if (t) { + if (t->dev != dev) + return -EEXIST; } else { - ip6rd.prefix = t->ip6rd.prefix; - ip6rd.relay_prefix = t->ip6rd.relay_prefix; - ip6rd.prefixlen = t->ip6rd.prefixlen; - ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, - sizeof(ip6rd))) - goto done; -#endif + if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) || + (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr)) + return -EINVAL; + t = netdev_priv(dev); } - err = 0; - break; - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; + ipip6_tunnel_update(t, p, t->fwmark); + } - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (!ipip6_valid_ip_proto(p.iph.protocol)) - goto done; - if (p.iph.version != 4 || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || - (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { - err = -EINVAL; - break; - } - t = netdev_priv(dev); - } + return 0; +} - ipip6_tunnel_update(t, &p, t->fwmark); - } +static int +ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); - if (t) { - err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + t = ipip6_tunnel_locate(t->net, p, 0); + if (!t) + return -ENOENT; + if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev)) + return -EPERM; + dev = t->dev; + } + unregister_netdevice(dev); + return 0; +} +static int +ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ + switch (cmd) { + case SIOCGETTUNNEL: + return ipip6_tunnel_get(dev, p); + case SIOCADDTUNNEL: + return ipip6_tunnel_add(dev, p); + case SIOCCHGTUNNEL: + return ipip6_tunnel_change(dev, p); case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == sitn->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - t = ipip6_tunnel_locate(net, &p, 0); - if (!t) - goto done; - err = -EPERM; - if (t == netdev_priv(sitn->fb_tunnel_dev)) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; + return ipip6_tunnel_del(dev, p); + default: + return -EINVAL; + } +} +static int +ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCGETTUNNEL: + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + case SIOCDELTUNNEL: + return ip_tunnel_ioctl(dev, ifr, cmd); case SIOCGETPRL: - err = -EINVAL; - if (dev == sitn->fb_tunnel_dev) - goto done; - err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); - break; - + return ipip6_tunnel_get_prl(dev, ifr); case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - err = -EINVAL; - if (dev == sitn->fb_tunnel_dev) - goto done; - err = -EFAULT; - if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) - goto done; - - switch (cmd) { - case SIOCDELPRL: - err = ipip6_tunnel_del_prl(t, &prl); - break; - case SIOCADDPRL: - case SIOCCHGPRL: - err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); - break; - } - dst_cache_reset(&t->dst_cache); - netdev_state_change(dev); - break; - + return ipip6_tunnel_prl_ctl(dev, ifr, cmd); #ifdef CONFIG_IPV6_SIT_6RD + case SIOCGET6RD: + return ipip6_tunnel_get6rd(dev, ifr); case SIOCADD6RD: case SIOCCHG6RD: case SIOCDEL6RD: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, - sizeof(ip6rd))) - goto done; - - if (cmd != SIOCDEL6RD) { - err = ipip6_tunnel_update_6rd(t, &ip6rd); - if (err < 0) - goto done; - } else - ipip6_tunnel_clone_6rd(dev, sitn); - - err = 0; - break; + return ipip6_tunnel_6rdctl(dev, ifr, cmd); #endif - default: - err = -EINVAL; + return -EINVAL; } - -done: - return err; } static const struct net_device_ops ipip6_netdev_ops = { @@ -1352,6 +1398,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipip6_tunnel_ctl, }; static void ipip6_dev_free(struct net_device *dev) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 413b3425ac66..b7415ca75c2d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -463,6 +463,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (fastopen && !fastopen->sk) break; + ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); + if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -471,6 +473,15 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else sk->sk_err_soft = err; goto out; + case TCP_LISTEN: + break; + default: + /* check if this ICMP message allows revert of backoff. + * (see RFC 6069) + */ + if (!fastopen && type == ICMPV6_DEST_UNREACH && + code == ICMPV6_NOROUTE) + tcp_ld_RTO_revert(sk, seq); } if (!sock_owned_by_user(sk) && np->recverr) { diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 21e7b95ddbfa..06c02ebe6b9b 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -21,8 +21,14 @@ static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly; static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly; +static struct xfrm6_tunnel __rcu *tunnelmpls6_handlers __read_mostly; static DEFINE_MUTEX(tunnel6_mutex); +static inline int xfrm6_tunnel_mpls_supported(void) +{ + return IS_ENABLED(CONFIG_MPLS); +} + int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) { struct xfrm6_tunnel __rcu **pprev; @@ -32,8 +38,21 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) mutex_lock(&tunnel6_mutex); - for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; - (t = rcu_dereference_protected(*pprev, + switch (family) { + case AF_INET6: + pprev = &tunnel6_handlers; + break; + case AF_INET: + pprev = &tunnel46_handlers; + break; + case AF_MPLS: + pprev = &tunnelmpls6_handlers; + break; + default: + goto err; + } + + for (; (t = rcu_dereference_protected(*pprev, lockdep_is_held(&tunnel6_mutex))) != NULL; pprev = &t->next) { if (t->priority > priority) @@ -62,8 +81,21 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) mutex_lock(&tunnel6_mutex); - for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; - (t = rcu_dereference_protected(*pprev, + switch (family) { + case AF_INET6: + pprev = &tunnel6_handlers; + break; + case AF_INET: + pprev = &tunnel46_handlers; + break; + case AF_MPLS: + pprev = &tunnelmpls6_handlers; + break; + default: + goto err; + } + + for (; (t = rcu_dereference_protected(*pprev, lockdep_is_held(&tunnel6_mutex))) != NULL; pprev = &t->next) { if (t == handler) { @@ -73,6 +105,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) } } +err: mutex_unlock(&tunnel6_mutex); synchronize_net(); @@ -86,6 +119,24 @@ EXPORT_SYMBOL(xfrm6_tunnel_deregister); handler != NULL; \ handler = rcu_dereference(handler->next)) \ +static int tunnelmpls6_rcv(struct sk_buff *skb) +{ + struct xfrm6_tunnel *handler; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto drop; + + for_each_tunnel_rcu(tunnelmpls6_handlers, handler) + if (!handler->handler(skb)) + return 0; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + kfree_skb(skb); + return 0; +} + static int tunnel6_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; @@ -146,6 +197,18 @@ static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return -ENOENT; } +static int tunnelmpls6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_tunnel *handler; + + for_each_tunnel_rcu(tunnelmpls6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + return 0; + + return -ENOENT; +} + static const struct inet6_protocol tunnel6_protocol = { .handler = tunnel6_rcv, .err_handler = tunnel6_err, @@ -158,6 +221,12 @@ static const struct inet6_protocol tunnel46_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; +static const struct inet6_protocol tunnelmpls6_protocol = { + .handler = tunnelmpls6_rcv, + .err_handler = tunnelmpls6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + static int __init tunnel6_init(void) { if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) { @@ -169,6 +238,13 @@ static int __init tunnel6_init(void) inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); return -EAGAIN; } + if (xfrm6_tunnel_mpls_supported() && + inet6_add_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) { + pr_err("%s: can't add protocol\n", __func__); + inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); + inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP); + return -EAGAIN; + } return 0; } @@ -178,6 +254,9 @@ static void __exit tunnel6_fini(void) pr_err("%s: can't remove protocol\n", __func__); if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6)) pr_err("%s: can't remove protocol\n", __func__); + if (xfrm6_tunnel_mpls_supported() && + inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) + pr_err("%s: can't remove protocol\n", __func__); } module_init(tunnel6_init); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a52cb3fc6df5..04cbeefd8982 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -17,11 +17,6 @@ #include <net/ipv6.h> #include <net/xfrm.h> -int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return xfrm6_extract_header(skb); -} - int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) { @@ -35,9 +30,12 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); static int xfrm6_transport_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) { - if (xfrm_trans_queue(skb, ip6_rcv_finish)) - __kfree_skb(skb); - return -1; + if (xfrm_trans_queue(skb, ip6_rcv_finish)) { + kfree_skb(skb); + return NET_RX_DROP; + } + + return 0; } int xfrm6_transport_finish(struct sk_buff *skb, int async) @@ -60,13 +58,106 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); skb_reset_transport_header(skb); - return -1; + return 0; } NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm6_transport_finish2); - return -1; + return 0; +} + +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. + * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + struct udphdr *uh; + struct ipv6hdr *ip6h; + int len; + int ip6hlen = sizeof(struct ipv6hdr); + + __u8 *udpdata; + __be32 *udpdata32; + __u16 encap_type = up->encap_type; + + /* if this is not encapsulated socket, then just return now */ + if (!encap_type) + return 1; + + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + len = skb->len - sizeof(struct udphdr); + if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8))) + return 1; + + /* Now we can get the pointers */ + uh = udp_hdr(skb); + udpdata = (__u8 *)uh + sizeof(struct udphdr); + udpdata32 = (__be32 *)udpdata; + + switch (encap_type) { + default: + case UDP_ENCAP_ESPINUDP: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { + /* ESP Packet without Non-ESP header */ + len = sizeof(struct udphdr); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + goto drop; + } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && + udpdata32[0] == 0 && udpdata32[1] == 0) { + + /* ESP Packet with Non-IKE marker */ + len = sizeof(struct udphdr) + 2 * sizeof(u32); + } else + /* Must be an IKE packet.. pass it through */ + return 1; + break; + } + + /* At this point we are sure that this is an ESPinUDP packet, + * so we need to remove 'len' bytes from the packet (the UDP + * header and optional ESP marker bytes) and then modify the + * protocol to ESP, and then call into the transform receiver. + */ + if (skb_unclone(skb, GFP_ATOMIC)) + goto drop; + + /* Now we can update and verify the packet length... */ + ip6h = ipv6_hdr(skb); + ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len); + if (skb->len < ip6hlen + len) { + /* packet is too small!?! */ + goto drop; + } + + /* pull the data buffer up to the ESP header and set the + * transport header to point to ESP. Keep UDP on the stack + * for later. + */ + __skb_pull(skb, len); + skb_reset_transport_header(skb); + + /* process ESP */ + return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); + +drop: + kfree_skb(skb); + return 0; } int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index e34167f790e6..8b84d534b19d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -23,24 +23,7 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, } EXPORT_SYMBOL(xfrm6_find_1stfragopt); -static int xfrm6_local_dontfrag(struct sk_buff *skb) -{ - int proto; - struct sock *sk = skb->sk; - - if (sk) { - if (sk->sk_family != AF_INET6) - return 0; - - proto = sk->sk_protocol; - if (proto == IPPROTO_UDP || proto == IPPROTO_RAW) - return inet6_sk(sk)->dontfrag; - } - - return 0; -} - -static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) +void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu) { struct flowi6 fl6; struct sock *sk = skb->sk; @@ -64,80 +47,9 @@ void xfrm6_local_error(struct sk_buff *skb, u32 mtu) ipv6_local_error(sk, EMSGSIZE, &fl6, mtu); } -static int xfrm6_tunnel_check_size(struct sk_buff *skb) -{ - int mtu, ret = 0; - struct dst_entry *dst = skb_dst(skb); - - if (skb->ignore_df) - goto out; - - mtu = dst_mtu(dst); - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - - if ((!skb_is_gso(skb) && skb->len > mtu) || - (skb_is_gso(skb) && - !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { - skb->dev = dst->dev; - skb->protocol = htons(ETH_P_IPV6); - - if (xfrm6_local_dontfrag(skb)) - xfrm6_local_rxpmtu(skb, mtu); - else if (skb->sk) - xfrm_local_error(skb, mtu); - else - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ret = -EMSGSIZE; - } -out: - return ret; -} - -int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) -{ - int err; - - err = xfrm6_tunnel_check_size(skb); - if (err) - return err; - - XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; - - return xfrm6_extract_header(skb); -} - -int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) -{ - memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; - - return xfrm_output(sk, skb); -} - -static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk, - struct sk_buff *skb) -{ - const struct xfrm_state_afinfo *afinfo; - int ret = -EAFNOSUPPORT; - - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - ret = afinfo->output_finish(sk, skb); - else - kfree_skb(skb); - rcu_read_unlock(); - - return ret; -} - static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct xfrm_state *x = skb_dst(skb)->xfrm; - - return __xfrm6_output_state_finish(x, sk, skb); + return xfrm_output(sk, skb); } static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -164,7 +76,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) toobig = skb->len > mtu && !skb_is_gso(skb); - if (toobig && xfrm6_local_dontfrag(skb)) { + if (toobig && xfrm6_local_dontfrag(skb->sk)) { xfrm6_local_rxpmtu(skb, mtu); kfree_skb(skb); return -EMSGSIZE; @@ -179,7 +91,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) __xfrm6_output_finish); skip_frag: - return __xfrm6_output_state_finish(x, sk, skb); + return xfrm_output(sk, skb); } int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 34cb65c7d5a7..ea2f805d3b01 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -14,6 +14,7 @@ #include <linux/mutex.h> #include <linux/skbuff.h> #include <linux/icmpv6.h> +#include <net/ip6_route.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/xfrm.h> @@ -58,6 +59,53 @@ static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) return 0; } +int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + int ret; + struct xfrm6_protocol *handler; + struct xfrm6_protocol __rcu **head = proto_handlers(nexthdr); + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + + if (!head) + goto out; + + if (!skb_dst(skb)) { + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct dst_entry *dst; + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6, + skb, flags); + if (dst->error) + goto drop; + skb_dst_set(skb, dst); + } + + for_each_protocol_rcu(*head, handler) + if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) + return ret; + +out: + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL(xfrm6_rcv_encap); + static int xfrm6_esp_rcv(struct sk_buff *skb) { int ret; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 78daadecbdef..6610b2198fa9 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -13,37 +13,11 @@ */ #include <net/xfrm.h> -#include <linux/pfkeyv2.h> -#include <linux/ipsec.h> -#include <linux/netfilter_ipv6.h> -#include <linux/export.h> -#include <net/dsfield.h> -#include <net/ipv6.h> -#include <net/addrconf.h> - -int xfrm6_extract_header(struct sk_buff *skb) -{ - struct ipv6hdr *iph = ipv6_hdr(skb); - - XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); - XFRM_MODE_SKB_CB(skb)->id = 0; - XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); - XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); - XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; - XFRM_MODE_SKB_CB(skb)->optlen = 0; - memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, - sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); - - return 0; -} static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .proto = IPPROTO_IPV6, .output = xfrm6_output, - .output_finish = xfrm6_output_finish, - .extract_input = xfrm6_extract_input, - .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, .local_error = xfrm6_local_error, }; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c4bdcbc84b07..ee0add15497d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/types.h> +#include <linux/limits.h> #include <linux/list.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -36,8 +37,6 @@ static char iucv_userid[80]; -static const struct proto_ops iucv_sock_ops; - static struct proto iucv_proto = { .name = "AF_IUCV", .owner = THIS_MODULE, @@ -85,14 +84,11 @@ do { \ __ret; \ }) +static struct sock *iucv_accept_dequeue(struct sock *parent, + struct socket *newsock); static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); -static void iucv_sever_path(struct sock *, int); -static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); -static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, - struct sk_buff *skb, u8 flags); static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify); /* Call Back functions */ @@ -127,110 +123,6 @@ static inline void low_nmcpy(unsigned char *dst, char *src) memcpy(&dst[8], src, 8); } -static int afiucv_pm_prepare(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_prepare\n"); -#endif - return 0; -} - -static void afiucv_pm_complete(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_complete\n"); -#endif -} - -/** - * afiucv_pm_freeze() - Freeze PM callback - * @dev: AFIUCV dummy device - * - * Sever all established IUCV communication pathes - */ -static int afiucv_pm_freeze(struct device *dev) -{ - struct iucv_sock *iucv; - struct sock *sk; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_freeze\n"); -#endif - read_lock(&iucv_sk_list.lock); - sk_for_each(sk, &iucv_sk_list.head) { - iucv = iucv_sk(sk); - switch (sk->sk_state) { - case IUCV_DISCONN: - case IUCV_CLOSING: - case IUCV_CONNECTED: - iucv_sever_path(sk, 0); - break; - case IUCV_OPEN: - case IUCV_BOUND: - case IUCV_LISTEN: - case IUCV_CLOSED: - default: - break; - } - skb_queue_purge(&iucv->send_skb_q); - skb_queue_purge(&iucv->backlog_skb_q); - } - read_unlock(&iucv_sk_list.lock); - return 0; -} - -/** - * afiucv_pm_restore_thaw() - Thaw and restore PM callback - * @dev: AFIUCV dummy device - * - * socket clean up after freeze - */ -static int afiucv_pm_restore_thaw(struct device *dev) -{ - struct sock *sk; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_restore_thaw\n"); -#endif - read_lock(&iucv_sk_list.lock); - sk_for_each(sk, &iucv_sk_list.head) { - switch (sk->sk_state) { - case IUCV_CONNECTED: - sk->sk_err = EPIPE; - sk->sk_state = IUCV_DISCONN; - sk->sk_state_change(sk); - break; - case IUCV_DISCONN: - case IUCV_CLOSING: - case IUCV_LISTEN: - case IUCV_BOUND: - case IUCV_OPEN: - default: - break; - } - } - read_unlock(&iucv_sk_list.lock); - return 0; -} - -static const struct dev_pm_ops afiucv_pm_ops = { - .prepare = afiucv_pm_prepare, - .complete = afiucv_pm_complete, - .freeze = afiucv_pm_freeze, - .thaw = afiucv_pm_restore_thaw, - .restore = afiucv_pm_restore_thaw, -}; - -static struct device_driver af_iucv_driver = { - .owner = THIS_MODULE, - .name = "afiucv", - .bus = NULL, - .pm = &afiucv_pm_ops, -}; - -/* dummy device used as trigger for PM functions */ -static struct device *af_iucv_dev; - /** * iucv_msg_length() - Returns the length of an iucv message. * @msg: Pointer to struct iucv_message, MUST NOT be NULL @@ -435,6 +327,20 @@ static void iucv_sock_cleanup_listen(struct sock *parent) parent->sk_state = IUCV_CLOSED; } +static void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_add_node(sk, &l->head); + write_unlock_bh(&l->lock); +} + +static void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_del_node_init(sk); + write_unlock_bh(&l->lock); +} + /* Kill socket (only if zapped and orphaned) */ static void iucv_sock_kill(struct sock *sk) { @@ -607,53 +513,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, return sk; } -/* Create an IUCV socket */ -static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - - if (protocol && protocol != PF_IUCV) - return -EPROTONOSUPPORT; - - sock->state = SS_UNCONNECTED; - - switch (sock->type) { - case SOCK_STREAM: - sock->ops = &iucv_sock_ops; - break; - case SOCK_SEQPACKET: - /* currently, proto ops can handle both sk types */ - sock->ops = &iucv_sock_ops; - break; - default: - return -ESOCKTNOSUPPORT; - } - - sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); - if (!sk) - return -ENOMEM; - - iucv_sock_init(sk, NULL); - - return 0; -} - -void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk) -{ - write_lock_bh(&l->lock); - sk_add_node(sk, &l->head); - write_unlock_bh(&l->lock); -} - -void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) -{ - write_lock_bh(&l->lock); - sk_del_node_init(sk); - write_unlock_bh(&l->lock); -} - -void iucv_accept_enqueue(struct sock *parent, struct sock *sk) +static void iucv_accept_enqueue(struct sock *parent, struct sock *sk) { unsigned long flags; struct iucv_sock *par = iucv_sk(parent); @@ -666,7 +526,7 @@ void iucv_accept_enqueue(struct sock *parent, struct sock *sk) sk_acceptq_added(parent); } -void iucv_accept_unlink(struct sock *sk) +static void iucv_accept_unlink(struct sock *sk) { unsigned long flags; struct iucv_sock *par = iucv_sk(iucv_sk(sk)->parent); @@ -679,7 +539,8 @@ void iucv_accept_unlink(struct sock *sk) sock_put(sk); } -struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) +static struct sock *iucv_accept_dequeue(struct sock *parent, + struct socket *newsock) { struct iucv_sock *isk, *n; struct sock *sk; @@ -1100,7 +961,6 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* initialize defaults */ cmsg_done = 0; /* check for duplicate headers */ - txmsg.class = 0; /* iterate over control messages */ for_each_cmsghdr(cmsg, msg) { @@ -1511,8 +1371,8 @@ static inline __poll_t iucv_accept_poll(struct sock *parent) return 0; } -__poll_t iucv_sock_poll(struct file *file, struct socket *sock, - poll_table *wait) +static __poll_t iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; @@ -1664,7 +1524,7 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname, switch (sk->sk_state) { case IUCV_OPEN: case IUCV_BOUND: - if (val < 1 || val > (u16)(~0)) + if (val < 1 || val > U16_MAX) rc = -EINVAL; else iucv->msglimit = val; @@ -2396,6 +2256,35 @@ static const struct proto_ops iucv_sock_ops = { .getsockopt = iucv_sock_getsockopt, }; +static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk; + + if (protocol && protocol != PF_IUCV) + return -EPROTONOSUPPORT; + + sock->state = SS_UNCONNECTED; + + switch (sock->type) { + case SOCK_STREAM: + case SOCK_SEQPACKET: + /* currently, proto ops can handle both sk types */ + sock->ops = &iucv_sock_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); + if (!sk) + return -ENOMEM; + + iucv_sock_init(sk, NULL); + + return 0; +} + static const struct net_proto_family iucv_sock_family_ops = { .family = AF_IUCV, .owner = THIS_MODULE, @@ -2409,45 +2298,11 @@ static struct packet_type iucv_packet_type = { static int afiucv_iucv_init(void) { - int err; - - err = pr_iucv->iucv_register(&af_iucv_handler, 0); - if (err) - goto out; - /* establish dummy device */ - af_iucv_driver.bus = pr_iucv->bus; - err = driver_register(&af_iucv_driver); - if (err) - goto out_iucv; - af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL); - if (!af_iucv_dev) { - err = -ENOMEM; - goto out_driver; - } - dev_set_name(af_iucv_dev, "af_iucv"); - af_iucv_dev->bus = pr_iucv->bus; - af_iucv_dev->parent = pr_iucv->root; - af_iucv_dev->release = (void (*)(struct device *))kfree; - af_iucv_dev->driver = &af_iucv_driver; - err = device_register(af_iucv_dev); - if (err) - goto out_iucv_dev; - return 0; - -out_iucv_dev: - put_device(af_iucv_dev); -out_driver: - driver_unregister(&af_iucv_driver); -out_iucv: - pr_iucv->iucv_unregister(&af_iucv_handler, 0); -out: - return err; + return pr_iucv->iucv_register(&af_iucv_handler, 0); } static void afiucv_iucv_exit(void) { - device_unregister(af_iucv_dev); - driver_unregister(&af_iucv_driver); pr_iucv->iucv_unregister(&af_iucv_handler, 0); } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 9a2d023842fe..19250a0c85d3 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -67,32 +67,9 @@ static int iucv_bus_match(struct device *dev, struct device_driver *drv) return 0; } -enum iucv_pm_states { - IUCV_PM_INITIAL = 0, - IUCV_PM_FREEZING = 1, - IUCV_PM_THAWING = 2, - IUCV_PM_RESTORING = 3, -}; -static enum iucv_pm_states iucv_pm_state; - -static int iucv_pm_prepare(struct device *); -static void iucv_pm_complete(struct device *); -static int iucv_pm_freeze(struct device *); -static int iucv_pm_thaw(struct device *); -static int iucv_pm_restore(struct device *); - -static const struct dev_pm_ops iucv_pm_ops = { - .prepare = iucv_pm_prepare, - .complete = iucv_pm_complete, - .freeze = iucv_pm_freeze, - .thaw = iucv_pm_thaw, - .restore = iucv_pm_restore, -}; - struct bus_type iucv_bus = { .name = "iucv", .match = iucv_bus_match, - .pm = &iucv_pm_ops, }; EXPORT_SYMBOL(iucv_bus); @@ -435,31 +412,6 @@ static void iucv_block_cpu(void *data) } /** - * iucv_block_cpu_almost - * @data: unused - * - * Allow connection-severed interrupts only on this cpu. - */ -static void iucv_block_cpu_almost(void *data) -{ - int cpu = smp_processor_id(); - union iucv_param *parm; - - /* Allow iucv control interrupts only */ - parm = iucv_param_irq[cpu]; - memset(parm, 0, sizeof(union iucv_param)); - parm->set_mask.ipmask = 0x08; - iucv_call_b2f0(IUCV_SETMASK, parm); - /* Allow iucv-severed interrupt only */ - memset(parm, 0, sizeof(union iucv_param)); - parm->set_mask.ipmask = 0x20; - iucv_call_b2f0(IUCV_SETCONTROLMASK, parm); - - /* Clear indication that iucv interrupts are allowed for this cpu. */ - cpumask_clear_cpu(cpu, &iucv_irq_cpumask); -} - -/** * iucv_declare_cpu * @data: unused * @@ -1834,146 +1786,6 @@ static void iucv_external_interrupt(struct ext_code ext_code, spin_unlock(&iucv_queue_lock); } -static int iucv_pm_prepare(struct device *dev) -{ - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_INFO "iucv_pm_prepare\n"); -#endif - if (dev->driver && dev->driver->pm && dev->driver->pm->prepare) - rc = dev->driver->pm->prepare(dev); - return rc; -} - -static void iucv_pm_complete(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_INFO "iucv_pm_complete\n"); -#endif - if (dev->driver && dev->driver->pm && dev->driver->pm->complete) - dev->driver->pm->complete(dev); -} - -/** - * iucv_path_table_empty() - determine if iucv path table is empty - * - * Returns 0 if there are still iucv pathes defined - * 1 if there are no iucv pathes defined - */ -static int iucv_path_table_empty(void) -{ - int i; - - for (i = 0; i < iucv_max_pathid; i++) { - if (iucv_path_table[i]) - return 0; - } - return 1; -} - -/** - * iucv_pm_freeze() - Freeze PM callback - * @dev: iucv-based device - * - * disable iucv interrupts - * invoke callback function of the iucv-based driver - * shut down iucv, if no iucv-pathes are established anymore - */ -static int iucv_pm_freeze(struct device *dev) -{ - int cpu; - struct iucv_irq_list *p, *n; - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "iucv_pm_freeze\n"); -#endif - if (iucv_pm_state != IUCV_PM_FREEZING) { - for_each_cpu(cpu, &iucv_irq_cpumask) - smp_call_function_single(cpu, iucv_block_cpu_almost, - NULL, 1); - cancel_work_sync(&iucv_work); - list_for_each_entry_safe(p, n, &iucv_work_queue, list) { - list_del_init(&p->list); - iucv_sever_pathid(p->data.ippathid, - iucv_error_no_listener); - kfree(p); - } - } - iucv_pm_state = IUCV_PM_FREEZING; - if (dev->driver && dev->driver->pm && dev->driver->pm->freeze) - rc = dev->driver->pm->freeze(dev); - if (iucv_path_table_empty()) - iucv_disable(); - return rc; -} - -/** - * iucv_pm_thaw() - Thaw PM callback - * @dev: iucv-based device - * - * make iucv ready for use again: allocate path table, declare interrupt buffers - * and enable iucv interrupts - * invoke callback function of the iucv-based driver - */ -static int iucv_pm_thaw(struct device *dev) -{ - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "iucv_pm_thaw\n"); -#endif - iucv_pm_state = IUCV_PM_THAWING; - if (!iucv_path_table) { - rc = iucv_enable(); - if (rc) - goto out; - } - if (cpumask_empty(&iucv_irq_cpumask)) { - if (iucv_nonsmp_handler) - /* enable interrupts on one cpu */ - iucv_allow_cpu(NULL); - else - /* enable interrupts on all cpus */ - iucv_setmask_mp(); - } - if (dev->driver && dev->driver->pm && dev->driver->pm->thaw) - rc = dev->driver->pm->thaw(dev); -out: - return rc; -} - -/** - * iucv_pm_restore() - Restore PM callback - * @dev: iucv-based device - * - * make iucv ready for use again: allocate path table, declare interrupt buffers - * and enable iucv interrupts - * invoke callback function of the iucv-based driver - */ -static int iucv_pm_restore(struct device *dev) -{ - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "iucv_pm_restore %p\n", iucv_path_table); -#endif - if ((iucv_pm_state != IUCV_PM_RESTORING) && iucv_path_table) - pr_warn("Suspending Linux did not completely close all IUCV connections\n"); - iucv_pm_state = IUCV_PM_RESTORING; - if (cpumask_empty(&iucv_irq_cpumask)) { - rc = iucv_query_maxconn(); - rc = iucv_enable(); - if (rc) - goto out; - } - if (dev->driver && dev->driver->pm && dev->driver->pm->restore) - rc = dev->driver->pm->restore(dev); -out: - return rc; -} - struct iucv_interface iucv_if = { .message_receive = iucv_message_receive, .__message_receive = __iucv_message_receive, diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index fcb53ed1c4fb..6d7ef78c88af 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1458,6 +1458,9 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net, if (sk->sk_type != SOCK_DGRAM) return -EPROTONOSUPPORT; + if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + return -EPROTONOSUPPORT; + if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) || (encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP)) return -EPROTONOSUPPORT; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0d7c887a2b75..955662a6dee7 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -20,7 +20,6 @@ #include <net/icmp.h> #include <net/udp.h> #include <net/inet_common.h> -#include <net/inet_hashtables.h> #include <net/tcp_states.h> #include <net/protocol.h> #include <net/xfrm.h> @@ -209,15 +208,31 @@ discard: return 0; } -static int l2tp_ip_open(struct sock *sk) +static int l2tp_ip_hash(struct sock *sk) { - /* Prevent autobind. We don't have ports. */ - inet_sk(sk)->inet_num = IPPROTO_L2TP; + if (sk_unhashed(sk)) { + write_lock_bh(&l2tp_ip_lock); + sk_add_node(sk, &l2tp_ip_table); + write_unlock_bh(&l2tp_ip_lock); + } + return 0; +} +static void l2tp_ip_unhash(struct sock *sk) +{ + if (sk_unhashed(sk)) + return; write_lock_bh(&l2tp_ip_lock); - sk_add_node(sk, &l2tp_ip_table); + sk_del_node_init(sk); write_unlock_bh(&l2tp_ip_lock); +} + +static int l2tp_ip_open(struct sock *sk) +{ + /* Prevent autobind. We don't have ports. */ + inet_sk(sk)->inet_num = IPPROTO_L2TP; + l2tp_ip_hash(sk); return 0; } @@ -594,8 +609,8 @@ static struct proto l2tp_ip_prot = { .sendmsg = l2tp_ip_sendmsg, .recvmsg = l2tp_ip_recvmsg, .backlog_rcv = l2tp_ip_backlog_recv, - .hash = inet_hash, - .unhash = inet_unhash, + .hash = l2tp_ip_hash, + .unhash = l2tp_ip_unhash, .obj_size = sizeof(struct l2tp_ip_sock), #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ip_setsockopt, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d148766f40d1..526ed2c24dd5 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -20,8 +20,6 @@ #include <net/icmp.h> #include <net/udp.h> #include <net/inet_common.h> -#include <net/inet_hashtables.h> -#include <net/inet6_hashtables.h> #include <net/tcp_states.h> #include <net/protocol.h> #include <net/xfrm.h> @@ -222,15 +220,31 @@ discard: return 0; } -static int l2tp_ip6_open(struct sock *sk) +static int l2tp_ip6_hash(struct sock *sk) { - /* Prevent autobind. We don't have ports. */ - inet_sk(sk)->inet_num = IPPROTO_L2TP; + if (sk_unhashed(sk)) { + write_lock_bh(&l2tp_ip6_lock); + sk_add_node(sk, &l2tp_ip6_table); + write_unlock_bh(&l2tp_ip6_lock); + } + return 0; +} +static void l2tp_ip6_unhash(struct sock *sk) +{ + if (sk_unhashed(sk)) + return; write_lock_bh(&l2tp_ip6_lock); - sk_add_node(sk, &l2tp_ip6_table); + sk_del_node_init(sk); write_unlock_bh(&l2tp_ip6_lock); +} + +static int l2tp_ip6_open(struct sock *sk) +{ + /* Prevent autobind. We don't have ports. */ + inet_sk(sk)->inet_num = IPPROTO_L2TP; + l2tp_ip6_hash(sk); return 0; } @@ -728,8 +742,8 @@ static struct proto l2tp_ip6_prot = { .sendmsg = l2tp_ip6_sendmsg, .recvmsg = l2tp_ip6_recvmsg, .backlog_rcv = l2tp_ip6_backlog_recv, - .hash = inet6_hash, - .unhash = inet_unhash, + .hash = l2tp_ip6_hash, + .unhash = l2tp_ip6_unhash, .obj_size = sizeof(struct l2tp_ip6_sock), #ifdef CONFIG_COMPAT .compat_setsockopt = compat_ipv6_setsockopt, @@ -758,6 +772,7 @@ static const struct proto_ops l2tp_ip6_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 4d1c335e06e5..7f245e9f114c 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ /** @@ -292,7 +292,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } - if (!sta->sta.ht_cap.ht_supported) { + if (!sta->sta.ht_cap.ht_supported && + sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", sta->sta.addr, tid); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 33da6f738c99..b37c8a983d88 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2020 Intel Corporation */ #include <linux/ieee80211.h> @@ -448,6 +448,45 @@ static void sta_addba_resp_timer_expired(struct timer_list *t) ieee80211_stop_tx_ba_session(&sta->sta, tid); } +static void ieee80211_send_addba_with_timeout(struct sta_info *sta, + struct tid_ampdu_tx *tid_tx) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sta->local; + u8 tid = tid_tx->tid; + u16 buf_size; + + /* activate the timer for the recipient's addBA response */ + mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); + ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", + sta->sta.addr, tid); + + spin_lock_bh(&sta->lock); + sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; + sta->ampdu_mlme.addba_req_num[tid]++; + spin_unlock_bh(&sta->lock); + + if (sta->sta.he_cap.has_he) { + buf_size = local->hw.max_tx_aggregation_subframes; + } else { + /* + * We really should use what the driver told us it will + * transmit as the maximum, but certain APs (e.g. the + * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012) + * will crash when we use a lower number. + */ + buf_size = IEEE80211_MAX_AMPDU_BUF_HT; + } + + /* send AddBA request */ + ieee80211_send_addba_request(sdata, sta->sta.addr, tid, + tid_tx->dialog_token, + sta->tid_seq[tid] >> 4, + buf_size, tid_tx->timeout); + + WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); +} + void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; @@ -462,7 +501,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) .timeout = 0, }; int ret; - u16 buf_size; tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -485,7 +523,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); - if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { + if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) { + return; + } else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { /* * We didn't send the request yet, so don't need to check * here if we already got a response, just mark as driver @@ -508,32 +548,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) return; } - /* activate the timer for the recipient's addBA response */ - mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); - ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n", - sta->sta.addr, tid); - - spin_lock_bh(&sta->lock); - sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; - sta->ampdu_mlme.addba_req_num[tid]++; - spin_unlock_bh(&sta->lock); - - if (sta->sta.he_cap.has_he) { - buf_size = local->hw.max_tx_aggregation_subframes; - } else { - /* - * We really should use what the driver told us it will - * transmit as the maximum, but certain APs (e.g. the - * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012) - * will crash when we use a lower number. - */ - buf_size = IEEE80211_MAX_AMPDU_BUF_HT; - } - - /* send AddBA request */ - ieee80211_send_addba_request(sdata, sta->sta.addr, tid, - tid_tx->dialog_token, params.ssn, - buf_size, tid_tx->timeout); + ieee80211_send_addba_with_timeout(sta, tid_tx); } /* @@ -578,7 +593,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, "Requested to start BA session on reserved tid=%d", tid)) return -EINVAL; - if (!pubsta->ht_cap.ht_supported) + if (!pubsta->ht_cap.ht_supported && + sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) return -EINVAL; if (WARN_ON_ONCE(!local->ops->ampdu_action)) @@ -754,6 +770,12 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) return; + if (!test_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)) { + ieee80211_send_addba_with_timeout(sta, tid_tx); + /* RESPONSE_RECEIVED state whould trigger the flow again */ + return; + } + if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0f72813fed53..9b360544ad6f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -994,7 +994,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_TWT | BSS_CHANGED_HE_OBSS_PD | BSS_CHANGED_HE_BSS_COLOR; - int err; + int i, err; int prev_beacon_int; old = sdata_dereference(sdata->u.ap.beacon, sdata); @@ -1085,6 +1085,17 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= IEEE80211_P2P_OPPPS_ENABLE_BIT; + sdata->beacon_rate_set = false; + if (wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { + sdata->beacon_rateidx_mask[i] = + params->beacon_rate.control[i].legacy; + if (sdata->beacon_rateidx_mask[i]) + sdata->beacon_rate_set = true; + } + } + err = ieee80211_assign_beacon(sdata, ¶ms->beacon, NULL); if (err < 0) { ieee80211_vif_release_channel(sdata); @@ -1189,6 +1200,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) ieee80211_free_keys(sdata, true); sdata->vif.bss_conf.enable_beacon = false; + sdata->beacon_rate_set = false; sdata->vif.bss_conf.ssid_len = 0; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); @@ -1508,7 +1520,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (params->he_capa) ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, (void *)params->he_capa, - params->he_capa_len, sta); + params->he_capa_len, + (void *)params->he_6ghz_capa, + sta); if (params->opmode_notif_used) { /* returned value is only needed for rc update, but the @@ -1949,6 +1963,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, const u8 *old_ie; struct ieee80211_sub_if_data *sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); + int i; /* allocate information elements */ new_ie = NULL; @@ -1987,6 +2002,17 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, sdata->vif.bss_conf.beacon_int = setup->beacon_interval; sdata->vif.bss_conf.dtim_period = setup->dtim_period; + sdata->beacon_rate_set = false; + if (wiphy_ext_feature_isset(sdata->local->hw.wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) { + for (i = 0; i < NUM_NL80211_BANDS; i++) { + sdata->beacon_rateidx_mask[i] = + setup->beacon_rate.control[i].legacy; + if (sdata->beacon_rateidx_mask[i]) + sdata->beacon_rate_set = true; + } + } + return 0; } @@ -2172,7 +2198,8 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } if (!sdata->vif.bss_conf.use_short_slot && - sband->band == NL80211_BAND_5GHZ) { + (sband->band == NL80211_BAND_5GHZ || + sband->band == NL80211_BAND_6GHZ)) { sdata->vif.bss_conf.use_short_slot = true; changed |= BSS_CHANGED_ERP_SLOT; } @@ -3287,6 +3314,12 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, goto out; } + if (params->chandef.chan->freq_offset) { + /* this may work, but is untested */ + err = -EOPNOTSUPP; + goto out; + } + chanctx = container_of(conf, struct ieee80211_chanctx, conf); ch_switch.timestamp = 0; @@ -3398,41 +3431,43 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, return 0; } -static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, +static void +ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy, struct wireless_dev *wdev, - u16 frame_type, bool reg) + struct mgmt_frame_regs *upd) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + u32 preq_mask = BIT(IEEE80211_STYPE_PROBE_REQ >> 4); + u32 action_mask = BIT(IEEE80211_STYPE_ACTION >> 4); + bool global_change, intf_change; + + global_change = + (local->probe_req_reg != !!(upd->global_stypes & preq_mask)) || + (local->rx_mcast_action_reg != + !!(upd->global_mcast_stypes & action_mask)); + local->probe_req_reg = upd->global_stypes & preq_mask; + local->rx_mcast_action_reg = upd->global_mcast_stypes & action_mask; + + intf_change = (sdata->vif.probe_req_reg != + !!(upd->interface_stypes & preq_mask)) || + (sdata->vif.rx_mcast_action_reg != + !!(upd->interface_mcast_stypes & action_mask)); + sdata->vif.probe_req_reg = upd->interface_stypes & preq_mask; + sdata->vif.rx_mcast_action_reg = + upd->interface_mcast_stypes & action_mask; + + if (!local->open_count) + return; - switch (frame_type) { - case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ: - if (reg) { - local->probe_req_reg++; - sdata->vif.probe_req_reg++; - } else { - if (local->probe_req_reg) - local->probe_req_reg--; - - if (sdata->vif.probe_req_reg) - sdata->vif.probe_req_reg--; - } - - if (!local->open_count) - break; - - if (sdata->vif.probe_req_reg == 1) - drv_config_iface_filter(local, sdata, FIF_PROBE_REQ, - FIF_PROBE_REQ); - else if (sdata->vif.probe_req_reg == 0) - drv_config_iface_filter(local, sdata, 0, - FIF_PROBE_REQ); + if (intf_change && ieee80211_sdata_running(sdata)) + drv_config_iface_filter(local, sdata, + sdata->vif.probe_req_reg ? + FIF_PROBE_REQ : 0, + FIF_PROBE_REQ); + if (global_change) ieee80211_configure_filter(local); - break; - default: - break; - } } static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) @@ -3925,7 +3960,7 @@ static int ieee80211_set_tid_config(struct wiphy *wiphy, static int ieee80211_reset_tid_config(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 tid) + const u8 *peer, u8 tids) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; @@ -3935,7 +3970,7 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy, return -EOPNOTSUPP; if (!peer) - return drv_reset_tid_config(sdata->local, sdata, NULL, tid); + return drv_reset_tid_config(sdata->local, sdata, NULL, tids); mutex_lock(&sdata->local->sta_mtx); sta = sta_info_get_bss(sdata, peer); @@ -3944,7 +3979,7 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy, return -ENOENT; } - ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tid); + ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids); mutex_unlock(&sdata->local->sta_mtx); return ret; @@ -4017,7 +4052,8 @@ const struct cfg80211_ops mac80211_config_ops = { .mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, .set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config, - .mgmt_frame_register = ieee80211_mgmt_frame_register, + .update_mgmt_frame_registrations = + ieee80211_update_mgmt_frame_registrations, .set_antenna = ieee80211_set_antenna, .get_antenna = ieee80211_get_antenna, .set_rekey_data = ieee80211_set_rekey_data, diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 9c94baaf693c..e6e192f53e4e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -533,6 +533,7 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local, struct cfg80211_chan_def *chandef = &local->_oper_chandef; chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = chandef->chan->center_freq; + chandef->freq1_offset = chandef->chan->freq_offset; chandef->center_freq2 = 0; /* NOTE: Disabling radar is only valid here for diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 3dbe7c5cefd1..d7e955127d5c 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -236,7 +236,7 @@ IEEE80211_IF_FILE_R(hw_queues); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); -IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); +IEEE80211_IF_FILE(aid, vif.bss_conf.aid, DEC); IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS); static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 3877710e3b48..de69fc9c4f07 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1375,12 +1375,12 @@ static inline int drv_set_tid_config(struct ieee80211_local *local, static inline int drv_reset_tid_config(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, u8 tid) + struct ieee80211_sta *sta, u8 tids) { int ret; might_sleep(); - ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tid); + ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tids); trace_drv_return_int(local, ret); return ret; diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 1087f715338b..cc26f239838b 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -8,10 +8,55 @@ #include "ieee80211_i.h" +static void +ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_capa, + struct sta_info *sta) +{ + enum ieee80211_smps_mode smps_mode; + + if (sta->sdata->vif.type == NL80211_IFTYPE_AP || + sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + switch (le16_get_bits(he_6ghz_capa->capa, + IEEE80211_HE_6GHZ_CAP_SM_PS)) { + case WLAN_HT_CAP_SM_PS_INVALID: + case WLAN_HT_CAP_SM_PS_STATIC: + smps_mode = IEEE80211_SMPS_STATIC; + break; + case WLAN_HT_CAP_SM_PS_DYNAMIC: + smps_mode = IEEE80211_SMPS_DYNAMIC; + break; + case WLAN_HT_CAP_SM_PS_DISABLED: + smps_mode = IEEE80211_SMPS_OFF; + break; + } + + sta->sta.smps_mode = smps_mode; + } else { + sta->sta.smps_mode = IEEE80211_SMPS_OFF; + } + + switch (le16_get_bits(he_6ghz_capa->capa, + IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN)) { + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: + sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: + sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: + default: + sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; + break; + } + + sta->sta.he_6ghz_capa = *he_6ghz_capa; +} + void ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const u8 *he_cap_ie, u8 he_cap_len, + const struct ieee80211_he_6ghz_capa *he_6ghz_capa, struct sta_info *sta) { struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; @@ -53,21 +98,21 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta); sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta); + + if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa) + ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, sta); } void ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, - const struct ieee80211_he_operation *he_op_ie_elem) + const struct ieee80211_he_operation *he_op_ie) { - struct ieee80211_he_operation *he_operation = - &vif->bss_conf.he_operation; - - if (!he_op_ie_elem) { - memset(he_operation, 0, sizeof(*he_operation)); + memset(&vif->bss_conf.he_oper, 0, sizeof(vif->bss_conf.he_oper)); + if (!he_op_ie) return; - } - vif->bss_conf.he_operation = *he_op_ie_elem; + vif->bss_conf.he_oper.params = __le32_to_cpu(he_op_ie->he_oper_params); + vif->bss_conf.he_oper.nss_set = __le16_to_cpu(he_op_ie->he_mcs_nss_set); } void diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index d40744903fa9..81d26fef41e9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -9,7 +9,7 @@ * Copyright 2009, Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright(c) 2018-2019 Intel Corporation + * Copyright(c) 2018-2020 Intel Corporation */ #include <linux/delay.h> @@ -781,6 +781,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, enum nl80211_channel_type ch_type; int err; u32 sta_flags; + u32 vht_cap_info = 0; sdata_assert_lock(sdata); @@ -798,9 +799,13 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, break; } + if (elems->vht_cap_elem) + vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info); + memset(¶ms, 0, sizeof(params)); err = ieee80211_parse_ch_switch_ie(sdata, elems, ifibss->chandef.chan->band, + vht_cap_info, sta_flags, ifibss->bssid, &csa_ie); /* can't switch to destination channel, fail */ if (err < 0) @@ -1060,8 +1065,10 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, /* we both use VHT */ struct ieee80211_vht_cap cap_ie; struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap; + u32 vht_cap_info = + le32_to_cpu(elems->vht_cap_elem->vht_cap_info); - ieee80211_chandef_vht_oper(&local->hw, + ieee80211_chandef_vht_oper(&local->hw, vht_cap_info, elems->vht_operation, elems->ht_operation, &chandef); @@ -1758,6 +1765,11 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int i; int ret; + if (params->chandef.chan->freq_offset) { + /* this may work, but is untested */ + return -EOPNOTSUPP; + } + ret = cfg80211_chandef_dfs_required(local->hw.wiphy, ¶ms->chandef, sdata->wdev.iftype); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f8ed4f621f7f..ec1a71ac65f2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -111,6 +111,8 @@ struct ieee80211_bss { size_t supp_rates_len; struct ieee80211_rate *beacon_rate; + u32 vht_cap_info; + /* * During association, we save an ERP value from a probe response so * that we can feed ERP info to the driver when handling the @@ -267,7 +269,7 @@ struct probe_resp { struct rcu_head rcu_head; int len; u16 csa_counter_offsets[IEEE80211_MAX_CSA_COUNTERS_NUM]; - u8 data[0]; + u8 data[]; }; struct ps_data { @@ -450,8 +452,6 @@ struct ieee80211_if_managed { u8 bssid[ETH_ALEN] __aligned(2); - u16 aid; - bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off powersave */ bool have_beacon; @@ -964,6 +964,10 @@ struct ieee80211_sub_if_data { bool rc_has_vht_mcs_mask[NUM_NL80211_BANDS]; u16 rc_rateidx_vht_mcs_mask[NUM_NL80211_BANDS][NL80211_VHT_NSS_MAX]; + /* Beacon frame (non-MCS) rate (as a bitmap) */ + u32 beacon_rateidx_mask[NUM_NL80211_BANDS]; + bool beacon_rate_set; + union { struct ieee80211_if_ap ap; struct ieee80211_if_wds wds; @@ -1169,7 +1173,8 @@ struct ieee80211_local { /* number of interfaces with corresponding FIF_ flags */ int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll, fif_probe_req; - int probe_req_reg; + bool probe_req_reg; + bool rx_mcast_action_reg; unsigned int filter_flags; /* FIF_* */ bool wiphy_ciphers_allocated; @@ -1491,6 +1496,7 @@ struct ieee802_11_elems { const struct ieee80211_he_operation *he_operation; const struct ieee80211_he_spr *he_spr; const struct ieee80211_mu_edca_param_set *mu_edca_param_set; + const struct ieee80211_he_6ghz_capa *he_6ghz_capa; const u8 *uora_element; const u8 *mesh_id; const u8 *peering; @@ -1780,7 +1786,8 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb, void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags, - u32 ctrl_flags); + u32 ctrl_flags, + u64 *cookie); void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, struct sk_buff_head *skbs); struct sk_buff * @@ -1797,7 +1804,8 @@ void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); void ieee80211_clear_fast_xmit(struct sta_info *sta); int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, __be16 proto, bool unencrypted); + const u8 *dest, __be16 proto, bool unencrypted, + u64 *cookie); int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); @@ -1891,6 +1899,7 @@ void ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const u8 *he_cap_ie, u8 he_cap_len, + const struct ieee80211_he_6ghz_capa *he_6ghz_capa, struct sta_info *sta); void ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, @@ -1909,6 +1918,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * @sdata: the sdata of the interface which has received the frame * @elems: parsed 802.11 elements received with the frame * @current_band: indicates the current band + * @vht_cap_info: VHT capabilities of the transmitter * @sta_flags: contains information about own capabilities and restrictions * to decide which channel switch announcements can be accepted. Only the * following subset of &enum ieee80211_sta_flags are evaluated: @@ -1923,6 +1933,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band current_band, + u32 vht_cap_info, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie); @@ -2133,7 +2144,7 @@ enum { IEEE80211_PROBE_FLAG_RANDOM_SN = BIT(2), }; -int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, +int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer, size_t buffer_len, struct ieee80211_scan_ies *ie_desc, const u8 *ie, size_t ie_len, @@ -2171,7 +2182,9 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype); u8 *ieee80211_ie_build_he_cap(u8 *pos, const struct ieee80211_sta_he_cap *he_cap, u8 *end); -u8 *ieee80211_ie_build_he_oper(u8 *pos); +void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef); int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates); @@ -2186,10 +2199,13 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo); /* channel management */ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef); -bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, +bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, const struct ieee80211_vht_operation *oper, const struct ieee80211_ht_operation *htop, struct cfg80211_chan_def *chandef); +bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_he_operation *he_oper, + struct cfg80211_chan_def *chandef); u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c); int __must_check diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d069825705d6..f900c84fb40f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -644,6 +644,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) local->fif_probe_req++; } + if (sdata->vif.probe_req_reg) + drv_config_iface_filter(local, sdata, + FIF_PROBE_REQ, + FIF_PROBE_REQ); + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && sdata->vif.type != NL80211_IFTYPE_NAN) changed |= ieee80211_reset_erp_info(sdata); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6423173bb87e..b4a2efe8e83a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -64,6 +64,9 @@ void ieee80211_configure_filter(struct ieee80211_local *local) if (local->fif_pspoll) new_flags |= FIF_PSPOLL; + if (local->rx_mcast_action_reg) + new_flags |= FIF_MCAST_ACTION; + spin_lock_bh(&local->filter_lock); changed_flags = local->filter_flags ^ new_flags; @@ -104,13 +107,15 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) chandef.chan = local->tmp_channel; chandef.width = NL80211_CHAN_WIDTH_20_NOHT; chandef.center_freq1 = chandef.chan->center_freq; + chandef.freq1_offset = chandef.chan->freq_offset; } else chandef = local->_oper_chandef; WARN(!cfg80211_chandef_valid(&chandef), - "control:%d MHz width:%d center: %d/%d MHz", - chandef.chan->center_freq, chandef.width, - chandef.center_freq1, chandef.center_freq2); + "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz", + chandef.chan->center_freq, chandef.chan->freq_offset, + chandef.width, chandef.center_freq1, chandef.freq1_offset, + chandef.center_freq2); if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef)) local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; @@ -591,6 +596,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH); + wiphy_ext_feature_set(wiphy, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS); + wiphy_ext_feature_set(wiphy, + NL80211_EXT_FEATURE_SCAN_FREQ_KHZ); if (!ops->hw_scan) { wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 36978a0e5000..5f1ca25b6c97 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2020 Intel Corporation * Authors: Luis Carlos Cobo <luisca@cozybit.com> * Javier Cardona <javier@cozybit.com> */ @@ -63,6 +63,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, u32 basic_rates = 0; struct cfg80211_chan_def sta_chan_def; struct ieee80211_supported_band *sband; + u32 vht_cap_info = 0; /* * As support for each feature is added, check for matching @@ -96,9 +97,14 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan, NL80211_CHAN_NO_HT); ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def); - ieee80211_chandef_vht_oper(&sdata->local->hw, + + if (ie->vht_cap_elem) + vht_cap_info = le32_to_cpu(ie->vht_cap_elem->vht_cap_info); + + ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, ie->vht_operation, ie->ht_operation, &sta_chan_def); + ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, &sta_chan_def); if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef, &sta_chan_def)) @@ -415,6 +421,10 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, if (!sband) return -EINVAL; + /* HT not allowed in 6 GHz */ + if (sband->band == NL80211_BAND_6GHZ) + return 0; + if (!sband->ht_cap.ht_supported || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || @@ -452,6 +462,10 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[channel->band]; ht_cap = &sband->ht_cap; + /* HT not allowed in 6 GHz */ + if (sband->band == NL80211_BAND_6GHZ) + return 0; + if (!ht_cap->ht_supported || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || @@ -479,6 +493,10 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, if (!sband) return -EINVAL; + /* VHT not allowed in 6 GHz */ + if (sband->band == NL80211_BAND_6GHZ) + return 0; + if (!sband->vht_cap.vht_supported || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || @@ -516,6 +534,10 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[channel->band]; vht_cap = &sband->vht_cap; + /* VHT not allowed in 6 GHz */ + if (sband->band == NL80211_BAND_6GHZ) + return 0; + if (!vht_cap->vht_supported || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || @@ -565,6 +587,7 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, { const struct ieee80211_sta_he_cap *he_cap; struct ieee80211_supported_band *sband; + u32 len; u8 *pos; sband = ieee80211_get_sband(sdata); @@ -578,12 +601,23 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) return 0; - if (skb_tailroom(skb) < 2 + 1 + sizeof(struct ieee80211_he_operation)) + len = 2 + 1 + sizeof(struct ieee80211_he_operation); + if (sdata->vif.bss_conf.chandef.chan->band == NL80211_BAND_6GHZ) + len += sizeof(struct ieee80211_he_6ghz_oper); + + if (skb_tailroom(skb) < len) return -ENOMEM; - pos = skb_put(skb, 2 + 1 + sizeof(struct ieee80211_he_operation)); - ieee80211_ie_build_he_oper(pos); + pos = skb_put(skb, len); + ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chandef); + + return 0; +} +int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + ieee80211_ie_build_he_6ghz_cap(sdata, skb); return 0; } @@ -766,6 +800,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) 2 + sizeof(struct ieee80211_vht_operation) + ie_len_he_cap + 2 + 1 + sizeof(struct ieee80211_he_operation) + + sizeof(struct ieee80211_he_6ghz_oper) + + 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) + ifmsh->ie_len; bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); @@ -885,6 +921,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) mesh_add_vht_oper_ie(sdata, skb) || mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) || mesh_add_he_oper_ie(sdata, skb) || + mesh_add_he_6ghz_cap_ie(sdata, skb) || mesh_add_vendor_ies(sdata, skb)) goto out_free; @@ -994,6 +1031,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) /* stop the beacon */ ifmsh->mesh_id_len = 0; sdata->vif.bss_conf.enable_beacon = false; + sdata->beacon_rate_set = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); @@ -1044,7 +1082,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_supported_band *sband; int err; - u32 sta_flags; + u32 sta_flags, vht_cap_info = 0; sdata_assert_lock(sdata); @@ -1067,8 +1105,13 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, break; } + if (elems->vht_cap_elem) + vht_cap_info = + le32_to_cpu(elems->vht_cap_elem->vht_cap_info); + memset(¶ms, 0, sizeof(params)); err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band, + vht_cap_info, sta_flags, sdata->vif.addr, &csa_ie); if (err < 0) diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 953f720754e8..40492d1bd8fd 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -222,6 +222,8 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u8 ie_len); int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 38a0383dfbcf..aa5150929996 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1103,7 +1103,14 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->vif.addr, ifmsh->sn, target_flags, mpath->dst, mpath->sn, da, 0, ttl, lifetime, 0, ifmsh->preq_id++, sdata); + + spin_lock_bh(&mpath->state_lock); + if (mpath->flags & MESH_PATH_DELETED) { + spin_unlock_bh(&mpath->state_lock); + goto enddiscovery; + } mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); + spin_unlock_bh(&mpath->state_lock); enddiscovery: rcu_read_unlock(); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 737c5f4dbf52..798e4b6b383f 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -238,6 +238,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, 2 + sizeof(struct ieee80211_vht_operation) + ie_len_he_cap + 2 + 1 + sizeof(struct ieee80211_he_operation) + + sizeof(struct ieee80211_he_6ghz_oper) + + 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) + 2 + 8 + /* peering IE */ sdata->u.mesh.ie_len); if (!skb) @@ -328,7 +330,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, mesh_add_vht_cap_ie(sdata, skb) || mesh_add_vht_oper_ie(sdata, skb) || mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) || - mesh_add_he_oper_ie(sdata, skb)) + mesh_add_he_oper_ie(sdata, skb) || + mesh_add_he_6ghz_cap_ie(sdata, skb)) goto free; } @@ -441,7 +444,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, elems->vht_cap_elem, sta); ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap, - elems->he_cap_len, sta); + elems->he_cap_len, + elems->he_6ghz_capa, + sta); if (bw != sta->sta.bandwidth) changed |= IEEE80211_RC_BW_CHANGED; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 16d75da0996a..5820ef02a587 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -145,6 +145,7 @@ static u32 ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, struct ieee80211_channel *channel, + u32 vht_cap_info, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, const struct ieee80211_he_operation *he_oper, @@ -155,13 +156,23 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap sta_ht_cap; u32 ht_cfreq, ret; - memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); - ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); - memset(chandef, 0, sizeof(struct cfg80211_chan_def)); chandef->chan = channel; chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = channel->center_freq; + chandef->freq1_offset = channel->freq_offset; + + if (channel->band == NL80211_BAND_6GHZ) { + if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, chandef)) + ret = IEEE80211_STA_DISABLE_HT | + IEEE80211_STA_DISABLE_VHT | + IEEE80211_STA_DISABLE_HE; + vht_chandef = *chandef; + goto out; + } + + memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); + ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); if (!ht_oper || !sta_ht_cap.ht_supported) { ret = IEEE80211_STA_DISABLE_HT | @@ -222,7 +233,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, memcpy(&he_oper_vht_cap, he_oper->optional, 3); he_oper_vht_cap.basic_mcs_set = cpu_to_le16(0); - if (!ieee80211_chandef_vht_oper(&sdata->local->hw, + if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info, &he_oper_vht_cap, ht_oper, &vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) @@ -231,8 +242,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret = IEEE80211_STA_DISABLE_HE; goto out; } - } else if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_oper, - ht_oper, &vht_chandef)) { + } else if (!ieee80211_chandef_vht_oper(&sdata->local->hw, + vht_cap_info, + vht_oper, ht_oper, + &vht_chandef)) { if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information is invalid, disable VHT\n"); @@ -328,6 +341,7 @@ out: static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, const struct ieee80211_ht_cap *ht_cap, + const struct ieee80211_vht_cap *vht_cap, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, const struct ieee80211_he_operation *he_oper, @@ -342,6 +356,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, u16 ht_opmode; u32 flags; enum ieee80211_sta_rx_bandwidth new_sta_bw; + u32 vht_cap_info = 0; int ret; /* if HT was/is disabled, don't track any bandwidth changes */ @@ -370,8 +385,11 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.ht_operation_mode = ht_opmode; } + if (vht_cap) + vht_cap_info = le32_to_cpu(vht_cap->vht_cap_info); + /* calculate new channel (type) based on HT/VHT/HE operation IEs */ - flags = ieee80211_determine_chantype(sdata, sband, chan, + flags = ieee80211_determine_chantype(sdata, sband, chan, vht_cap_info, ht_oper, vht_oper, he_oper, &chandef, true); @@ -396,9 +414,12 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, return 0; sdata_info(sdata, - "AP %pM changed bandwidth, new config is %d MHz, width %d (%d/%d MHz)\n", - ifmgd->bssid, chandef.chan->center_freq, chandef.width, - chandef.center_freq1, chandef.center_freq2); + "AP %pM changed bandwidth, new config is %d.%03d MHz, " + "width %d (%d.%03d/%d MHz)\n", + ifmgd->bssid, chandef.chan->center_freq, + chandef.chan->freq_offset, chandef.width, + chandef.center_freq1, chandef.freq1_offset, + chandef.center_freq2); if (flags != (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT | @@ -654,6 +675,8 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, he_cap->he_cap_elem.phy_cap_info); pos = skb_put(skb, he_cap_size); ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size); + + ieee80211_ie_build_he_6ghz_cap(sdata, skb); } static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) @@ -727,6 +750,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) 2 + 1 + sizeof(struct ieee80211_he_cap_elem) + /* HE */ sizeof(struct ieee80211_he_mcs_nss_supp) + IEEE80211_HE_PPE_THRES_MAX_LEN + + 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) + assoc_data->ie_len + /* extra IEs */ (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) + 9, /* WMM */ @@ -899,7 +923,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))) ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) + if (sband->band != NL80211_BAND_6GHZ && + !(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) ieee80211_add_ht_ie(sdata, skb, assoc_data->ap_ht_param, sband, chan, sdata->smps_mode); @@ -953,7 +978,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) offset = noffset; } - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) + if (sband->band != NL80211_BAND_6GHZ && + !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) ieee80211_add_vht_ie(sdata, skb, sband, &assoc_data->ap_vht_cap); @@ -1320,6 +1346,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, enum nl80211_band current_band; struct ieee80211_csa_ie csa_ie; struct ieee80211_channel_switch ch_switch; + struct ieee80211_bss *bss; int res; sdata_assert_lock(sdata); @@ -1331,7 +1358,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; current_band = cbss->channel->band; + bss = (void *)cbss->priv; res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band, + bss->vht_cap_info, ifmgd->flags, ifmgd->associated->bssid, &csa_ie); @@ -1364,10 +1393,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef, IEEE80211_CHAN_DISABLED)) { sdata_info(sdata, - "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", + "AP %pM switches to unsupported channel " + "(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), " + "disconnecting\n", ifmgd->associated->bssid, csa_ie.chandef.chan->center_freq, + csa_ie.chandef.chan->freq_offset, csa_ie.chandef.width, csa_ie.chandef.center_freq1, + csa_ie.chandef.freq1_offset, csa_ie.chandef.center_freq2); ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); @@ -1500,6 +1533,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, chan_increment = 1; break; case NL80211_BAND_5GHZ: + case NL80211_BAND_6GHZ: chan_increment = 4; break; } @@ -2137,7 +2171,8 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, } use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME); - if (sband->band == NL80211_BAND_5GHZ) + if (sband->band == NL80211_BAND_5GHZ || + sband->band == NL80211_BAND_6GHZ) use_short_slot = true; if (use_protection != bss_conf->use_cts_prot) { @@ -2948,10 +2983,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } if (status_code != WLAN_STATUS_SUCCESS) { + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); + + if (auth_alg == WLAN_AUTH_SAE && + status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED) + return; + sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); - cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); event.u.mlme.status = MLME_DENIED; event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); @@ -3149,15 +3189,16 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, *have_higher_than_11mbit = true; /* - * Skip HT and VHT BSS membership selectors since they're not - * rates. + * Skip HT, VHT and HE BSS membership selectors since they're + * not rates. * * Note: Even though the membership selector and the basic * rate flag share the same bit, they are not exactly * the same. */ if (supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HT_PHY) || - supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY)) + supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY) || + supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HE_PHY)) continue; for (j = 0; j < sband->n_bitrates; j++) { @@ -3220,6 +3261,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; const struct cfg80211_bss_ies *bss_ies = NULL; struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data; + bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; u32 changed = 0; int err; bool ret; @@ -3249,7 +3291,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, return false; } - ifmgd->aid = aid; + sdata->vif.bss_conf.aid = aid; ifmgd->tdls_chan_switch_prohibited = elems->ext_capab && elems->ext_capab_len >= 5 && (elems->ext_capab[4] & WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED); @@ -3261,11 +3303,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, * 2G/3G/4G wifi routers, reported models include the "Onda PN51T", * "Vodafone PocketWiFi 2", "ZTE MF60" and a similar T-Mobile device. */ - if ((assoc_data->wmm && !elems->wmm_param) || - (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && - (!elems->ht_cap_elem || !elems->ht_operation)) || - (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && - (!elems->vht_cap_elem || !elems->vht_operation))) { + if (!is_6ghz && + ((assoc_data->wmm && !elems->wmm_param) || + (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && + (!elems->ht_cap_elem || !elems->ht_operation)) || + (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && + (!elems->vht_cap_elem || !elems->vht_operation)))) { const struct cfg80211_bss_ies *ies; struct ieee802_11_elems bss_elems; @@ -3323,7 +3366,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, * We previously checked these in the beacon/probe response, so * they should be present here. This is just a safety net. */ - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && + if (!is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && (!elems->wmm_param || !elems->ht_cap_elem || !elems->ht_operation)) { sdata_info(sdata, "HT AP is missing WMM params or HT capability/operation\n"); @@ -3331,7 +3374,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, goto out; } - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && + if (!is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && (!elems->vht_cap_elem || !elems->vht_operation)) { sdata_info(sdata, "VHT AP is missing VHT capability/operation\n"); @@ -3339,6 +3382,14 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, goto out; } + if (is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && + !elems->he_6ghz_capa) { + sdata_info(sdata, + "HE 6 GHz AP is missing HE 6 GHz band capability\n"); + ret = false; + goto out; + } + mutex_lock(&sdata->local->sta_mtx); /* * station info was already allocated and inserted before @@ -3381,13 +3432,23 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap, elems->he_cap_len, + elems->he_6ghz_capa, sta); bss_conf->he_support = sta->sta.he_cap.has_he; + if (elems->rsnx && elems->rsnx_len && + (elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) && + wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_PROTECTED_TWT)) + bss_conf->twt_protected = true; + else + bss_conf->twt_protected = false; + changed |= ieee80211_recalc_twt_req(sdata, sta, elems); } else { bss_conf->he_support = false; bss_conf->twt_requester = false; + bss_conf->twt_protected = false; } if (bss_conf->he_support) { @@ -3521,9 +3582,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, bss_conf->protected_keep_alive = false; } - /* set AID and assoc capability, + /* set assoc capability (AID was already set earlier), * ieee80211_set_associated() will tell the driver */ - bss_conf->aid = aid; bss_conf->assoc_capability = capab_info; ieee80211_set_associated(sdata, cbss, changed); @@ -3661,7 +3721,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sdata_assert_lock(sdata); - channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); + channel = ieee80211_get_channel_khz(local->hw.wiphy, + ieee80211_rx_status_to_khz(rx_status)); if (!channel) return; @@ -3877,7 +3938,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, return; } - if (rx_status->freq != chanctx_conf->def.chan->center_freq) { + if (ieee80211_rx_status_to_khz(rx_status) != + ieee80211_channel_to_khz(chanctx_conf->def.chan)) { rcu_read_unlock(); return; } @@ -3948,7 +4010,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, mgmt->bssid, bssid); if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) && - ieee80211_check_tim(elems.tim, elems.tim_len, ifmgd->aid)) { + ieee80211_check_tim(elems.tim, elems.tim_len, bss_conf->aid)) { if (local->hw.conf.dynamic_ps_timeout > 0) { if (local->hw.conf.flags & IEEE80211_CONF_PS) { local->hw.conf.flags &= ~IEEE80211_CONF_PS; @@ -4070,8 +4132,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, changed |= ieee80211_recalc_twt_req(sdata, sta, &elems); - if (ieee80211_config_bw(sdata, sta, - elems.ht_cap_elem, elems.ht_operation, + if (ieee80211_config_bw(sdata, sta, elems.ht_cap_elem, + elems.vht_cap_elem, elems.ht_operation, elems.vht_operation, elems.he_operation, bssid, &changed)) { mutex_unlock(&local->sta_mtx); @@ -4788,6 +4850,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, const struct ieee80211_he_operation *he_oper = NULL; struct ieee80211_supported_band *sband; struct cfg80211_chan_def chandef; + bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; + struct ieee80211_bss *bss = (void *)cbss->priv; int ret; u32 i; bool have_80mhz; @@ -4799,21 +4863,23 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, IEEE80211_STA_DISABLE_160MHZ); /* disable HT/VHT/HE if we don't support them */ - if (!sband->ht_cap.ht_supported) { + if (!sband->ht_cap.ht_supported && !is_6ghz) { ifmgd->flags |= IEEE80211_STA_DISABLE_HT; ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; } - if (!sband->vht_cap.vht_supported) + if (!sband->vht_cap.vht_supported && !is_6ghz) { ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + ifmgd->flags |= IEEE80211_STA_DISABLE_HE; + } if (!ieee80211_get_he_sta_cap(sband)) ifmgd->flags |= IEEE80211_STA_DISABLE_HE; rcu_read_lock(); - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && !is_6ghz) { const u8 *ht_oper_ie, *ht_cap_ie; ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION); @@ -4830,7 +4896,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, } } - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) { + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && !is_6ghz) { const u8 *vht_oper_ie, *vht_cap; vht_oper_ie = ieee80211_bss_get_ie(cbss, @@ -4886,6 +4952,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, + bss->vht_cap_info, ht_oper, vht_oper, he_oper, &chandef, false); @@ -4894,6 +4961,11 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); + if (ifmgd->flags & IEEE80211_STA_DISABLE_HE && is_6ghz) { + sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection"); + return -EINVAL; + } + /* will change later if needed */ sdata->smps_mode = IEEE80211_SMPS_OFF; @@ -5022,8 +5094,16 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, * doesn't happen any more, but keep the workaround so * in case some *other* APs are buggy in different ways * we can connect -- with a warning. + * Allow this workaround only in case the AP provided at least + * one rate. */ - if (!basic_rates && min_rate_index >= 0) { + if (min_rate_index < 0) { + sdata_info(sdata, + "No legacy rates in association response\n"); + + sta_info_free(local, new_sta); + return -EINVAL; + } else if (!basic_rates) { sdata_info(sdata, "No basic rates, using min rate instead\n"); basic_rates = BIT(min_rate_index); @@ -5267,6 +5347,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { + bool is_6ghz = req->bss->channel->band == NL80211_BAND_6GHZ; struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)req->bss->priv; @@ -5409,14 +5490,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (ht_ie && ht_ie[1] >= sizeof(struct ieee80211_ht_operation)) assoc_data->ap_ht_param = ((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param; - else + else if (!is_6ghz) ifmgd->flags |= IEEE80211_STA_DISABLE_HT; vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY); if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap)) memcpy(&assoc_data->ap_vht_cap, vht_ie + 2, sizeof(struct ieee80211_vht_cap)); - else - ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + else if (!is_6ghz) + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT | + IEEE80211_STA_DISABLE_HE; rcu_read_unlock(); if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) && @@ -5517,7 +5599,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->timeout_started = true; assoc_data->need_beacon = true; } else if (beacon_ies) { - const u8 *ie; + const struct element *elem; u8 dtim_count = 0; ieee80211_get_dtim(beacon_ies, &dtim_count, @@ -5534,15 +5616,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.sync_dtim_count = dtim_count; } - ie = cfg80211_find_ext_ie(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION, - beacon_ies->data, beacon_ies->len); - if (ie && ie[1] >= 3) - sdata->vif.bss_conf.profile_periodicity = ie[4]; + elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION, + beacon_ies->data, beacon_ies->len); + if (elem && elem->datalen >= 3) + sdata->vif.bss_conf.profile_periodicity = elem->data[2]; - ie = cfg80211_find_ie(WLAN_EID_EXT_CAPABILITY, - beacon_ies->data, beacon_ies->len); - if (ie && ie[1] >= 11 && - (ie[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) + elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, + beacon_ies->data, beacon_ies->len); + if (elem && elem->datalen >= 11 && + (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) sdata->vif.bss_conf.ema_ap = true; } else { assoc_data->timeout = jiffies; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index c710504ccf1a..db3b8bf75656 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -557,6 +557,10 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, lockdep_assert_held(&local->mtx); + if (channel->freq_offset) + /* this may work, but is untested */ + return -EOPNOTSUPP; + if (local->use_chanctx && !local->ops->remain_on_channel) return -EOPNOTSUPP; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5dc3e5bc4e64..b11a2af55b06 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org> + * Copyright (C) 2019-2020 Intel Corporation */ #include <linux/netdevice.h> #include <linux/types.h> @@ -490,7 +491,7 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); - if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) { + if (tmp_cck_tp > tmp_mcs_tp) { for(i = 0; i < MAX_THR_RATES; i++) { minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i], tmp_mcs_tp_rate); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 91a13aee4378..21854a61a2b7 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -93,13 +93,44 @@ static u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, * This function cleans up the SKB, i.e. it removes all the stuff * only useful for monitoring. */ -static void remove_monitor_info(struct sk_buff *skb, - unsigned int present_fcs_len, - unsigned int rtap_space) +static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, + unsigned int present_fcs_len, + unsigned int rtap_space) { + struct ieee80211_hdr *hdr; + unsigned int hdrlen; + __le16 fc; + if (present_fcs_len) __pskb_trim(skb, skb->len - present_fcs_len); __pskb_pull(skb, rtap_space); + + hdr = (void *)skb->data; + fc = hdr->frame_control; + + /* + * Remove the HT-Control field (if present) on management + * frames after we've sent the frame to monitoring. We + * (currently) don't need it, and don't properly parse + * frames with it present, due to the assumption of a + * fixed management header length. + */ + if (likely(!ieee80211_is_mgmt(fc) || !ieee80211_has_order(fc))) + return skb; + + hdrlen = ieee80211_hdrlen(fc); + hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_ORDER); + + if (!pskb_may_pull(skb, hdrlen)) { + dev_kfree_skb(skb); + return NULL; + } + + memmove(skb->data + IEEE80211_HT_CTL_LEN, skb->data, + hdrlen - IEEE80211_HT_CTL_LEN); + __pskb_pull(skb, IEEE80211_HT_CTL_LEN); + + return skb; } static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len, @@ -412,6 +443,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; /* IEEE80211_RADIOTAP_CHANNEL */ + /* TODO: frequency offset in KHz */ put_unaligned_le16(status->freq, pos); pos += 2; if (status->bw == RATE_INFO_BW_10) @@ -826,8 +858,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, return NULL; } - remove_monitor_info(origskb, present_fcs_len, rtap_space); - return origskb; + return ieee80211_clean_skb(origskb, present_fcs_len, + rtap_space); } ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_space); @@ -870,8 +902,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (!origskb) return NULL; - remove_monitor_info(origskb, present_fcs_len, rtap_space); - return origskb; + return ieee80211_clean_skb(origskb, present_fcs_len, rtap_space); } static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) @@ -1984,8 +2015,12 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS || mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + - NUM_DEFAULT_BEACON_KEYS) + NUM_DEFAULT_BEACON_KEYS) { + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + skb->data, + skb->len); return RX_DROP_MONITOR; /* unexpected BIP keyidx */ + } rx->key = ieee80211_rx_get_bigtk(rx, mmie_keyidx); if (!rx->key) @@ -2131,6 +2166,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* either the frame has been decrypted or will be dropped */ status->flag |= RX_FLAG_DECRYPTED; + if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE)) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + skb->data, skb->len); + return result; } @@ -2411,8 +2450,12 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) return -EACCES; } if (unlikely(ieee80211_is_beacon(fc) && rx->key && - ieee80211_get_mmie_keyidx(rx->skb) < 0)) + ieee80211_get_mmie_keyidx(rx->skb) < 0)) { + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; + } /* * When using MFP, Action frames are not allowed prior to * having configured keys. @@ -3082,9 +3125,10 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) sig = status->signal; - cfg80211_report_obss_beacon(rx->local->hw.wiphy, - rx->skb->data, rx->skb->len, - status->freq, sig); + cfg80211_report_obss_beacon_khz(rx->local->hw.wiphy, + rx->skb->data, rx->skb->len, + ieee80211_rx_status_to_khz(status), + sig); rx->flags |= IEEE80211_RX_BEACON_REPORTED; } @@ -3340,19 +3384,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } } break; - case WLAN_CATEGORY_SA_QUERY: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.sa_query))) - break; - - switch (mgmt->u.action.u.sa_query.action) { - case WLAN_ACTION_SA_QUERY_REQUEST: - if (sdata->vif.type != NL80211_IFTYPE_STATION) - break; - ieee80211_process_sa_query_req(sdata, mgmt, len); - goto handled; - } - break; case WLAN_CATEGORY_SELF_PROTECTED: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.self_prot.action_code))) @@ -3430,8 +3461,9 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) sig = status->signal; - if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, - rx->skb->data, rx->skb->len, 0)) { + if (cfg80211_rx_mgmt_khz(&rx->sdata->wdev, + ieee80211_rx_status_to_khz(status), sig, + rx->skb->data, rx->skb->len, 0)) { if (rx->sta) rx->sta->rx_stats.packets++; dev_kfree_skb(rx->skb); @@ -3442,6 +3474,41 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) } static ieee80211_rx_result debug_noinline +ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx) +{ + struct ieee80211_sub_if_data *sdata = rx->sdata; + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + int len = rx->skb->len; + + if (!ieee80211_is_action(mgmt->frame_control)) + return RX_CONTINUE; + + switch (mgmt->u.action.category) { + case WLAN_CATEGORY_SA_QUERY: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.sa_query))) + break; + + switch (mgmt->u.action.u.sa_query.action) { + case WLAN_ACTION_SA_QUERY_REQUEST: + if (sdata->vif.type != NL80211_IFTYPE_STATION) + break; + ieee80211_process_sa_query_req(sdata, mgmt, len); + goto handled; + } + break; + } + + return RX_CONTINUE; + + handled: + if (rx->sta) + rx->sta->rx_stats.packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; +} + +static ieee80211_rx_result debug_noinline ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; @@ -3721,6 +3788,7 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, CALL_RXH(ieee80211_rx_h_mgmt_check); CALL_RXH(ieee80211_rx_h_action); CALL_RXH(ieee80211_rx_h_userspace_mgmt); + CALL_RXH(ieee80211_rx_h_action_post_userspace); CALL_RXH(ieee80211_rx_h_action_return); CALL_RXH(ieee80211_rx_h_mgmt); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index fdac8192a519..ad90bbe57457 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -132,6 +132,12 @@ ieee80211_update_bss_from_elems(struct ieee80211_local *local, bss->beacon_rate = &sband->bitrates[rx_status->rate_idx]; } + + if (elems->vht_cap_elem) + bss->vht_cap_info = + le32_to_cpu(elems->vht_cap_elem->vht_cap_info); + else + bss->vht_cap_info = 0; } struct ieee80211_bss * @@ -275,7 +281,8 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) return; } - channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); + channel = ieee80211_get_channel_khz(local->hw.wiphy, + ieee80211_rx_status_to_khz(rx_status)); if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) return; @@ -306,8 +313,9 @@ ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef, } /* return false if no more work */ -static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) +static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_local *local = sdata->local; struct cfg80211_scan_request *req; struct cfg80211_chan_def chandef; u8 bands_used = 0; @@ -354,7 +362,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT; - ielen = ieee80211_build_preq_ies(local, + ielen = ieee80211_build_preq_ies(sdata, (u8 *)local->hw_scan_req->req.ie, local->hw_scan_ies_bufsize, &local->hw_scan_req->ies, @@ -394,9 +402,12 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (WARN_ON(!local->scan_req)) return; + scan_sdata = rcu_dereference_protected(local->scan_sdata, + lockdep_is_held(&local->mtx)); + if (hw_scan && !aborted && !ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) && - ieee80211_prep_hw_scan(local)) { + ieee80211_prep_hw_scan(scan_sdata)) { int rc; rc = drv_hw_scan(local, @@ -425,9 +436,6 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) cfg80211_scan_done(scan_req, &local->scan_info); } RCU_INIT_POINTER(local->scan_req, NULL); - - scan_sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); RCU_INIT_POINTER(local->scan_sdata, NULL); local->scanning = 0; @@ -769,7 +777,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(local); if (hw_scan) { - WARN_ON(!ieee80211_prep_hw_scan(local)); + WARN_ON(!ieee80211_prep_hw_scan(sdata)); rc = drv_hw_scan(local, sdata, local->hw_scan_req); } else { rc = ieee80211_start_sw_scan(local, sdata); @@ -896,6 +904,7 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, local->scan_chandef.chan = chan; local->scan_chandef.center_freq1 = chan->center_freq; + local->scan_chandef.freq1_offset = chan->freq_offset; local->scan_chandef.center_freq2 = 0; switch (scan_req->scan_width) { case NL80211_BSS_CHAN_WIDTH_5: @@ -1266,7 +1275,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, ieee80211_prepare_scan_chandef(&chandef, req->scan_width); - ieee80211_build_preq_ies(local, ie, num_bands * iebufsz, + ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz, &sched_scan_ies, req->ie, req->ie_len, bands_used, rate_masks, &chandef, flags); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 5fe2b645912f..ae1cb2c68722 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018, 2020 Intel Corporation */ #include <linux/ieee80211.h> @@ -22,6 +22,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *elems, enum nl80211_band current_band, + u32 vht_cap_info, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie) { @@ -150,6 +151,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, /* ignore if parsing fails */ if (!ieee80211_chandef_vht_oper(&sdata->local->hw, + vht_cap_info, &vht_oper, &ht_oper, &new_vht_chandef)) new_vht_chandef.chan = NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 36f1abaab9ff..49728047dfad 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -3,6 +3,7 @@ * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015-2017 Intel Deutschland GmbH + * Copyright(c) 2020 Intel Corporation */ #ifndef STA_INFO_H @@ -68,6 +69,8 @@ * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP. * @WLAN_STA_PS_DELIVER: station woke up, but we're still blocking TX * until pending frames are delivered + * @WLAN_STA_USES_ENCRYPTION: This station was configured for encryption, + * so drop all packets without a key later. * * @NUM_WLAN_STA_FLAGS: number of defined flags */ @@ -116,6 +119,7 @@ enum ieee80211_sta_info_flags { #define HT_AGG_STATE_WANT_STOP 5 #define HT_AGG_STATE_START_CB 6 #define HT_AGG_STATE_STOP_CB 7 +#define HT_AGG_STATE_SENT_ADDBA 8 DECLARE_EWMA(avg_signal, 10, 8) enum ieee80211_agg_stop_reason { diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 22512805eafb..7b1bacac39c6 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -649,10 +649,17 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, info->status.ack_signal, info->status.is_valid_ack_signal, GFP_ATOMIC); - else + else if (ieee80211_is_mgmt(hdr->frame_control)) cfg80211_mgmt_tx_status(&sdata->wdev, cookie, skb->data, skb->len, acked, GFP_ATOMIC); + else + cfg80211_control_port_tx_status(&sdata->wdev, + cookie, + skb->data, + skb->len, + acked, + GFP_ATOMIC); } rcu_read_unlock(); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index fca1f5477396..4b0cff4a07bd 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -226,12 +226,11 @@ static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata, static void ieee80211_tdls_add_aid(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 *pos = skb_put(skb, 4); *pos++ = WLAN_EID_AID; *pos++ = 2; /* len */ - put_unaligned_le16(ifmgd->aid, pos); + put_unaligned_le16(sdata->vif.bss_conf.aid, pos); } /* translate numbering in the WMM parameter IE to the mac80211 notation */ @@ -1055,7 +1054,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, /* disable bottom halves when entering the Tx path */ local_bh_disable(); - __ieee80211_subif_start_xmit(skb, dev, flags, 0); + __ieee80211_subif_start_xmit(skb, dev, flags, 0, NULL); local_bh_enable(); return ret; @@ -1567,6 +1566,10 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, u32 ch_sw_tm_ie; int ret; + if (chandef->chan->freq_offset) + /* this may work, but is untested */ + return -EOPNOTSUPP; + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, addr); if (!sta) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 427f51a0a994..1b4709694d2a 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -37,32 +37,42 @@ #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" #define CHANDEF_ENTRY __field(u32, control_freq) \ + __field(u32, freq_offset) \ __field(u32, chan_width) \ __field(u32, center_freq1) \ + __field(u32, freq1_offset) \ __field(u32, center_freq2) #define CHANDEF_ASSIGN(c) \ __entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0; \ + __entry->freq_offset = (c) ? ((c)->chan ? (c)->chan->freq_offset : 0) : 0; \ __entry->chan_width = (c) ? (c)->width : 0; \ __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \ + __entry->freq1_offset = (c) ? (c)->freq1_offset : 0; \ __entry->center_freq2 = (c) ? (c)->center_freq2 : 0; -#define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" -#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ - __entry->center_freq1, __entry->center_freq2 +#define CHANDEF_PR_FMT " control:%d.%03d MHz width:%d center: %d.%03d/%d MHz" +#define CHANDEF_PR_ARG __entry->control_freq, __entry->freq_offset, __entry->chan_width, \ + __entry->center_freq1, __entry->freq1_offset, __entry->center_freq2 #define MIN_CHANDEF_ENTRY \ __field(u32, min_control_freq) \ + __field(u32, min_freq_offset) \ __field(u32, min_chan_width) \ __field(u32, min_center_freq1) \ + __field(u32, min_freq1_offset) \ __field(u32, min_center_freq2) #define MIN_CHANDEF_ASSIGN(c) \ __entry->min_control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ + __entry->min_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0; \ __entry->min_chan_width = (c)->width; \ __entry->min_center_freq1 = (c)->center_freq1; \ + __entry->freq1_offset = (c)->freq1_offset; \ __entry->min_center_freq2 = (c)->center_freq2; -#define MIN_CHANDEF_PR_FMT " min_control:%d MHz min_width:%d min_center: %d/%d MHz" -#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_chan_width, \ - __entry->min_center_freq1, __entry->min_center_freq2 +#define MIN_CHANDEF_PR_FMT " min_control:%d.%03d MHz min_width:%d min_center: %d.%03d/%d MHz" +#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \ + __entry->min_chan_width, \ + __entry->min_center_freq1, __entry->min_freq1_offset, \ + __entry->min_center_freq2 #define CHANCTX_ENTRY CHANDEF_ENTRY \ MIN_CHANDEF_ENTRY \ @@ -412,6 +422,7 @@ TRACE_EVENT(drv_bss_info_changed, __field(s32, cqm_rssi_hyst) __field(u32, channel_width) __field(u32, channel_cfreq1) + __field(u32, channel_cfreq1_offset) __dynamic_array(u32, arp_addr_list, info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? IEEE80211_BSS_ARP_ADDR_LIST_LEN : @@ -452,6 +463,7 @@ TRACE_EVENT(drv_bss_info_changed, __entry->cqm_rssi_hyst = info->cqm_rssi_hyst; __entry->channel_width = info->chandef.width; __entry->channel_cfreq1 = info->chandef.center_freq1; + __entry->channel_cfreq1_offset = info->chandef.freq1_offset; __entry->arp_addr_cnt = info->arp_addr_cnt; memcpy(__get_dynamic_array(arp_addr_list), info->arp_addr_list, sizeof(u32) * (info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ? @@ -1223,6 +1235,7 @@ TRACE_EVENT(drv_remain_on_channel, LOCAL_ENTRY VIF_ENTRY __field(int, center_freq) + __field(int, freq_offset) __field(unsigned int, duration) __field(u32, type) ), @@ -1231,14 +1244,16 @@ TRACE_EVENT(drv_remain_on_channel, LOCAL_ASSIGN; VIF_ASSIGN; __entry->center_freq = chan->center_freq; + __entry->freq_offset = chan->freq_offset; __entry->duration = duration; __entry->type = type; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms type=%d", + LOCAL_PR_FMT VIF_PR_FMT " freq:%d.%03dMHz duration:%dms type=%d", LOCAL_PR_ARG, VIF_PR_ARG, - __entry->center_freq, __entry->duration, __entry->type + __entry->center_freq, __entry->freq_offset, + __entry->duration, __entry->type ) ); @@ -1546,8 +1561,10 @@ struct trace_vif_entry { struct trace_chandef_entry { u32 control_freq; + u32 freq_offset; u32 chan_width; u32 center_freq1; + u32 freq1_offset; u32 center_freq2; } __packed; @@ -1597,18 +1614,26 @@ TRACE_EVENT(drv_switch_vif_chanctx, sizeof(local_vifs[i].vif.vif_name)); SWITCH_ENTRY_ASSIGN(old_chandef.control_freq, old_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(old_chandef.freq_offset, + old_ctx->def.chan->freq_offset); SWITCH_ENTRY_ASSIGN(old_chandef.chan_width, old_ctx->def.width); SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1, old_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(old_chandef.freq1_offset, + old_ctx->def.freq1_offset); SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2, old_ctx->def.center_freq2); SWITCH_ENTRY_ASSIGN(new_chandef.control_freq, new_ctx->def.chan->center_freq); + SWITCH_ENTRY_ASSIGN(new_chandef.freq_offset, + new_ctx->def.chan->freq_offset); SWITCH_ENTRY_ASSIGN(new_chandef.chan_width, new_ctx->def.width); SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1, new_ctx->def.center_freq1); + SWITCH_ENTRY_ASSIGN(new_chandef.freq1_offset, + new_ctx->def.freq1_offset); SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2, new_ctx->def.center_freq2); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9849c14694db..e9ce658141f5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2436,13 +2436,19 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, return 0; } -static int ieee80211_store_ack_skb(struct ieee80211_local *local, +static u16 ieee80211_store_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, - u32 *info_flags) + u32 *info_flags, + u64 *cookie) { - struct sk_buff *ack_skb = skb_clone_sk(skb); + struct sk_buff *ack_skb; u16 info_id = 0; + if (skb->sk) + ack_skb = skb_clone_sk(skb); + else + ack_skb = skb_clone(skb, GFP_ATOMIC); + if (ack_skb) { unsigned long flags; int id; @@ -2455,6 +2461,10 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local, if (id >= 0) { info_id = id; *info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + if (cookie) { + *cookie = ieee80211_mgmt_tx_cookie(local); + IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; + } } else { kfree_skb(ack_skb); } @@ -2484,7 +2494,8 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local, */ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags, - struct sta_info *sta, u32 ctrl_flags) + struct sta_info *sta, u32 ctrl_flags, + u64 *cookie) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info; @@ -2755,9 +2766,11 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, goto free; } - if (unlikely(!multicast && skb->sk && - skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) - info_id = ieee80211_store_ack_skb(local, skb, &info_flags); + if (unlikely(!multicast && ((skb->sk && + skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) || + ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS))) + info_id = ieee80211_store_ack_skb(local, skb, &info_flags, + cookie); /* * If the skb is shared we need to obtain our own copy. @@ -3913,7 +3926,8 @@ EXPORT_SYMBOL(ieee80211_txq_schedule_start); void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags, - u32 ctrl_flags) + u32 ctrl_flags, + u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -3983,7 +3997,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, skb_mark_not_on_list(skb); skb = ieee80211_build_hdr(sdata, skb, info_flags, - sta, ctrl_flags); + sta, ctrl_flags, cookie); if (IS_ERR(skb)) { kfree_skb_list(next); goto out; @@ -4125,9 +4139,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, __skb_queue_head_init(&queue); ieee80211_convert_to_unicast(skb, dev, &queue); while ((skb = __skb_dequeue(&queue))) - __ieee80211_subif_start_xmit(skb, dev, 0, 0); + __ieee80211_subif_start_xmit(skb, dev, 0, 0, NULL); } else { - __ieee80211_subif_start_xmit(skb, dev, 0, 0); + __ieee80211_subif_start_xmit(skb, dev, 0, 0, NULL); } return NETDEV_TX_OK; @@ -4215,7 +4229,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, if (unlikely(!multicast && skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) - ieee80211_store_ack_skb(local, skb, &info->flags); + ieee80211_store_ack_skb(local, skb, &info->flags, NULL); memset(info, 0, sizeof(*info)); @@ -4299,7 +4313,7 @@ ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, goto out; } - skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, 0); + skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, 0, NULL); if (IS_ERR(skb)) goto out; @@ -4883,7 +4897,10 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, txrc.bss_conf = &sdata->vif.bss_conf; txrc.skb = skb; txrc.reported_rate.idx = -1; - txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; + if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band]) + txrc.rate_idx_mask = sdata->beacon_rateidx_mask[band]; + else + txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; txrc.bss = true; rate_control_get_rate(sdata, NULL, &txrc); @@ -5006,7 +5023,7 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, pspoll = skb_put_zero(skb, sizeof(*pspoll)); pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL); - pspoll->aid = cpu_to_le16(ifmgd->aid); + pspoll->aid = cpu_to_le16(sdata->vif.bss_conf.aid); /* aid in PS-Poll has its two MSBs each set to 1 */ pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); @@ -5336,14 +5353,15 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, __be16 proto, bool unencrypted) + const u8 *dest, __be16 proto, bool unencrypted, + u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ethhdr *ehdr; u32 ctrl_flags = 0; - u32 flags; + u32 flags = 0; /* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE * or Pre-Authentication @@ -5356,9 +5374,13 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO; if (unencrypted) - flags = IEEE80211_TX_INTFL_DONT_ENCRYPT; - else - flags = 0; + flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + + if (cookie) + ctrl_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + + flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_INJECTED; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(struct ethhdr) + len); @@ -5379,10 +5401,15 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, skb_reset_network_header(skb); skb_reset_mac_header(skb); + /* mutex lock is only needed for incrementing the cookie counter */ + mutex_lock(&local->mtx); + local_bh_disable(); - __ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags); + __ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags, cookie); local_bh_enable(); + mutex_unlock(&local->mtx); + return 0; } @@ -5409,7 +5436,8 @@ int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, local_bh_disable(); __ieee80211_subif_start_xmit(skb, skb->dev, 0, - IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP); + IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP, + NULL); local_bh_enable(); return 0; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 20436c86b9bf..21c94094a699 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -936,6 +936,10 @@ static void ieee80211_parse_extension_element(u32 *crc, len >= ieee80211_he_spr_size(data)) elems->he_spr = data; break; + case WLAN_EID_EXT_HE_6GHZ_CAPA: + if (len == sizeof(*elems->he_6ghz_capa)) + elems->he_6ghz_capa = data; + break; } } @@ -1659,7 +1663,20 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, } } -static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, +static u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end) +{ + if ((end - pos) < 5) + return pos; + + *pos++ = WLAN_EID_EXTENSION; + *pos++ = 1 + sizeof(cap); + *pos++ = WLAN_EID_EXT_HE_6GHZ_CAPA; + memcpy(pos, &cap, sizeof(cap)); + + return pos + 2; +} + +static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, u8 *buffer, size_t buffer_len, const u8 *ie, size_t ie_len, enum nl80211_band band, @@ -1667,6 +1684,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, struct cfg80211_chan_def *chandef, size_t *offset, u32 flags) { + struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; const struct ieee80211_sta_he_cap *he_cap; u8 *pos = buffer, *end = buffer + buffer_len; @@ -1844,6 +1862,14 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, pos = ieee80211_ie_build_he_cap(pos, he_cap, end); if (!pos) goto out_err; + + if (sband->band == NL80211_BAND_6GHZ) { + enum nl80211_iftype iftype = + ieee80211_vif_type_p2p(&sdata->vif); + __le16 cap = ieee80211_get_he_6ghz_capa(sband, iftype); + + pos = ieee80211_write_he_6ghz_cap(pos, cap, end); + } } /* @@ -1858,7 +1884,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, return pos - buffer; } -int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, +int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer, size_t buffer_len, struct ieee80211_scan_ies *ie_desc, const u8 *ie, size_t ie_len, @@ -1873,7 +1899,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, for (i = 0; i < NUM_NL80211_BANDS; i++) { if (bands_used & BIT(i)) { - pos += ieee80211_build_preq_ies_band(local, + pos += ieee80211_build_preq_ies_band(sdata, buffer + pos, buffer_len - pos, ie, ie_len, i, @@ -1935,7 +1961,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, return NULL; rate_masks[chan->band] = ratemask; - ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb), + ies_len = ieee80211_build_preq_ies(sdata, skb_tail_pointer(skb), skb_tailroom(skb), &dummy_ie_desc, ie, ie_len, BIT(chan->band), rate_masks, &chandef, flags); @@ -2835,6 +2861,50 @@ end: return pos; } +void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_supported_band *sband; + const struct ieee80211_sband_iftype_data *iftd; + enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif); + u8 *pos; + u16 cap; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return; + + iftd = ieee80211_get_sband_iftype_data(sband, iftype); + if (WARN_ON(!iftd)) + return; + + cap = le16_to_cpu(iftd->he_6ghz_capa.capa); + cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS; + + switch (sdata->smps_mode) { + case IEEE80211_SMPS_AUTOMATIC: + case IEEE80211_SMPS_NUM_MODES: + WARN_ON(1); + /* fall through */ + case IEEE80211_SMPS_OFF: + cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED, + IEEE80211_HE_6GHZ_CAP_SM_PS); + break; + case IEEE80211_SMPS_STATIC: + cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_STATIC, + IEEE80211_HE_6GHZ_CAP_SM_PS); + break; + case IEEE80211_SMPS_DYNAMIC: + cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DYNAMIC, + IEEE80211_HE_6GHZ_CAP_SM_PS); + break; + } + + pos = skb_put(skb, 2 + 1 + sizeof(cap)); + ieee80211_write_he_6ghz_cap(pos, cpu_to_le16(cap), + pos + 2 + 1 + sizeof(cap)); +} + u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, const struct cfg80211_chan_def *chandef, u16 prot_mode, bool rifs_mode) @@ -2958,13 +3028,18 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, return pos + sizeof(struct ieee80211_vht_operation); } -u8 *ieee80211_ie_build_he_oper(u8 *pos) +u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef) { struct ieee80211_he_operation *he_oper; + struct ieee80211_he_6ghz_oper *he_6ghz_op; u32 he_oper_params; + u8 ie_len = 1 + sizeof(struct ieee80211_he_operation); + + if (chandef->chan->band == NL80211_BAND_6GHZ) + ie_len += sizeof(struct ieee80211_he_6ghz_oper); *pos++ = WLAN_EID_EXTENSION; - *pos++ = 1 + sizeof(struct ieee80211_he_operation); + *pos++ = ie_len; *pos++ = WLAN_EID_EXT_HE_OPERATION; he_oper_params = 0; @@ -2974,16 +3049,68 @@ u8 *ieee80211_ie_build_he_oper(u8 *pos) IEEE80211_HE_OPERATION_ER_SU_DISABLE); he_oper_params |= u32_encode_bits(1, IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED); + if (chandef->chan->band == NL80211_BAND_6GHZ) + he_oper_params |= u32_encode_bits(1, + IEEE80211_HE_OPERATION_6GHZ_OP_INFO); he_oper = (struct ieee80211_he_operation *)pos; he_oper->he_oper_params = cpu_to_le32(he_oper_params); /* don't require special HE peer rates */ he_oper->he_mcs_nss_set = cpu_to_le16(0xffff); + pos += sizeof(struct ieee80211_he_operation); - /* TODO add VHT operational and 6GHz operational subelement? */ + if (chandef->chan->band != NL80211_BAND_6GHZ) + goto out; - return pos + sizeof(struct ieee80211_vht_operation); + /* TODO add VHT operational */ + he_6ghz_op = (struct ieee80211_he_6ghz_oper *)pos; + he_6ghz_op->minrate = 6; /* 6 Mbps */ + he_6ghz_op->primary = + ieee80211_frequency_to_channel(chandef->chan->center_freq); + he_6ghz_op->ccfs0 = + ieee80211_frequency_to_channel(chandef->center_freq1); + if (chandef->center_freq2) + he_6ghz_op->ccfs1 = + ieee80211_frequency_to_channel(chandef->center_freq2); + else + he_6ghz_op->ccfs1 = 0; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_160: + /* Convert 160 MHz channel width to new style as interop + * workaround. + */ + he_6ghz_op->control = + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ; + he_6ghz_op->ccfs1 = he_6ghz_op->ccfs0; + if (chandef->chan->center_freq < chandef->center_freq1) + he_6ghz_op->ccfs0 -= 8; + else + he_6ghz_op->ccfs0 += 8; + fallthrough; + case NL80211_CHAN_WIDTH_80P80: + he_6ghz_op->control = + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ; + break; + case NL80211_CHAN_WIDTH_80: + he_6ghz_op->control = + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ; + break; + case NL80211_CHAN_WIDTH_40: + he_6ghz_op->control = + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ; + break; + default: + he_6ghz_op->control = + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ; + break; + } + + pos += sizeof(struct ieee80211_he_6ghz_oper); + +out: + return pos; } bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, @@ -3013,7 +3140,7 @@ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, return true; } -bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, +bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, const struct ieee80211_vht_operation *oper, const struct ieee80211_ht_operation *htop, struct cfg80211_chan_def *chandef) @@ -3025,6 +3152,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap; bool support_80_80 = false; bool support_160 = false; + u8 ext_nss_bw_supp = u32_get_bits(vht_cap_info, + IEEE80211_VHT_CAP_EXT_NSS_BW_MASK); + u8 supp_chwidth = u32_get_bits(vht_cap_info, + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK); if (!oper || !htop) return false; @@ -3044,11 +3175,48 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, IEEE80211_HT_OP_MODE_CCFS2_MASK) >> IEEE80211_HT_OP_MODE_CCFS2_SHIFT; - /* when parsing (and we know how to) CCFS1 and CCFS2 are equivalent */ ccf0 = ccfs0; - ccf1 = ccfs1; - if (!ccfs1 && ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW)) + + /* if not supported, parse as though we didn't understand it */ + if (!ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW)) + ext_nss_bw_supp = 0; + + /* + * Cf. IEEE 802.11 Table 9-250 + * + * We really just consider that because it's inefficient to connect + * at a higher bandwidth than we'll actually be able to use. + */ + switch ((supp_chwidth << 4) | ext_nss_bw_supp) { + default: + case 0x00: + ccf1 = 0; + support_160 = false; + support_80_80 = false; + break; + case 0x01: + support_80_80 = false; + /* fall through */ + case 0x02: + case 0x03: ccf1 = ccfs2; + break; + case 0x10: + ccf1 = ccfs1; + break; + case 0x11: + case 0x12: + if (!ccfs1) + ccf1 = ccfs2; + else + ccf1 = ccfs1; + break; + case 0x13: + case 0x20: + case 0x23: + ccf1 = ccfs1; + break; + } cf0 = ieee80211_channel_to_frequency(ccf0, chandef->chan->band); cf1 = ieee80211_channel_to_frequency(ccf1, chandef->chan->band); @@ -3096,6 +3264,112 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, return true; } +bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_he_operation *he_oper, + struct cfg80211_chan_def *chandef) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif); + const struct ieee80211_sta_he_cap *he_cap; + struct cfg80211_chan_def he_chandef = *chandef; + const struct ieee80211_he_6ghz_oper *he_6ghz_oper; + bool support_80_80, support_160; + u8 he_phy_cap; + u32 freq; + + if (chandef->chan->band != NL80211_BAND_6GHZ) + return true; + + sband = local->hw.wiphy->bands[NL80211_BAND_6GHZ]; + + he_cap = ieee80211_get_he_iftype_cap(sband, iftype); + if (!he_cap) { + sdata_info(sdata, "Missing iftype sband data/HE cap"); + return false; + } + + he_phy_cap = he_cap->he_cap_elem.phy_cap_info[0]; + support_160 = + he_phy_cap & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; + support_80_80 = + he_phy_cap & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; + + if (!he_oper) { + sdata_info(sdata, + "HE is not advertised on (on %d MHz), expect issues\n", + chandef->chan->center_freq); + return false; + } + + he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper); + + if (!he_6ghz_oper) { + sdata_info(sdata, + "HE 6GHz operation missing (on %d MHz), expect issues\n", + chandef->chan->center_freq); + return false; + } + + freq = ieee80211_channel_to_frequency(he_6ghz_oper->primary, + NL80211_BAND_6GHZ); + he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq); + + switch (u8_get_bits(he_6ghz_oper->control, + IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH)) { + case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ: + he_chandef.width = NL80211_CHAN_WIDTH_20; + break; + case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ: + he_chandef.width = NL80211_CHAN_WIDTH_40; + break; + case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ: + he_chandef.width = NL80211_CHAN_WIDTH_80; + break; + case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ: + he_chandef.width = NL80211_CHAN_WIDTH_80; + if (!he_6ghz_oper->ccfs1) + break; + if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8) { + if (support_160) + he_chandef.width = NL80211_CHAN_WIDTH_160; + } else { + if (support_80_80) + he_chandef.width = NL80211_CHAN_WIDTH_80P80; + } + break; + } + + if (he_chandef.width == NL80211_CHAN_WIDTH_160) { + he_chandef.center_freq1 = + ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, + NL80211_BAND_6GHZ); + } else { + he_chandef.center_freq1 = + ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0, + NL80211_BAND_6GHZ); + he_chandef.center_freq2 = + ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1, + NL80211_BAND_6GHZ); + } + + if (!cfg80211_chandef_valid(&he_chandef)) { + sdata_info(sdata, + "HE 6GHz operation resulted in invalid chandef: %d MHz/%d/%d MHz/%d MHz\n", + he_chandef.chan ? he_chandef.chan->center_freq : 0, + he_chandef.width, + he_chandef.center_freq1, + he_chandef.center_freq2); + return false; + } + + *chandef = he_chandef; + + return true; +} + int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 632f07401850..9c6045f9c24d 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -4,7 +4,7 @@ * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2020 Intel Corporation */ #include <linux/ieee80211.h> @@ -575,15 +575,21 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: + /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */ sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: + /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */ sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: - sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; + if (opmode & IEEE80211_OPMODE_NOTIF_BW_160_80P80) + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; + else + sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: + /* legacy only, no longer used by newer spec */ sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; break; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index a42e4ed5ab0e..fd30ea61336e 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1593,7 +1593,8 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, dev->type == ARPHRD_IPGRE || dev->type == ARPHRD_IP6GRE || dev->type == ARPHRD_SIT || - dev->type == ARPHRD_TUNNEL) { + dev->type == ARPHRD_TUNNEL || + dev->type == ARPHRD_TUNNEL6) { mdev = mpls_add_dev(dev); if (IS_ERR(mdev)) return notifier_from_errno(PTR_ERR(mdev)); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 0e9aa94adc07..838cdfc10e47 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -172,17 +172,6 @@ struct mpls_route { /* next hop label forwarding entry */ #define endfor_nexthops(rt) } -static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) -{ - struct mpls_shim_hdr result; - result.label_stack_entry = - cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) | - (tc << MPLS_LS_TC_SHIFT) | - (bos ? (1 << MPLS_LS_S_SHIFT) : 0) | - (ttl << MPLS_LS_TTL_SHIFT)); - return result; -} - static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr) { struct mpls_entry_decoded result; diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c index c151628bd416..0f5a414a9366 100644 --- a/net/mptcp/crypto.c +++ b/net/mptcp/crypto.c @@ -47,8 +47,6 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn) void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) { u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE]; - __be32 mptcp_hashed_key[SHA256_DIGEST_WORDS]; - __be32 *hash_out = (__force __be32 *)hmac; struct sha256_state state; u8 key1be[8]; u8 key2be[8]; @@ -86,11 +84,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) sha256_init(&state); sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE); - sha256_final(&state, (u8 *)mptcp_hashed_key); - - /* takes only first 160 bits */ - for (i = 0; i < 5; i++) - hash_out[i] = mptcp_hashed_key[i]; + sha256_final(&state, (u8 *)hmac); } #ifdef CONFIG_MPTCP_HMAC_TEST @@ -101,29 +95,29 @@ struct test_cast { }; /* we can't reuse RFC 4231 test vectors, as we have constraint on the - * input and key size, and we truncate the output. + * input and key size. */ static struct test_cast tests[] = { { .key = "0b0b0b0b0b0b0b0b", .msg = "48692054", - .result = "8385e24fb4235ac37556b6b886db106284a1da67", + .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa", }, { .key = "aaaaaaaaaaaaaaaa", .msg = "dddddddd", - .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492", + .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9", }, { .key = "0102030405060708", .msg = "cdcdcdcd", - .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6", + .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d", }, }; static int __init test_mptcp_crypto(void) { - char hmac[20], hmac_hex[41]; + char hmac[32], hmac_hex[65]; u32 nonce1, nonce2; u64 key1, key2; u8 msg[8]; @@ -140,11 +134,11 @@ static int __init test_mptcp_crypto(void) put_unaligned_be32(nonce2, &msg[4]); mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac); - for (j = 0; j < 20; ++j) + for (j = 0; j < 32; ++j) sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff); - hmac_hex[40] = 0; + hmac_hex[64] = 0; - if (memcmp(hmac_hex, tests[i].result, 40)) + if (memcmp(hmac_hex, tests[i].result, 64)) pr_err("test %d failed, got %s expected %s", i, hmac_hex, tests[i].result); else diff --git a/net/mptcp/options.c b/net/mptcp/options.c index ece6f92cf7d1..01f1f4cf4902 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> +#include <crypto/sha.h> #include <net/tcp.h> #include <net/mptcp.h> #include "protocol.h" @@ -540,7 +541,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, struct in_addr *addr) { - u8 hmac[MPTCP_ADDR_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[7]; msg[0] = addr_id; @@ -550,14 +551,14 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac); - return get_unaligned_be64(hmac); + return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, struct in6_addr *addr) { - u8 hmac[MPTCP_ADDR_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; msg[0] = addr_id; @@ -567,7 +568,7 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac); - return get_unaligned_be64(hmac); + return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } #endif diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e3a628bea2b8..14b253d10ccf 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -144,12 +144,29 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, unsigned int offset, size_t copy_len) { struct sock *sk = (struct sock *)msk; + struct sk_buff *tail; __skb_unlink(skb, &ssk->sk_receive_queue); - skb_set_owner_r(skb, sk); - __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_ext_reset(skb); + skb_orphan(skb); msk->ack_seq += copy_len; + + tail = skb_peek_tail(&sk->sk_receive_queue); + if (offset == 0 && tail) { + bool fragstolen; + int delta; + + if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) { + kfree_skb_partial(skb, fragstolen); + atomic_add(delta, &sk->sk_rmem_alloc); + sk_mem_charge(sk, delta); + return; + } + } + + skb_set_owner_r(skb, sk); + __skb_queue_tail(&sk->sk_receive_queue, skb); MPTCP_SKB_CB(skb)->offset = offset; } @@ -1015,7 +1032,8 @@ fallback: pr_debug("block timeout %ld", timeo); mptcp_wait_data(sk, &timeo); - if (unlikely(__mptcp_tcp_fallback(msk))) + ssock = __mptcp_tcp_fallback(msk); + if (unlikely(ssock)) goto fallback; } @@ -1329,11 +1347,14 @@ static void mptcp_close(struct sock *sk, long timeout) lock_sock(sk); - mptcp_token_destroy(msk->token); inet_sk_state_store(sk, TCP_CLOSE); - __mptcp_flush_join_list(msk); - + /* be sure to always acquire the join list lock, to sync vs + * mptcp_finish_join(). + */ + spin_lock_bh(&msk->join_list_lock); + list_splice_tail_init(&msk->join_list, &msk->conn_list); + spin_unlock_bh(&msk->join_list_lock); list_splice_init(&msk->conn_list, &conn_list); data_fin_tx_seq = msk->write_seq; @@ -1523,6 +1544,7 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); + mptcp_token_destroy(msk->token); if (msk->cached_ext) __skb_ext_put(msk->cached_ext); @@ -1689,22 +1711,30 @@ bool mptcp_finish_join(struct sock *sk) if (!msk->pm.server_side) return true; - /* passive connection, attach to msk socket */ + if (!mptcp_pm_allow_new_subflow(msk)) + return false; + + /* active connections are already on conn_list, and we can't acquire + * msk lock here. + * use the join list lock as synchronization point and double-check + * msk status to avoid racing with mptcp_close() + */ + spin_lock_bh(&msk->join_list_lock); + ret = inet_sk_state_load(parent) == TCP_ESTABLISHED; + if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node))) + list_add_tail(&subflow->node, &msk->join_list); + spin_unlock_bh(&msk->join_list_lock); + if (!ret) + return false; + + /* attach to msk socket only after we are sure he will deal with us + * at close time + */ parent_sock = READ_ONCE(parent->sk_socket); if (parent_sock && !sk->sk_socket) mptcp_sock_graft(sk, parent_sock); - - ret = mptcp_pm_allow_new_subflow(msk); - if (ret) { - subflow->map_seq = msk->ack_seq; - - /* active connections are already on conn_list */ - spin_lock_bh(&msk->join_list_lock); - if (!WARN_ON_ONCE(!list_empty(&subflow->node))) - list_add_tail(&subflow->node, &msk->join_list); - spin_unlock_bh(&msk->join_list_lock); - } - return ret; + subflow->map_seq = msk->ack_seq; + return true; } static bool mptcp_memory_free(const struct sock *sk, int wake) @@ -1771,6 +1801,14 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int err; lock_sock(sock->sk); + if (sock->state != SS_UNCONNECTED && msk->subflow) { + /* pending connection or invalid state, let existing subflow + * cope with that + */ + ssock = msk->subflow; + goto do_connect; + } + ssock = __mptcp_socket_create(msk, TCP_SYN_SENT); if (IS_ERR(ssock)) { err = PTR_ERR(ssock); @@ -1785,9 +1823,17 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0; #endif +do_connect: err = ssock->ops->connect(ssock, uaddr, addr_len, flags); - inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); - mptcp_copy_inaddrs(sock->sk, ssock->sk); + sock->state = ssock->state; + + /* on successful connect, the msk state will be moved to established by + * subflow_finish_connect() + */ + if (!err || err == EINPROGRESS) + mptcp_copy_inaddrs(sock->sk, ssock->sk); + else + inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); unlock: release_sock(sock->sk); @@ -2068,6 +2114,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f5adca93e8fb..809687d3f410 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -81,7 +81,6 @@ /* MPTCP ADD_ADDR flags */ #define MPTCP_ADDR_ECHO BIT(0) -#define MPTCP_ADDR_HMAC_LEN 20 #define MPTCP_ADDR_IPVERSION_4 4 #define MPTCP_ADDR_IPVERSION_6 6 diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 0020d356233d..493b98a0825c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <crypto/algapi.h> +#include <crypto/sha.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -89,7 +90,7 @@ static bool subflow_token_join_request(struct request_sock *req, const struct sk_buff *skb) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; int local_id; @@ -201,7 +202,7 @@ static void subflow_v6_init_req(struct request_sock *req, /* validate received truncated hmac and create hmac for third ACK */ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) { - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u64 thmac; subflow_generate_hmac(subflow->remote_key, subflow->local_key, @@ -267,6 +268,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; } } else if (subflow->mp_join) { + u8 hmac[SHA256_DIGEST_SIZE]; + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, subflow->thmac, subflow->remote_nonce); @@ -279,7 +282,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow_generate_hmac(subflow->local_key, subflow->remote_key, subflow->local_nonce, subflow->remote_nonce, - subflow->hmac); + hmac); + + memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); if (skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -347,7 +352,7 @@ static bool subflow_hmac_valid(const struct request_sock *req, const struct mptcp_options_received *mp_opt) { const struct mptcp_subflow_request_sock *subflow_req; - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; bool ret; @@ -361,7 +366,7 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req->local_nonce, hmac); ret = true; - if (crypto_memneq(hmac, mp_opt->hmac, sizeof(hmac))) + if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN)) ret = false; sock_put((struct sock *)msk); @@ -408,6 +413,20 @@ static void subflow_ulp_fallback(struct sock *sk, tcp_sk(sk)->is_mptcp = 0; } +static void subflow_drop_ctx(struct sock *ssk) +{ + struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk); + + if (!ctx) + return; + + subflow_ulp_fallback(ssk, ctx); + if (ctx->conn) + sock_put(ctx->conn); + + kfree_rcu(ctx, rcu); +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -480,10 +499,7 @@ create_child: if (fallback_is_fatal) goto dispose_child; - if (ctx) { - subflow_ulp_fallback(child, ctx); - kfree_rcu(ctx, rcu); - } + subflow_drop_ctx(child); goto out; } @@ -532,6 +548,7 @@ out: return child; dispose_child: + subflow_drop_ctx(child); tcp_rsk(req)->drop_req = true; tcp_send_active_reset(child, GFP_ATOMIC); inet_csk_prepare_for_destroy_sock(child); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index cd747c0962fd..5a67f7966574 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -59,7 +59,7 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, /* Don't lookup sub-counters at all */ opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS; if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) - opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; + opt->cmdflags |= IPSET_FLAG_SKIP_COUNTER_UPDATE; list_for_each_entry_rcu(e, &map->members, list) { ret = ip_set_test(e->id, skb, par, opt); if (ret <= 0) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1d57b95d3481..bb72ca5f3999 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2016,22 +2016,18 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) nf_conntrack_get(skb_nfct(nskb)); } -static int nf_conntrack_update(struct net *net, struct sk_buff *skb) +static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; - enum ip_conntrack_info ctinfo; struct nf_nat_hook *nat_hook; unsigned int status; - struct nf_conn *ct; int dataoff; u16 l3num; u8 l4num; - ct = nf_ct_get(skb, &ctinfo); - if (!ct || nf_ct_is_confirmed(ct)) - return 0; - l3num = nf_ct_l3num(ct); dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num); @@ -2088,6 +2084,76 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb) return 0; } +/* This packet is coming from userspace via nf_queue, complete the packet + * processing after the helper invocation in nf_confirm(). + */ +static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + const struct nf_conntrack_helper *helper; + const struct nf_conn_help *help; + int protoff; + + help = nfct_help(ct); + if (!help) + return 0; + + helper = rcu_dereference(help->helper); + if (!(helper->flags & NF_CT_HELPER_F_USERSPACE)) + return 0; + + switch (nf_ct_l3num(ct)) { + case NFPROTO_IPV4: + protoff = skb_network_offset(skb) + ip_hdrlen(skb); + break; +#if IS_ENABLED(CONFIG_IPV6) + case NFPROTO_IPV6: { + __be16 frag_off; + u8 pnum; + + pnum = ipv6_hdr(skb)->nexthdr; + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) + return 0; + break; + } +#endif + default: + return 0; + } + + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { + if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); + return -1; + } + } + + /* We've seen it coming out the other side: confirm it */ + return nf_conntrack_confirm(skb) == NF_DROP ? - 1 : 0; +} + +static int nf_conntrack_update(struct net *net, struct sk_buff *skb) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + int err; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return 0; + + if (!nf_ct_is_confirmed(ct)) { + err = __nf_conntrack_update(net, skb, ct, ctinfo); + if (err < 0) + return err; + } + + return nf_confirm_cthelper(skb, ct, ctinfo); +} + static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index a971183f11af..1f44d523b512 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -72,24 +72,32 @@ EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); #if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) /* PptpControlMessageType names */ -const char *const pptp_msg_name[] = { - "UNKNOWN_MESSAGE", - "START_SESSION_REQUEST", - "START_SESSION_REPLY", - "STOP_SESSION_REQUEST", - "STOP_SESSION_REPLY", - "ECHO_REQUEST", - "ECHO_REPLY", - "OUT_CALL_REQUEST", - "OUT_CALL_REPLY", - "IN_CALL_REQUEST", - "IN_CALL_REPLY", - "IN_CALL_CONNECT", - "CALL_CLEAR_REQUEST", - "CALL_DISCONNECT_NOTIFY", - "WAN_ERROR_NOTIFY", - "SET_LINK_INFO" +static const char *const pptp_msg_name_array[PPTP_MSG_MAX + 1] = { + [0] = "UNKNOWN_MESSAGE", + [PPTP_START_SESSION_REQUEST] = "START_SESSION_REQUEST", + [PPTP_START_SESSION_REPLY] = "START_SESSION_REPLY", + [PPTP_STOP_SESSION_REQUEST] = "STOP_SESSION_REQUEST", + [PPTP_STOP_SESSION_REPLY] = "STOP_SESSION_REPLY", + [PPTP_ECHO_REQUEST] = "ECHO_REQUEST", + [PPTP_ECHO_REPLY] = "ECHO_REPLY", + [PPTP_OUT_CALL_REQUEST] = "OUT_CALL_REQUEST", + [PPTP_OUT_CALL_REPLY] = "OUT_CALL_REPLY", + [PPTP_IN_CALL_REQUEST] = "IN_CALL_REQUEST", + [PPTP_IN_CALL_REPLY] = "IN_CALL_REPLY", + [PPTP_IN_CALL_CONNECT] = "IN_CALL_CONNECT", + [PPTP_CALL_CLEAR_REQUEST] = "CALL_CLEAR_REQUEST", + [PPTP_CALL_DISCONNECT_NOTIFY] = "CALL_DISCONNECT_NOTIFY", + [PPTP_WAN_ERROR_NOTIFY] = "WAN_ERROR_NOTIFY", + [PPTP_SET_LINK_INFO] = "SET_LINK_INFO" }; + +const char *pptp_msg_name(u_int16_t msg) +{ + if (msg > PPTP_MSG_MAX) + return pptp_msg_name_array[0]; + + return pptp_msg_name_array[msg]; +} EXPORT_SYMBOL(pptp_msg_name); #endif @@ -276,7 +284,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound; msg = ntohs(ctlh->messageType); - pr_debug("inbound control message %s\n", pptp_msg_name[msg]); + pr_debug("inbound control message %s\n", pptp_msg_name(msg)); switch (msg) { case PPTP_START_SESSION_REPLY: @@ -311,7 +319,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, pcid = pptpReq->ocack.peersCallID; if (info->pns_call_id != pcid) goto invalid; - pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name(msg), ntohs(cid), ntohs(pcid)); if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) { @@ -328,7 +336,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, goto invalid; cid = pptpReq->icreq.callID; - pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid)); info->cstate = PPTP_CALL_IN_REQ; info->pac_call_id = cid; break; @@ -347,7 +355,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, if (info->pns_call_id != pcid) goto invalid; - pr_debug("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); + pr_debug("%s, PCID=%X\n", pptp_msg_name(msg), ntohs(pcid)); info->cstate = PPTP_CALL_IN_CONF; /* we expect a GRE connection from PAC to PNS */ @@ -357,7 +365,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, case PPTP_CALL_DISCONNECT_NOTIFY: /* server confirms disconnect */ cid = pptpReq->disc.callID; - pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid)); info->cstate = PPTP_CALL_NONE; /* untrack this call id, unexpect GRE packets */ @@ -384,7 +392,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff, invalid: pr_debug("invalid %s: type=%d cid=%u pcid=%u " "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", - msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + pptp_msg_name(msg), msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; @@ -404,7 +412,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound; msg = ntohs(ctlh->messageType); - pr_debug("outbound control message %s\n", pptp_msg_name[msg]); + pr_debug("outbound control message %s\n", pptp_msg_name(msg)); switch (msg) { case PPTP_START_SESSION_REQUEST: @@ -426,7 +434,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, info->cstate = PPTP_CALL_OUT_REQ; /* track PNS call id */ cid = pptpReq->ocreq.callID; - pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid)); info->pns_call_id = cid; break; @@ -440,7 +448,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, pcid = pptpReq->icack.peersCallID; if (info->pac_call_id != pcid) goto invalid; - pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], + pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name(msg), ntohs(cid), ntohs(pcid)); if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { @@ -480,7 +488,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff, invalid: pr_debug("invalid %s: type=%d cid=%u pcid=%u " "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", - msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + pptp_msg_name(msg), msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a5f294aa8e4c..5b0d0a77379c 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -103,7 +103,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) if (help->helper->data_len == 0) return -EINVAL; - nla_memcpy(help->data, nla_data(attr), sizeof(help->data)); + nla_memcpy(help->data, attr, sizeof(help->data)); return 0; } @@ -240,6 +240,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = -ENOMEM; goto err2; } + helper->data_len = size; helper->flags |= NF_CT_HELPER_F_USERSPACE; memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple)); diff --git a/net/psample/psample.c b/net/psample/psample.c index 6f2fbc6b9eb2..a042261a45c5 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -14,6 +14,8 @@ #include <net/genetlink.h> #include <net/psample.h> #include <linux/spinlock.h> +#include <net/ip_tunnels.h> +#include <net/dst_metadata.h> #define PSAMPLE_MAX_PACKET_SIZE 0xffff @@ -207,10 +209,159 @@ void psample_group_put(struct psample_group *group) } EXPORT_SYMBOL_GPL(psample_group_put); +#ifdef CONFIG_INET +static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) +{ + unsigned short tun_proto = ip_tunnel_info_af(tun_info); + const void *tun_opts = ip_tunnel_info_opts(tun_info); + const struct ip_tunnel_key *tun_key = &tun_info->key; + int tun_opts_len = tun_info->options_len; + + if (tun_key->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id, + PSAMPLE_TUNNEL_KEY_ATTR_PAD)) + return -EMSGSIZE; + + if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)) + return -EMSGSIZE; + + switch (tun_proto) { + case AF_INET: + if (tun_key->u.ipv4.src && + nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC, + tun_key->u.ipv4.src)) + return -EMSGSIZE; + if (tun_key->u.ipv4.dst && + nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST, + tun_key->u.ipv4.dst)) + return -EMSGSIZE; + break; + case AF_INET6: + if (!ipv6_addr_any(&tun_key->u.ipv6.src) && + nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC, + &tun_key->u.ipv6.src)) + return -EMSGSIZE; + if (!ipv6_addr_any(&tun_key->u.ipv6.dst) && + nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST, + &tun_key->u.ipv6.dst)) + return -EMSGSIZE; + break; + } + if (tun_key->tos && + nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + if (tun_key->tp_src && + nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src)) + return -EMSGSIZE; + if (tun_key->tp_dst && + nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_OAM) && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM)) + return -EMSGSIZE; + if (tun_opts_len) { + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, + tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT && + nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, + tun_opts_len, tun_opts)) + return -EMSGSIZE; + } + + return 0; +} + +static int psample_ip_tun_to_nlattr(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) +{ + struct nlattr *nla; + int err; + + nla = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + err = __psample_ip_tun_to_nlattr(skb, tun_info); + if (err) { + nla_nest_cancel(skb, nla); + return err; + } + + nla_nest_end(skb, nla); + + return 0; +} + +static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) +{ + unsigned short tun_proto = ip_tunnel_info_af(tun_info); + const struct ip_tunnel_key *tun_key = &tun_info->key; + int tun_opts_len = tun_info->options_len; + int sum = 0; + + if (tun_key->tun_flags & TUNNEL_KEY) + sum += nla_total_size(sizeof(u64)); + + if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE) + sum += nla_total_size(0); + + switch (tun_proto) { + case AF_INET: + if (tun_key->u.ipv4.src) + sum += nla_total_size(sizeof(u32)); + if (tun_key->u.ipv4.dst) + sum += nla_total_size(sizeof(u32)); + break; + case AF_INET6: + if (!ipv6_addr_any(&tun_key->u.ipv6.src)) + sum += nla_total_size(sizeof(struct in6_addr)); + if (!ipv6_addr_any(&tun_key->u.ipv6.dst)) + sum += nla_total_size(sizeof(struct in6_addr)); + break; + } + if (tun_key->tos) + sum += nla_total_size(sizeof(u8)); + sum += nla_total_size(sizeof(u8)); /* TTL */ + if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) + sum += nla_total_size(0); + if (tun_key->tun_flags & TUNNEL_CSUM) + sum += nla_total_size(0); + if (tun_key->tp_src) + sum += nla_total_size(sizeof(u16)); + if (tun_key->tp_dst) + sum += nla_total_size(sizeof(u16)); + if (tun_key->tun_flags & TUNNEL_OAM) + sum += nla_total_size(0); + if (tun_opts_len) { + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) + sum += nla_total_size(tun_opts_len); + else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT) + sum += nla_total_size(tun_opts_len); + } + + return sum; +} +#endif + void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, u32 trunc_size, int in_ifindex, int out_ifindex, u32 sample_rate) { +#ifdef CONFIG_INET + struct ip_tunnel_info *tun_info; +#endif struct sk_buff *nl_skb; int data_len; int meta_len; @@ -224,6 +375,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, nla_total_size(sizeof(u32)) + /* group_num */ nla_total_size(sizeof(u32)); /* seq */ +#ifdef CONFIG_INET + tun_info = skb_tunnel_info(skb); + if (tun_info) + meta_len += psample_tunnel_meta_len(tun_info); +#endif + data_len = min(skb->len, trunc_size); if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE) data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN @@ -278,6 +435,14 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, goto error; } +#ifdef CONFIG_INET + if (tun_info) { + ret = psample_ip_tun_to_nlattr(nl_skb, tun_info); + if (unlikely(ret < 0)) + goto error; + } +#endif + genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 3ca196fc7f9b..d8252fdab851 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -714,6 +714,10 @@ void qrtr_ns_init(void) goto err_sock; } + qrtr_ns.workqueue = alloc_workqueue("qrtr_ns_handler", WQ_UNBOUND, 1); + if (!qrtr_ns.workqueue) + goto err_sock; + qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready; sq.sq_port = QRTR_PORT_CTRL; @@ -722,17 +726,13 @@ void qrtr_ns_init(void) ret = kernel_bind(qrtr_ns.sock, (struct sockaddr *)&sq, sizeof(sq)); if (ret < 0) { pr_err("failed to bind to socket\n"); - goto err_sock; + goto err_wq; } qrtr_ns.bcast_sq.sq_family = AF_QIPCRTR; qrtr_ns.bcast_sq.sq_node = QRTR_NODE_BCAST; qrtr_ns.bcast_sq.sq_port = QRTR_PORT_CTRL; - qrtr_ns.workqueue = alloc_workqueue("qrtr_ns_handler", WQ_UNBOUND, 1); - if (!qrtr_ns.workqueue) - goto err_sock; - ret = say_hello(&qrtr_ns.bcast_sq); if (ret < 0) goto err_wq; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 7ed31b5e77e4..2d8d6131bc5f 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -854,7 +854,7 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, } mutex_unlock(&qrtr_node_lock); - qrtr_local_enqueue(node, skb, type, from, to); + qrtr_local_enqueue(NULL, skb, type, from, to); return 0; } diff --git a/net/rds/info.c b/net/rds/info.c index e1d63563e81c..b6b46a8214a0 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -234,7 +234,8 @@ call_func: ret = -EFAULT; out: - unpin_user_pages(pages, nr_pages); + if (pages) + unpin_user_pages(pages, nr_pages); kfree(pages); return ret; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 46782fac4c16..43db0eca911f 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -89,15 +89,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = { { } }; -/* doing it this way avoids calling tcp_sk() */ -void rds_tcp_nonagle(struct socket *sock) -{ - int val = 1; - - kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val, - sizeof(val)); -} - u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) { /* seq# of the last byte of data in tcp send buffer */ @@ -502,7 +493,7 @@ void rds_tcp_tune(struct socket *sock) struct net *net = sock_net(sk); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); - rds_tcp_nonagle(sock); + tcp_sock_set_nodelay(sock->sk); lock_sock(sk); if (rtn->sndbuf_size > 0) { sk->sk_sndbuf = rtn->sndbuf_size; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 3c69361d21c7..bad9cf49d565 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -50,7 +50,6 @@ struct rds_tcp_statistics { /* tcp.c */ void rds_tcp_tune(struct socket *sock); -void rds_tcp_nonagle(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, @@ -71,9 +70,8 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6); void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); -int rds_tcp_keepalive(struct socket *sock); +void rds_tcp_keepalive(struct socket *sock); void *rds_tcp_listen_sock_def_readable(struct net *net); -void rds_tcp_set_linger(struct socket *sock); /* tcp_recv.c */ int rds_tcp_recv_init(void); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 008f50fb25dd..4e64598176b0 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -207,7 +207,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) if (sock) { if (rds_destroy_pending(cp->cp_conn)) - rds_tcp_set_linger(sock); + sock_no_linger(sock->sk); sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); lock_sock(sock->sk); rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */ diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 810a3a49e947..101cf14215a0 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -38,36 +38,19 @@ #include "rds.h" #include "tcp.h" -int rds_tcp_keepalive(struct socket *sock) +void rds_tcp_keepalive(struct socket *sock) { /* values below based on xs_udp_default_timeout */ int keepidle = 5; /* send a probe 'keepidle' secs after last data */ int keepcnt = 5; /* number of unack'ed probes before declaring dead */ - int keepalive = 1; - int ret = 0; - - ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&keepalive, sizeof(keepalive)); - if (ret < 0) - goto bail; - - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, - (char *)&keepcnt, sizeof(keepcnt)); - if (ret < 0) - goto bail; - - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, - (char *)&keepidle, sizeof(keepidle)); - if (ret < 0) - goto bail; + sock_set_keepalive(sock->sk); + tcp_sock_set_keepcnt(sock->sk, keepcnt); + tcp_sock_set_keepidle(sock->sk, keepidle); /* KEEPINTVL is the interval between successive probes. We follow * the model in xs_tcp_finish_connecting() and re-use keepidle. */ - ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, - (char *)&keepidle, sizeof(keepidle)); -bail: - return ret; + tcp_sock_set_keepintvl(sock->sk, keepidle); } /* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the @@ -111,17 +94,6 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) return NULL; } -void rds_tcp_set_linger(struct socket *sock) -{ - struct linger no_linger = { - .l_onoff = 1, - .l_linger = 0, - }; - - kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&no_linger, sizeof(no_linger)); -} - int rds_tcp_accept_one(struct socket *sock) { struct socket *new_sock = NULL; @@ -160,10 +132,7 @@ int rds_tcp_accept_one(struct socket *sock) new_sock->ops = sock->ops; __module_get(new_sock->ops->owner); - ret = rds_tcp_keepalive(new_sock); - if (ret < 0) - goto out; - + rds_tcp_keepalive(new_sock); rds_tcp_tune(new_sock); inet = inet_sk(new_sock->sk); @@ -241,7 +210,7 @@ rst_nsk: * be pending on it. By setting linger, we achieve the side-effect * of avoiding TIME_WAIT state on new_sock. */ - rds_tcp_set_linger(new_sock); + sock_no_linger(new_sock->sk); kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: @@ -303,7 +272,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6) } sock->sk->sk_reuse = SK_CAN_REUSE; - rds_tcp_nonagle(sock); + tcp_sock_set_nodelay(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_user_data = sock->sk->sk_data_ready; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 78a2554a4497..8c4d1d6e9249 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -38,23 +38,18 @@ #include "rds.h" #include "tcp.h" -static void rds_tcp_cork(struct socket *sock, int val) -{ - kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (void *)&val, sizeof(val)); -} - void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp) { struct rds_tcp_connection *tc = cp->cp_transport_data; - rds_tcp_cork(tc->t_sock, 1); + tcp_sock_set_cork(tc->t_sock->sk, true); } void rds_tcp_xmit_path_complete(struct rds_conn_path *cp) { struct rds_tcp_connection *tc = cp->cp_transport_data; - rds_tcp_cork(tc->t_sock, 0); + tcp_sock_set_cork(tc->t_sock->sk, false); } /* the core send_sem serializes this with other xmit and shutdown */ diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 6ffb7e9887ce..ddd0f95713a9 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -25,6 +25,7 @@ rxrpc-y := \ peer_event.o \ peer_object.o \ recvmsg.o \ + rtt.o \ security.o \ sendmsg.o \ skbuff.o \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 15ee92d79581..394189b81849 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -571,6 +571,19 @@ out: return ret; } +int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val) +{ + if (sk->sk_state != RXRPC_UNBOUND) + return -EISCONN; + if (val > RXRPC_SECURITY_MAX) + return -EINVAL; + lock_sock(sk); + rxrpc_sk(sk)->min_sec_level = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(rxrpc_sock_set_min_security_level); + /* * set RxRPC socket options */ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 3eb1ab40ca5c..9fe264bec70c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -7,6 +7,7 @@ #include <linux/atomic.h> #include <linux/seqlock.h> +#include <linux/win_minmax.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> @@ -311,11 +312,14 @@ struct rxrpc_peer { #define RXRPC_RTT_CACHE_SIZE 32 spinlock_t rtt_input_lock; /* RTT lock for input routine */ ktime_t rtt_last_req; /* Time of last RTT request */ - u64 rtt; /* Current RTT estimate (in nS) */ - u64 rtt_sum; /* Sum of cache contents */ - u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ - u8 rtt_cursor; /* next entry at which to insert */ - u8 rtt_usage; /* amount of cache actually used */ + unsigned int rtt_count; /* Number of samples we've got */ + + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 mdev_us; /* medium deviation */ + u32 mdev_max_us; /* maximal mdev for the last rtt period */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 rto_j; /* Retransmission timeout in jiffies */ + u8 backoff; /* Backoff timeout */ u8 cong_cwnd; /* Congestion window size */ }; @@ -1041,7 +1045,6 @@ extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; -extern unsigned long rxrpc_resend_timeout; extern const s8 rxrpc_ack_priority[]; @@ -1069,8 +1072,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); * peer_event.c */ void rxrpc_error_report(struct sock *); -void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, - rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); void rxrpc_peer_keepalive_worker(struct work_struct *); /* @@ -1103,6 +1104,14 @@ void rxrpc_notify_socket(struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* + * rtt.c + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool); +void rxrpc_peer_init_rtt(struct rxrpc_peer *); + +/* * rxkad.c */ #ifdef CONFIG_RXKAD diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 70e44abf106c..b7611cc159e5 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -248,7 +248,7 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ktime_t now = skb->tstamp; - if (call->peer->rtt_usage < 3 || + if (call->peer->rtt_count < 3 || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, true, true, diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index cedbbb3a7c2e..2a65ac41055f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -111,8 +111,8 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, } else { unsigned long now = jiffies, ack_at; - if (call->peer->rtt_usage > 0) - ack_at = nsecs_to_jiffies(call->peer->rtt); + if (call->peer->srtt_us != 0) + ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3); else ack_at = expiry; @@ -157,24 +157,18 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at; + unsigned long resend_at, rto_j; rxrpc_seq_t cursor, seq, top; - ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo; + ktime_t now, max_age, oldest, ack_ts; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - if (call->peer->rtt_usage > 1) - timeout = ns_to_ktime(call->peer->rtt * 3 / 2); - else - timeout = ms_to_ktime(rxrpc_resend_timeout); - min_timeo = ns_to_ktime((1000000000 / HZ) * 4); - if (ktime_before(timeout, min_timeo)) - timeout = min_timeo; + rto_j = call->peer->rto_j; now = ktime_get_real(); - max_age = ktime_sub(now, timeout); + max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); spin_lock_bh(&call->lock); @@ -219,7 +213,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rxrpc_resend_timeout; + resend_at += jiffies + rto_j; WRITE_ONCE(call->resend_at, resend_at); if (unacked) @@ -234,7 +228,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) rxrpc_timer_set_for_resend); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); - if (ktime_to_ns(ack_ts) < call->peer->rtt) + if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 69e09d69c896..3be4177baf70 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -91,11 +91,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, /* We analyse the number of packets that get ACK'd per RTT * period and increase the window if we managed to fill it. */ - if (call->peer->rtt_usage == 0) + if (call->peer->rtt_count == 0) goto out; if (ktime_before(skb->tstamp, - ktime_add_ns(call->cong_tstamp, - call->peer->rtt))) + ktime_add_us(call->cong_tstamp, + call->peer->srtt_us >> 3))) goto out_no_clear_ca; change = rxrpc_cong_rtt_window_end; call->cong_tstamp = skb->tstamp; @@ -803,6 +803,30 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, } /* + * Return true if the ACK is valid - ie. it doesn't appear to have regressed + * with respect to the ack state conveyed by preceding ACKs. + */ +static bool rxrpc_is_ack_valid(struct rxrpc_call *call, + rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) +{ + rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq); + + if (after(first_pkt, base)) + return true; /* The window advanced */ + + if (before(first_pkt, base)) + return false; /* firstPacket regressed */ + + if (after_eq(prev_pkt, call->ackr_prev_seq)) + return true; /* previousPacket hasn't regressed. */ + + /* Some rx implementations put a serial number in previousPacket. */ + if (after_eq(prev_pkt, base + call->tx_winsize)) + return false; + return true; +} + +/* * Process an ACK packet. * * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet @@ -865,9 +889,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } /* Discard any out-of-order or duplicate ACKs (outside lock). */ - if (before(first_soft_ack, call->ackr_first_seq) || - before(prev_pkt, call->ackr_prev_seq)) + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial, + first_soft_ack, call->ackr_first_seq, + prev_pkt, call->ackr_prev_seq); return; + } buf.info.rxMTU = 0; ioffset = offset + nr_acks + 3; @@ -878,9 +905,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) spin_lock(&call->input_lock); /* Discard any out-of-order or duplicate ACKs (inside lock). */ - if (before(first_soft_ack, call->ackr_first_seq) || - before(prev_pkt, call->ackr_prev_seq)) + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial, + first_soft_ack, call->ackr_first_seq, + prev_pkt, call->ackr_prev_seq); goto out; + } call->acks_latest_ts = skb->tstamp; call->ackr_first_seq = first_soft_ack; diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 01135e54d95d..c8b2097f499c 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -107,7 +107,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { struct sock *usk; - int ret, opt; + int ret; _enter("%p{%d,%d}", local, local->srx.transport_type, local->srx.transport.family); @@ -157,13 +157,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) switch (local->srx.transport.family) { case AF_INET6: /* we want to receive ICMPv6 errors */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + ip6_sock_set_recverr(local->socket->sk); /* Fall through and set IPv4 options too otherwise we don't get * errors from IPv4 packets sent through the IPv6 socket. @@ -171,31 +165,13 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) /* Fall through */ case AF_INET: /* we want to receive ICMP errors */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + ip_sock_set_recverr(local->socket->sk); /* we want to set the don't fragment bit */ - opt = IP_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO); /* We want receive timestamps. */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD, - (char *)&opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + sock_enable_timestamps(local->socket->sk); break; default: diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 214405f75346..d4144fd86f84 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -63,11 +63,6 @@ unsigned int rxrpc_rx_mtu = 5692; */ unsigned int rxrpc_rx_jumbo_max = 4; -/* - * Time till packet resend (in milliseconds). - */ -unsigned long rxrpc_resend_timeout = 4 * HZ; - const s8 rxrpc_ack_priority[] = { [0] = 0, [RXRPC_ACK_DELAY] = 1, diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 90e263c6aa69..1ba43c3df4ad 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -321,7 +321,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, struct kvec iov[2]; rxrpc_serial_t serial; size_t len; - int ret, opt; + int ret; _enter(",{%d}", skb->len); @@ -369,7 +369,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || retrans || call->cong_mode == RXRPC_CALL_SLOW_START || - (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + (call->peer->rtt_count < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real()))) whdr.flags |= RXRPC_REQUEST_ACK; @@ -423,13 +423,10 @@ done: if (whdr.flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = skb->tstamp; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); - if (call->peer->rtt_usage > 1) { + if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); - if (ack_lost_at < 1) - ack_lost_at = 1; - + ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, @@ -476,18 +473,14 @@ send_fragmentable: switch (conn->params.local->srx.transport.family) { case AF_INET6: case AF_INET: - opt = IP_PMTUDISC_DONT; - kernel_setsockopt(conn->params.local->socket, - SOL_IP, IP_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); + ip_sock_set_mtu_discover(conn->params.local->socket->sk, + IP_PMTUDISC_DONT); ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); conn->params.peer->last_tx_at = ktime_get_seconds(); - opt = IP_PMTUDISC_DO; - kernel_setsockopt(conn->params.local->socket, - SOL_IP, IP_MTU_DISCOVER, - (char *)&opt, sizeof(opt)); + ip_sock_set_mtu_discover(conn->params.local->socket->sk, + IP_PMTUDISC_DO); break; default: diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 923b263c401b..b1449d971883 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -296,52 +296,6 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, } /* - * Add RTT information to cache. This is called in softirq mode and has - * exclusive access to the peer RTT data. - */ -void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, - rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, - ktime_t send_time, ktime_t resp_time) -{ - struct rxrpc_peer *peer = call->peer; - s64 rtt; - u64 sum = peer->rtt_sum, avg; - u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage; - - rtt = ktime_to_ns(ktime_sub(resp_time, send_time)); - if (rtt < 0) - return; - - spin_lock(&peer->rtt_input_lock); - - /* Replace the oldest datum in the RTT buffer */ - sum -= peer->rtt_cache[cursor]; - sum += rtt; - peer->rtt_cache[cursor] = rtt; - peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1); - peer->rtt_sum = sum; - if (usage < RXRPC_RTT_CACHE_SIZE) { - usage++; - peer->rtt_usage = usage; - } - - spin_unlock(&peer->rtt_input_lock); - - /* Now recalculate the average */ - if (usage == RXRPC_RTT_CACHE_SIZE) { - avg = sum / RXRPC_RTT_CACHE_SIZE; - } else { - avg = sum; - do_div(avg, usage); - } - - /* Don't need to update this under lock */ - peer->rtt = avg; - trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, - usage, avg); -} - -/* * Perform keep-alive pings. */ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 452163eadb98..ca29976bb193 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -225,6 +225,8 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) spin_lock_init(&peer->rtt_input_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); + rxrpc_peer_init_rtt(peer); + if (RXRPC_TX_SMSS > 2190) peer->cong_cwnd = 2; else if (RXRPC_TX_SMSS > 1095) @@ -497,14 +499,14 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, EXPORT_SYMBOL(rxrpc_kernel_get_peer); /** - * rxrpc_kernel_get_rtt - Get a call's peer RTT + * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT * @sock: The socket on which the call is in progress. * @call: The call to query * - * Get the call's peer RTT. + * Get the call's peer smoothed RTT. */ -u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call) +u32 rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call) { - return call->peer->rtt; + return call->peer->srtt_us >> 3; } -EXPORT_SYMBOL(rxrpc_kernel_get_rtt); +EXPORT_SYMBOL(rxrpc_kernel_get_srtt); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index b9d053e42821..8b179e3c802a 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -222,7 +222,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Proto Local " " Remote " - " Use CW MTU LastUse RTT Rc\n" + " Use CW MTU LastUse RTT RTO\n" ); return 0; } @@ -236,15 +236,15 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) now = ktime_get_seconds(); seq_printf(seq, "UDP %-47.47s %-47.47s %3u" - " %3u %5u %6llus %12llu %2u\n", + " %3u %5u %6llus %8u %8u\n", lbuff, rbuff, atomic_read(&peer->usage), peer->cong_cwnd, peer->mtu, now - peer->last_tx_at, - peer->rtt, - peer->rtt_cursor); + peer->srtt_us >> 3, + jiffies_to_usecs(peer->rto_j)); return 0; } diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c new file mode 100644 index 000000000000..928d8b34a3ee --- /dev/null +++ b/net/rxrpc/rtt.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* RTT/RTO calculation. + * + * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) + * + * https://tools.ietf.org/html/rfc6298 + * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 + * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf + */ + +#include <linux/net.h> +#include "ar-internal.h" + +#define RXRPC_RTO_MAX ((unsigned)(120 * HZ)) +#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ +#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ +#define rxrpc_min_rtt_wlen 300 /* As sysctl_tcp_min_rtt_wlen */ + +static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) +{ + return 200; +} + +static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) +{ + return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); +} + +static u32 rxrpc_bound_rto(u32 rto) +{ + return min(rto, RXRPC_RTO_MAX); +} + +/* + * Called to compute a smoothed rtt estimate. The data fed to this + * routine either comes from timestamps, or from segments that were + * known _not_ to have been retransmitted [see Karn/Partridge + * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 + * piece by Van Jacobson. + * NOTE: the next three routines used to be one big routine. + * To save cycles in the RFC 1323 implementation it was better to break + * it up into three procedures. -- erics + */ +static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) +{ + long m = sample_rtt_us; /* RTT */ + u32 srtt = peer->srtt_us; + + /* The following amusing code comes from Jacobson's + * article in SIGCOMM '88. Note that rtt and mdev + * are scaled versions of rtt and mean deviation. + * This is designed to be as fast as possible + * m stands for "measurement". + * + * On a 1990 paper the rto value is changed to: + * RTO = rtt + 4 * mdev + * + * Funny. This algorithm seems to be very broken. + * These formulae increase RTO, when it should be decreased, increase + * too slowly, when it should be increased quickly, decrease too quickly + * etc. I guess in BSD RTO takes ONE value, so that it is absolutely + * does not matter how to _calculate_ it. Seems, it was trap + * that VJ failed to avoid. 8) + */ + if (srtt != 0) { + m -= (srtt >> 3); /* m is now error in rtt est */ + srtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (m < 0) { + m = -m; /* m is now abs(error) */ + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + /* This is similar to one of Eifel findings. + * Eifel blocks mdev updates when rtt decreases. + * This solution is a bit different: we use finer gain + * for mdev in this case (alpha*beta). + * Like Eifel it also prevents growth of rto, + * but also it limits too fast rto decreases, + * happening in pure Eifel. + */ + if (m > 0) + m >>= 3; + } else { + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + } + + peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (peer->mdev_us > peer->mdev_max_us) { + peer->mdev_max_us = peer->mdev_us; + if (peer->mdev_max_us > peer->rttvar_us) + peer->rttvar_us = peer->mdev_max_us; + } + } else { + /* no previous measure. */ + srtt = m << 3; /* take the measured time to be rtt */ + peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ + peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); + peer->mdev_max_us = peer->rttvar_us; + } + + peer->srtt_us = max(1U, srtt); +} + +/* + * Calculate rto without backoff. This is the second half of Van Jacobson's + * routine referred to above. + */ +static void rxrpc_set_rto(struct rxrpc_peer *peer) +{ + u32 rto; + + /* 1. If rtt variance happened to be less 50msec, it is hallucination. + * It cannot be less due to utterly erratic ACK generation made + * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ + * to do with delayed acks, because at cwnd>2 true delack timeout + * is invisible. Actually, Linux-2.4 also generates erratic + * ACKs in some circumstances. + */ + rto = __rxrpc_set_rto(peer); + + /* 2. Fixups made earlier cannot be right. + * If we do not estimate RTO correctly without them, + * all the algo is pure shit and should be replaced + * with correct one. It is exactly, which we pretend to do. + */ + + /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo + * guarantees that rto is higher. + */ + peer->rto_j = rxrpc_bound_rto(rto); +} + +static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) +{ + if (rtt_us < 0) + return; + + //rxrpc_update_rtt_min(peer, rtt_us); + rxrpc_rtt_estimator(peer, rtt_us); + rxrpc_set_rto(peer); + + /* RFC6298: only reset backoff on valid RTT measurement. */ + peer->backoff = 0; +} + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt_us; + + rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); + if (rtt_us < 0) + return; + + spin_lock(&peer->rtt_input_lock); + rxrpc_ack_update_rtt(peer, rtt_us); + if (peer->rtt_count < 3) + peer->rtt_count++; + spin_unlock(&peer->rtt_input_lock); + + trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, + peer->srtt_us >> 3, peer->rto_j); +} + +/* + * Get the retransmission timeout to set in jiffies, backing it off each time + * we retransmit. + */ +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) +{ + u64 timo_j; + u8 backoff = READ_ONCE(peer->backoff); + + timo_j = peer->rto_j; + timo_j <<= backoff; + if (retrans && timo_j * 2 <= RXRPC_RTO_MAX) + WRITE_ONCE(peer->backoff, backoff + 1); + + if (timo_j < 1) + timo_j = 1; + + return timo_j; +} + +void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) +{ + peer->rto_j = RXRPC_TIMEOUT_INIT; + peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT); + peer->backoff = 0; + //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 098f1f9ec53b..52a24d4ef5d8 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1148,7 +1148,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, &expiry, _abort_code); if (ret < 0) - goto temporary_error_free_resp; + goto temporary_error_free_ticket; /* use the session key from inside the ticket to decrypt the * response */ @@ -1230,7 +1230,6 @@ protocol_error: temporary_error_free_ticket: kfree(ticket); -temporary_error_free_resp: kfree(response); temporary_error: /* Ignore the response packet if we got a temporary error such as diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0fcf157aa09f..5e9c43d4a314 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -66,15 +66,14 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, struct rxrpc_call *call) { rxrpc_seq_t tx_start, tx_win; - signed long rtt2, timeout; - u64 rtt; + signed long rtt, timeout; - rtt = READ_ONCE(call->peer->rtt); - rtt2 = nsecs_to_jiffies64(rtt) * 2; - if (rtt2 < 2) - rtt2 = 2; + rtt = READ_ONCE(call->peer->srtt_us) >> 3; + rtt = usecs_to_jiffies(rtt) * 2; + if (rtt < 2) + rtt = 2; - timeout = rtt2; + timeout = rtt; tx_start = READ_ONCE(call->tx_hard_ack); for (;;) { @@ -92,7 +91,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, return -EINTR; if (tx_win != tx_start) { - timeout = rtt2; + timeout = rtt; tx_start = tx_win; } @@ -271,16 +270,9 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - unsigned long now = jiffies, resend_at; + unsigned long now = jiffies; + unsigned long resend_at = now + call->peer->rto_j; - if (call->peer->rtt_usage > 1) - resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2); - else - resend_at = rxrpc_resend_timeout; - if (resend_at < 1) - resend_at = 1; - - resend_at += now; WRITE_ONCE(call->resend_at, resend_at); rxrpc_reduce_call_timer(call, resend_at, now, rxrpc_timer_set_for_send); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 174e903e18de..e91acc95ff28 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -71,15 +71,6 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra1 = (void *)&one_jiffy, .extra2 = (void *)&max_jiffies, }, - { - .procname = "resend_timeout", - .data = &rxrpc_resend_timeout, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = proc_doulongvec_ms_jiffies_minmax, - .extra1 = (void *)&one_jiffy, - .extra2 = (void *)&max_jiffies, - }, /* Non-time values */ { diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9adff83b523b..e29f0f45d688 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -200,6 +200,9 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net, const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; struct nf_conntrack_tuple target; + if (!(ct->status & IPS_NAT_MASK)) + return 0; + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); switch (tuple->src.l3num) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 0c574700da75..96f5999281e0 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -668,6 +668,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_MPLS_OPTS] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_TCP_FLAGS] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_IP_TOS] = { .type = NLA_U8 }, @@ -726,6 +727,20 @@ erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = { [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 }, }; +static const struct nla_policy +mpls_opts_policy[TCA_FLOWER_KEY_MPLS_OPTS_MAX + 1] = { + [TCA_FLOWER_KEY_MPLS_OPTS_LSE] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy +mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = { + [TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_OPT_LSE_TC] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 }, +}; + static void fl_set_key_val(struct nlattr **tb, void *val, int val_type, void *mask, int mask_type, int len) @@ -776,14 +791,157 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key, return 0; } +static int fl_set_key_mpls_lse(const struct nlattr *nla_lse, + struct flow_dissector_key_mpls *key_val, + struct flow_dissector_key_mpls *key_mask, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1]; + struct flow_dissector_mpls_lse *lse_mask; + struct flow_dissector_mpls_lse *lse_val; + u8 lse_index; + u8 depth; + int err; + + err = nla_parse_nested(tb, TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX, nla_lse, + mpls_stack_entry_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]) { + NL_SET_ERR_MSG(extack, "Missing MPLS option \"depth\""); + return -EINVAL; + } + + depth = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]); + + /* LSE depth starts at 1, for consistency with terminology used by + * RFC 3031 (section 3.9), where depth 0 refers to unlabeled packets. + */ + if (depth < 1 || depth > FLOW_DIS_MPLS_MAX) { + NL_SET_ERR_MSG_ATTR(extack, + tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH], + "Invalid MPLS depth"); + return -EINVAL; + } + lse_index = depth - 1; + + dissector_set_mpls_lse(key_val, lse_index); + dissector_set_mpls_lse(key_mask, lse_index); + + lse_val = &key_val->ls[lse_index]; + lse_mask = &key_mask->ls[lse_index]; + + if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]) { + lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]); + lse_mask->mpls_ttl = MPLS_TTL_MASK; + } + if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]) { + u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]); + + if (bos & ~MPLS_BOS_MASK) { + NL_SET_ERR_MSG_ATTR(extack, + tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS], + "Bottom Of Stack (BOS) must be 0 or 1"); + return -EINVAL; + } + lse_val->mpls_bos = bos; + lse_mask->mpls_bos = MPLS_BOS_MASK; + } + if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]) { + u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]); + + if (tc & ~MPLS_TC_MASK) { + NL_SET_ERR_MSG_ATTR(extack, + tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC], + "Traffic Class (TC) must be between 0 and 7"); + return -EINVAL; + } + lse_val->mpls_tc = tc; + lse_mask->mpls_tc = MPLS_TC_MASK; + } + if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]) { + u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]); + + if (label & ~MPLS_LABEL_MASK) { + NL_SET_ERR_MSG_ATTR(extack, + tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL], + "Label must be between 0 and 1048575"); + return -EINVAL; + } + lse_val->mpls_label = label; + lse_mask->mpls_label = MPLS_LABEL_MASK; + } + + return 0; +} + +static int fl_set_key_mpls_opts(const struct nlattr *nla_mpls_opts, + struct flow_dissector_key_mpls *key_val, + struct flow_dissector_key_mpls *key_mask, + struct netlink_ext_ack *extack) +{ + struct nlattr *nla_lse; + int rem; + int err; + + if (!(nla_mpls_opts->nla_type & NLA_F_NESTED)) { + NL_SET_ERR_MSG_ATTR(extack, nla_mpls_opts, + "NLA_F_NESTED is missing"); + return -EINVAL; + } + + nla_for_each_nested(nla_lse, nla_mpls_opts, rem) { + if (nla_type(nla_lse) != TCA_FLOWER_KEY_MPLS_OPTS_LSE) { + NL_SET_ERR_MSG_ATTR(extack, nla_lse, + "Invalid MPLS option type"); + return -EINVAL; + } + + err = fl_set_key_mpls_lse(nla_lse, key_val, key_mask, extack); + if (err < 0) + return err; + } + if (rem) { + NL_SET_ERR_MSG(extack, + "Bytes leftover after parsing MPLS options"); + return -EINVAL; + } + + return 0; +} + static int fl_set_key_mpls(struct nlattr **tb, struct flow_dissector_key_mpls *key_val, struct flow_dissector_key_mpls *key_mask, struct netlink_ext_ack *extack) { + struct flow_dissector_mpls_lse *lse_mask; + struct flow_dissector_mpls_lse *lse_val; + + if (tb[TCA_FLOWER_KEY_MPLS_OPTS]) { + if (tb[TCA_FLOWER_KEY_MPLS_TTL] || + tb[TCA_FLOWER_KEY_MPLS_BOS] || + tb[TCA_FLOWER_KEY_MPLS_TC] || + tb[TCA_FLOWER_KEY_MPLS_LABEL]) { + NL_SET_ERR_MSG_ATTR(extack, + tb[TCA_FLOWER_KEY_MPLS_OPTS], + "MPLS label, Traffic Class, Bottom Of Stack and Time To Live must be encapsulated in the MPLS options attribute"); + return -EBADMSG; + } + + return fl_set_key_mpls_opts(tb[TCA_FLOWER_KEY_MPLS_OPTS], + key_val, key_mask, extack); + } + + lse_val = &key_val->ls[0]; + lse_mask = &key_mask->ls[0]; + if (tb[TCA_FLOWER_KEY_MPLS_TTL]) { - key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]); - key_mask->mpls_ttl = MPLS_TTL_MASK; + lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]); + lse_mask->mpls_ttl = MPLS_TTL_MASK; + dissector_set_mpls_lse(key_val, 0); + dissector_set_mpls_lse(key_mask, 0); } if (tb[TCA_FLOWER_KEY_MPLS_BOS]) { u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]); @@ -794,8 +952,10 @@ static int fl_set_key_mpls(struct nlattr **tb, "Bottom Of Stack (BOS) must be 0 or 1"); return -EINVAL; } - key_val->mpls_bos = bos; - key_mask->mpls_bos = MPLS_BOS_MASK; + lse_val->mpls_bos = bos; + lse_mask->mpls_bos = MPLS_BOS_MASK; + dissector_set_mpls_lse(key_val, 0); + dissector_set_mpls_lse(key_mask, 0); } if (tb[TCA_FLOWER_KEY_MPLS_TC]) { u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]); @@ -806,8 +966,10 @@ static int fl_set_key_mpls(struct nlattr **tb, "Traffic Class (TC) must be between 0 and 7"); return -EINVAL; } - key_val->mpls_tc = tc; - key_mask->mpls_tc = MPLS_TC_MASK; + lse_val->mpls_tc = tc; + lse_mask->mpls_tc = MPLS_TC_MASK; + dissector_set_mpls_lse(key_val, 0); + dissector_set_mpls_lse(key_mask, 0); } if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) { u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]); @@ -818,8 +980,10 @@ static int fl_set_key_mpls(struct nlattr **tb, "Label must be between 0 and 1048575"); return -EINVAL; } - key_val->mpls_label = label; - key_mask->mpls_label = MPLS_LABEL_MASK; + lse_val->mpls_label = label; + lse_mask->mpls_label = MPLS_LABEL_MASK; + dissector_set_mpls_lse(key_val, 0); + dissector_set_mpls_lse(key_mask, 0); } return 0; } @@ -2218,35 +2382,132 @@ static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key, return 0; } +static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb, + struct flow_dissector_key_mpls *mpls_key, + struct flow_dissector_key_mpls *mpls_mask, + u8 lse_index) +{ + struct flow_dissector_mpls_lse *lse_mask = &mpls_mask->ls[lse_index]; + struct flow_dissector_mpls_lse *lse_key = &mpls_key->ls[lse_index]; + int err; + + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH, + lse_index + 1); + if (err) + return err; + + if (lse_mask->mpls_ttl) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL, + lse_key->mpls_ttl); + if (err) + return err; + } + if (lse_mask->mpls_bos) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS, + lse_key->mpls_bos); + if (err) + return err; + } + if (lse_mask->mpls_tc) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TC, + lse_key->mpls_tc); + if (err) + return err; + } + if (lse_mask->mpls_label) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, + lse_key->mpls_label); + if (err) + return err; + } + + return 0; +} + +static int fl_dump_key_mpls_opts(struct sk_buff *skb, + struct flow_dissector_key_mpls *mpls_key, + struct flow_dissector_key_mpls *mpls_mask) +{ + struct nlattr *opts; + struct nlattr *lse; + u8 lse_index; + int err; + + opts = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS); + if (!opts) + return -EMSGSIZE; + + for (lse_index = 0; lse_index < FLOW_DIS_MPLS_MAX; lse_index++) { + if (!(mpls_mask->used_lses & 1 << lse_index)) + continue; + + lse = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS_LSE); + if (!lse) { + err = -EMSGSIZE; + goto err_opts; + } + + err = fl_dump_key_mpls_opt_lse(skb, mpls_key, mpls_mask, + lse_index); + if (err) + goto err_opts_lse; + nla_nest_end(skb, lse); + } + nla_nest_end(skb, opts); + + return 0; + +err_opts_lse: + nla_nest_cancel(skb, lse); +err_opts: + nla_nest_cancel(skb, opts); + + return err; +} + static int fl_dump_key_mpls(struct sk_buff *skb, struct flow_dissector_key_mpls *mpls_key, struct flow_dissector_key_mpls *mpls_mask) { + struct flow_dissector_mpls_lse *lse_mask; + struct flow_dissector_mpls_lse *lse_key; int err; - if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask))) + if (!mpls_mask->used_lses) return 0; - if (mpls_mask->mpls_ttl) { + + lse_mask = &mpls_mask->ls[0]; + lse_key = &mpls_key->ls[0]; + + /* For backward compatibility, don't use the MPLS nested attributes if + * the rule can be expressed using the old attributes. + */ + if (mpls_mask->used_lses & ~1 || + (!lse_mask->mpls_ttl && !lse_mask->mpls_bos && + !lse_mask->mpls_tc && !lse_mask->mpls_label)) + return fl_dump_key_mpls_opts(skb, mpls_key, mpls_mask); + + if (lse_mask->mpls_ttl) { err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL, - mpls_key->mpls_ttl); + lse_key->mpls_ttl); if (err) return err; } - if (mpls_mask->mpls_tc) { + if (lse_mask->mpls_tc) { err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC, - mpls_key->mpls_tc); + lse_key->mpls_tc); if (err) return err; } - if (mpls_mask->mpls_label) { + if (lse_mask->mpls_label) { err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL, - mpls_key->mpls_label); + lse_key->mpls_label); if (err) return err; } - if (mpls_mask->mpls_bos) { + if (lse_mask->mpls_bos) { err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS, - mpls_key->mpls_bos); + lse_key->mpls_bos); if (err) return err; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 0d99df1e764d..9a3449b56bd6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -32,6 +32,8 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> +#include <trace/events/qdisc.h> + /* Short review. @@ -1283,6 +1285,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, } qdisc_hash_add(sch, false); + trace_qdisc_create(ops, dev, parent); return sch; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 1496e87cd07b..60f8ae578819 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -584,26 +584,48 @@ static bool cobalt_should_drop(struct cobalt_vars *vars, return drop; } -static void cake_update_flowkeys(struct flow_keys *keys, +static bool cake_update_flowkeys(struct flow_keys *keys, const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) struct nf_conntrack_tuple tuple = {}; - bool rev = !skb->_nfct; + bool rev = !skb->_nfct, upd = false; + __be32 ip; if (tc_skb_protocol(skb) != htons(ETH_P_IP)) - return; + return false; if (!nf_ct_get_tuple_skb(&tuple, skb)) - return; + return false; - keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip; - keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip; + ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip; + if (ip != keys->addrs.v4addrs.src) { + keys->addrs.v4addrs.src = ip; + upd = true; + } + ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip; + if (ip != keys->addrs.v4addrs.dst) { + keys->addrs.v4addrs.dst = ip; + upd = true; + } if (keys->ports.ports) { - keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all; - keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all; + __be16 port; + + port = rev ? tuple.dst.u.all : tuple.src.u.all; + if (port != keys->ports.src) { + keys->ports.src = port; + upd = true; + } + port = rev ? tuple.src.u.all : tuple.dst.u.all; + if (port != keys->ports.dst) { + port = keys->ports.dst; + upd = true; + } } + return upd; +#else + return false; #endif } @@ -624,23 +646,36 @@ static bool cake_ddst(int flow_mode) static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, int flow_mode, u16 flow_override, u16 host_override) { + bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS)); + bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS)); + bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG); u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0; u16 reduced_hash, srchost_idx, dsthost_idx; struct flow_keys keys, host_keys; + bool use_skbhash = skb->l4_hash; if (unlikely(flow_mode == CAKE_FLOW_NONE)) return 0; - /* If both overrides are set we can skip packet dissection entirely */ - if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) && - (host_override || !(flow_mode & CAKE_FLOW_HOSTS))) + /* If both overrides are set, or we can use the SKB hash and nat mode is + * disabled, we can skip packet dissection entirely. If nat mode is + * enabled there's another check below after doing the conntrack lookup. + */ + if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts) goto skip_hash; skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); - if (flow_mode & CAKE_FLOW_NAT_FLAG) - cake_update_flowkeys(&keys, skb); + /* Don't use the SKB hash if we change the lookup keys from conntrack */ + if (nat_enabled && cake_update_flowkeys(&keys, skb)) + use_skbhash = false; + + /* If we can still use the SKB hash and don't need the host hash, we can + * skip the rest of the hashing procedure + */ + if (use_skbhash && !hash_hosts) + goto skip_hash; /* flow_hash_from_keys() sorts the addresses by value, so we have * to preserve their order in a separate data structure to treat @@ -679,12 +714,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, /* This *must* be after the above switch, since as a * side-effect it sorts the src and dst addresses. */ - if (flow_mode & CAKE_FLOW_FLOWS) + if (hash_flows && !use_skbhash) flow_hash = flow_hash_from_keys(&keys); skip_hash: if (flow_override) flow_hash = flow_override - 1; + else if (use_skbhash) + flow_hash = skb->hash; if (host_override) { dsthost_hash = host_override - 1; srchost_hash = host_override - 1; diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index a9da8776bf5b..fb760cee824e 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -297,9 +297,9 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, goto flow_error; } q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]); - if (!q->flows_cnt || q->flows_cnt > 65536) { + if (!q->flows_cnt || q->flows_cnt >= 65536) { NL_SET_ERR_MSG_MOD(extack, - "Number of flows must be < 65536"); + "Number of flows must range in [1..65535]"); goto flow_error; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ebc55d884247..b19a0021a0bd 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -896,8 +896,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, } sch->parent = parentid; - if (!ops->init || ops->init(sch, NULL, extack) == 0) + if (!ops->init || ops->init(sch, NULL, extack) == 0) { + trace_qdisc_create(ops, dev_queue->dev, parentid); return sch; + } qdisc_put(sch); return NULL; @@ -911,6 +913,8 @@ void qdisc_reset(struct Qdisc *qdisc) const struct Qdisc_ops *ops = qdisc->ops; struct sk_buff *skb, *tmp; + trace_qdisc_reset(qdisc); + if (ops->reset) ops->reset(qdisc); @@ -949,7 +953,6 @@ static void qdisc_free_cb(struct rcu_head *head) static void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; - struct sk_buff *skb, *tmp; #ifdef CONFIG_NET_SCHED qdisc_hash_del(qdisc); @@ -957,23 +960,16 @@ static void qdisc_destroy(struct Qdisc *qdisc) qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif gen_kill_estimator(&qdisc->rate_est); - if (ops->reset) - ops->reset(qdisc); + + qdisc_reset(qdisc); + if (ops->destroy) ops->destroy(qdisc); module_put(ops->owner); dev_put(qdisc_dev(qdisc)); - skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) { - __skb_unlink(skb, &qdisc->gso_skb); - kfree_skb_list(skb); - } - - skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) { - __skb_unlink(skb, &qdisc->skb_bad_txq); - kfree_skb_list(skb); - } + trace_qdisc_destroy(qdisc); call_rcu(&qdisc->rcu, qdisc_free_cb); } @@ -1132,6 +1128,28 @@ void dev_activate(struct net_device *dev) } EXPORT_SYMBOL(dev_activate); +static void qdisc_deactivate(struct Qdisc *qdisc) +{ + bool nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (qdisc->flags & TCQ_F_BUILTIN) + return; + if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state)) + return; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) + spin_unlock_bh(&qdisc->seqlock); +} + static void dev_deactivate_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc_default) @@ -1141,21 +1159,8 @@ static void dev_deactivate_queue(struct net_device *dev, qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { - bool nolock = qdisc->flags & TCQ_F_NOLOCK; - - if (nolock) - spin_lock_bh(&qdisc->seqlock); - spin_lock_bh(qdisc_lock(qdisc)); - - if (!(qdisc->flags & TCQ_F_BUILTIN)) - set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); - + qdisc_deactivate(qdisc); rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - qdisc_reset(qdisc); - - spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) - spin_unlock_bh(&qdisc->seqlock); } } @@ -1186,16 +1191,6 @@ static bool some_qdisc_is_busy(struct net_device *dev) return false; } -static void dev_qdisc_reset(struct net_device *dev, - struct netdev_queue *dev_queue, - void *none) -{ - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; - - if (qdisc) - qdisc_reset(qdisc); -} - /** * dev_deactivate_many - deactivate transmissions on several devices * @head: list of devices to deactivate @@ -1232,12 +1227,6 @@ void dev_deactivate_many(struct list_head *head) */ schedule_timeout_uninterruptible(1); } - /* The new qdisc is assigned at this point so we can safely - * unwind stale skb lists and qdisc statistics - */ - netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL); - if (dev_ingress_queue(dev)) - dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL); } } diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 6e2eb1dd64ed..68934438ee19 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -31,7 +31,7 @@ menuconfig IP_SCTP homing at either or both ends of an association." To compile this protocol support as a module, choose M here: the - module will be called sctp. Debug messages are handeled by the + module will be called sctp. Debug messages are handled by the kernel's dynamic debugging framework. If in doubt, say N. diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 437079a4883d..72315137d7e7 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -432,7 +432,7 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, changeover = 1 ; asoc->peer.primary_path = transport; - sctp_ulpevent_nofity_peer_addr_change(transport, + sctp_ulpevent_notify_peer_addr_change(transport, SCTP_ADDR_MADE_PRIM, 0); /* Set a default msg_name for events. */ @@ -574,7 +574,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, asoc->peer.transport_count--; - sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0); + sctp_ulpevent_notify_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0); sctp_transport_free(peer); } @@ -714,7 +714,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list); asoc->peer.transport_count++; - sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_ADDED, 0); + sctp_ulpevent_notify_peer_addr_change(peer, SCTP_ADDR_ADDED, 0); /* If we do not yet have a primary path, set one. */ if (!asoc->peer.primary_path) { @@ -840,7 +840,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, * to the user. */ if (ulp_notify) - sctp_ulpevent_nofity_peer_addr_change(transport, + sctp_ulpevent_notify_peer_addr_change(transport, spc_state, error); /* Select new active and retran paths. */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index c87af430107a..ccfa0ab3e7f4 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1032,6 +1032,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 2bc29463e1dc..9f36fe911d08 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1523,9 +1523,17 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, timeout = asoc->timeouts[cmd->obj.to]; BUG_ON(!timeout); - timer->expires = jiffies + timeout; - sctp_association_hold(asoc); - add_timer(timer); + /* + * SCTP has a hard time with timer starts. Because we process + * timer starts as side effects, it can be hard to tell if we + * have already started a timer or not, which leads to BUG + * halts when we call add_timer. So here, instead of just starting + * a timer, if the timer is already started, and just mod + * the timer with the shorter of the two expiration times + */ + if (!timer_pending(timer)) + sctp_association_hold(asoc); + timer_reduce(timer, jiffies + timeout); break; case SCTP_CMD_TIMER_RESTART: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 26788f4a3b9e..e86620fbd90f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1856,12 +1856,13 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( /* Update the content of current association. */ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - if (sctp_state(asoc, SHUTDOWN_PENDING) && + if ((sctp_state(asoc, SHUTDOWN_PENDING) || + sctp_state(asoc, SHUTDOWN_SENT)) && (sctp_sstate(asoc->base.sk, CLOSING) || sock_flag(asoc->base.sk, SOCK_DEAD))) { - /* if were currently in SHUTDOWN_PENDING, but the socket - * has been closed by user, don't transition to ESTABLISHED. - * Instead trigger SHUTDOWN bundled with COOKIE_ACK. + /* If the socket has been closed by user, don't + * transition to ESTABLISHED. Instead trigger SHUTDOWN + * bundled with COOKIE_ACK. */ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); return sctp_sf_do_9_2_start_shutdown(net, ep, asoc, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 827a9903ee28..d57e1a002ffc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -972,23 +972,22 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * it. * * sk The sk of the socket - * addrs The pointer to the addresses in user land + * addrs The pointer to the addresses * addrssize Size of the addrs buffer * op Operation to perform (add or remove, see the flags of * sctp_bindx) * * Returns 0 if ok, <0 errno code on error. */ -static int sctp_setsockopt_bindx(struct sock *sk, - struct sockaddr __user *addrs, - int addrs_size, int op) +static int sctp_setsockopt_bindx_kernel(struct sock *sk, + struct sockaddr *addrs, int addrs_size, + int op) { - struct sockaddr *kaddrs; int err; int addrcnt = 0; int walk_size = 0; struct sockaddr *sa_addr; - void *addr_buf; + void *addr_buf = addrs; struct sctp_af *af; pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n", @@ -997,17 +996,10 @@ static int sctp_setsockopt_bindx(struct sock *sk, if (unlikely(addrs_size <= 0)) return -EINVAL; - kaddrs = memdup_user(addrs, addrs_size); - if (IS_ERR(kaddrs)) - return PTR_ERR(kaddrs); - /* Walk through the addrs buffer and count the number of addresses. */ - addr_buf = kaddrs; while (walk_size < addrs_size) { - if (walk_size + sizeof(sa_family_t) > addrs_size) { - kfree(kaddrs); + if (walk_size + sizeof(sa_family_t) > addrs_size) return -EINVAL; - } sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); @@ -1015,10 +1007,8 @@ static int sctp_setsockopt_bindx(struct sock *sk, /* If the address family is not supported or if this address * causes the address buffer to overflow return EINVAL. */ - if (!af || (walk_size + af->sockaddr_len) > addrs_size) { - kfree(kaddrs); + if (!af || (walk_size + af->sockaddr_len) > addrs_size) return -EINVAL; - } addrcnt++; addr_buf += af->sockaddr_len; walk_size += af->sockaddr_len; @@ -1029,31 +1019,48 @@ static int sctp_setsockopt_bindx(struct sock *sk, case SCTP_BINDX_ADD_ADDR: /* Allow security module to validate bindx addresses. */ err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_BINDX_ADD, - (struct sockaddr *)kaddrs, - addrs_size); + addrs, addrs_size); if (err) - goto out; - err = sctp_bindx_add(sk, kaddrs, addrcnt); + return err; + err = sctp_bindx_add(sk, addrs, addrcnt); if (err) - goto out; - err = sctp_send_asconf_add_ip(sk, kaddrs, addrcnt); - break; - + return err; + return sctp_send_asconf_add_ip(sk, addrs, addrcnt); case SCTP_BINDX_REM_ADDR: - err = sctp_bindx_rem(sk, kaddrs, addrcnt); + err = sctp_bindx_rem(sk, addrs, addrcnt); if (err) - goto out; - err = sctp_send_asconf_del_ip(sk, kaddrs, addrcnt); - break; + return err; + return sctp_send_asconf_del_ip(sk, addrs, addrcnt); default: - err = -EINVAL; - break; + return -EINVAL; } +} -out: +static int sctp_setsockopt_bindx(struct sock *sk, + struct sockaddr __user *addrs, + int addrs_size, int op) +{ + struct sockaddr *kaddrs; + int err; + + kaddrs = memdup_user(addrs, addrs_size); + if (IS_ERR(kaddrs)) + return PTR_ERR(kaddrs); + err = sctp_setsockopt_bindx_kernel(sk, kaddrs, addrs_size, op); kfree(kaddrs); + return err; +} +static int sctp_bind_add(struct sock *sk, struct sockaddr *addrs, + int addrlen) +{ + int err; + + lock_sock(sk); + err = sctp_setsockopt_bindx_kernel(sk, addrs, addrlen, + SCTP_BINDX_ADD_ADDR); + release_sock(sk); return err; } @@ -9625,6 +9632,7 @@ struct proto sctp_prot = { .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, + .bind_add = sctp_bind_add, .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, @@ -9667,6 +9675,7 @@ struct proto sctpv6_prot = { .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, + .bind_add = sctp_bind_add, .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index c82dbdcf13f2..0c3d2b4d7321 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -336,13 +336,16 @@ fail: return NULL; } -void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport, +void sctp_ulpevent_notify_peer_addr_change(struct sctp_transport *transport, int state, int error) { struct sctp_association *asoc = transport->asoc; struct sockaddr_storage addr; struct sctp_ulpevent *event; + if (asoc->state < SCTP_STATE_ESTABLISHED) + return; + memset(&addr, 0, sizeof(struct sockaddr_storage)); memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index b2b85e1be72c..a47e8855e045 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -116,19 +116,15 @@ int smc_cdc_msg_send(struct smc_connection *conn, } /* send a validation msg indicating the move of a conn to an other QP link */ -int smcr_cdc_msg_send_validation(struct smc_connection *conn) +int smcr_cdc_msg_send_validation(struct smc_connection *conn, + struct smc_cdc_tx_pend *pend, + struct smc_wr_buf *wr_buf) { struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; struct smc_link *link = conn->lnk; - struct smc_cdc_tx_pend *pend; - struct smc_wr_buf *wr_buf; struct smc_cdc_msg *peer; int rc; - rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend); - if (rc) - return rc; - peer = (struct smc_cdc_msg *)wr_buf; peer->common.type = local->common.type; peer->len = local->len; diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 2ddcc5fb5ceb..0a0a89abd38b 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -296,7 +296,9 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); int smcd_cdc_msg_send(struct smc_connection *conn); -int smcr_cdc_msg_send_validation(struct smc_connection *conn); +int smcr_cdc_msg_send_validation(struct smc_connection *conn, + struct smc_cdc_tx_pend *pend, + struct smc_wr_buf *wr_buf); int smc_cdc_init(void) __init; void smcd_cdc_rx_init(struct smc_connection *conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 65de700e1f17..7964a21e5e6f 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -483,7 +483,8 @@ static int smc_write_space(struct smc_connection *conn) return space; } -static int smc_switch_cursor(struct smc_sock *smc) +static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, + struct smc_wr_buf *wr_buf) { struct smc_connection *conn = &smc->conn; union smc_host_cursor cons, fin; @@ -520,11 +521,14 @@ static int smc_switch_cursor(struct smc_sock *smc) if (smc->sk.sk_state != SMC_INIT && smc->sk.sk_state != SMC_CLOSED) { - rc = smcr_cdc_msg_send_validation(conn); + rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf); if (!rc) { schedule_delayed_work(&conn->tx_work, 0); smc->sk.sk_data_ready(&smc->sk); } + } else { + smc_wr_tx_put_slot(conn->lnk, + (struct smc_wr_tx_pend_priv *)pend); } return rc; } @@ -533,7 +537,9 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, struct smc_link *from_lnk, bool is_dev_err) { struct smc_link *to_lnk = NULL; + struct smc_cdc_tx_pend *pend; struct smc_connection *conn; + struct smc_wr_buf *wr_buf; struct smc_sock *smc; struct rb_node *node; int i, rc = 0; @@ -582,10 +588,16 @@ again: } sock_hold(&smc->sk); read_unlock_bh(&lgr->conns_lock); + /* pre-fetch buffer outside of send_lock, might sleep */ + rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend); + if (rc) { + smcr_link_down_cond_sched(to_lnk); + return NULL; + } /* avoid race with smcr_tx_sndbuf_nonempty() */ spin_lock_bh(&conn->send_lock); conn->lnk = to_lnk; - rc = smc_switch_cursor(smc); + rc = smc_switch_cursor(smc, pend, wr_buf); spin_unlock_bh(&conn->send_lock); sock_put(&smc->sk); if (rc) { diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index be03f1260d59..014d91b9778e 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -32,7 +32,7 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev); -static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { +static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, .len = SMC_MAX_PNETID_LEN diff --git a/net/socket.c b/net/socket.c index 1c9a7260a41d..976426d03f09 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3366,94 +3366,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return err; } -struct rtentry32 { - u32 rt_pad1; - struct sockaddr rt_dst; /* target address */ - struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ - struct sockaddr rt_genmask; /* target network mask (IP) */ - unsigned short rt_flags; - short rt_pad2; - u32 rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; - short rt_metric; /* +1 for binary compatibility! */ - /* char * */ u32 rt_dev; /* forcing the device at add */ - u32 rt_mtu; /* per route MTU/Window */ - u32 rt_window; /* Window clamping */ - unsigned short rt_irtt; /* Initial RTT */ -}; - -struct in6_rtmsg32 { - struct in6_addr rtmsg_dst; - struct in6_addr rtmsg_src; - struct in6_addr rtmsg_gateway; - u32 rtmsg_type; - u16 rtmsg_dst_len; - u16 rtmsg_src_len; - u32 rtmsg_metric; - u32 rtmsg_info; - u32 rtmsg_flags; - s32 rtmsg_ifindex; -}; - -static int routing_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, void __user *argp) -{ - int ret; - void *r = NULL; - struct in6_rtmsg r6; - struct rtentry r4; - char devname[16]; - u32 rtdev; - mm_segment_t old_fs = get_fs(); - - if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ - struct in6_rtmsg32 __user *ur6 = argp; - ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), - 3 * sizeof(struct in6_addr)); - ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); - - r = (void *) &r6; - } else { /* ipv4 */ - struct rtentry32 __user *ur4 = argp; - ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), - 3 * sizeof(struct sockaddr)); - ret |= get_user(r4.rt_flags, &(ur4->rt_flags)); - ret |= get_user(r4.rt_metric, &(ur4->rt_metric)); - ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu)); - ret |= get_user(r4.rt_window, &(ur4->rt_window)); - ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt)); - ret |= get_user(rtdev, &(ur4->rt_dev)); - if (rtdev) { - ret |= copy_from_user(devname, compat_ptr(rtdev), 15); - r4.rt_dev = (char __user __force *)devname; - devname[15] = 0; - } else - r4.rt_dev = NULL; - - r = (void *) &r4; - } - - if (ret) { - ret = -EFAULT; - goto out; - } - - set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); - set_fs(old_fs); - -out: - return ret; -} - /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that * use compatible ioctls @@ -3492,9 +3404,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGIFMAP: case SIOCSIFMAP: return compat_sioc_ifmap(net, cmd, argp); - case SIOCADDRT: - case SIOCDELRT: - return routing_ioctl(net, sock, cmd, argp); case SIOCGSTAMP_OLD: case SIOCGSTAMPNS_OLD: if (!sock->ops->gettstamp) @@ -3716,71 +3625,6 @@ int kernel_getpeername(struct socket *sock, struct sockaddr *addr) EXPORT_SYMBOL(kernel_getpeername); /** - * kernel_getsockopt - get a socket option (kernel space) - * @sock: socket - * @level: API level (SOL_SOCKET, ...) - * @optname: option tag - * @optval: option value - * @optlen: option length - * - * Assigns the option length to @optlen. - * Returns 0 or an error. - */ - -int kernel_getsockopt(struct socket *sock, int level, int optname, - char *optval, int *optlen) -{ - mm_segment_t oldfs = get_fs(); - char __user *uoptval; - int __user *uoptlen; - int err; - - uoptval = (char __user __force *) optval; - uoptlen = (int __user __force *) optlen; - - set_fs(KERNEL_DS); - if (level == SOL_SOCKET) - err = sock_getsockopt(sock, level, optname, uoptval, uoptlen); - else - err = sock->ops->getsockopt(sock, level, optname, uoptval, - uoptlen); - set_fs(oldfs); - return err; -} -EXPORT_SYMBOL(kernel_getsockopt); - -/** - * kernel_setsockopt - set a socket option (kernel space) - * @sock: socket - * @level: API level (SOL_SOCKET, ...) - * @optname: option tag - * @optval: option value - * @optlen: option length - * - * Returns 0 or an error. - */ - -int kernel_setsockopt(struct socket *sock, int level, int optname, - char *optval, unsigned int optlen) -{ - mm_segment_t oldfs = get_fs(); - char __user *uoptval; - int err; - - uoptval = (char __user __force *) optval; - - set_fs(KERNEL_DS); - if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, optname, uoptval, optlen); - else - err = sock->ops->setsockopt(sock, level, optname, uoptval, - optlen); - set_fs(oldfs); - return err; -} -EXPORT_SYMBOL(kernel_setsockopt); - -/** * kernel_sendpage - send a &page through a socket (kernel space) * @sock: socket * @page: page diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8350d3a2e9a7..61b21dafd7c0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -889,7 +889,9 @@ static void rpc_free_client_work(struct work_struct *work) * here. */ rpc_clnt_debugfs_unregister(clnt); + rpc_free_clid(clnt); rpc_clnt_remove_pipedir(clnt); + xprt_put(rcu_dereference_raw(clnt->cl_xprt)); kfree(clnt); rpciod_down(); @@ -907,10 +909,8 @@ rpc_free_client(struct rpc_clnt *clnt) rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; - xprt_put(rcu_dereference_raw(clnt->cl_xprt)); xprt_iter_destroy(&clnt->cl_xpi); put_cred(clnt->cl_cred); - rpc_free_clid(clnt); INIT_WORK(&clnt->cl_work, rpc_free_client_work); schedule_work(&clnt->cl_work); @@ -2433,6 +2433,11 @@ rpc_check_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + if (RPC_SIGNALLED(task)) { + rpc_call_rpcerror(task, -ERESTARTSYS); + return; + } + if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 023514e392b3..e7a0037d9b56 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -323,17 +323,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) { - struct svc_sock *svsk; - struct socket *sock; - struct linger no_linger = { - .l_onoff = 1, - .l_linger = 0, - }; + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - svsk = container_of(xprt, struct svc_sock, sk_xprt); - sock = svsk->sk_sock; - kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&no_linger, sizeof(no_linger)); + sock_no_linger(svsk->sk_sock->sk); } /* @@ -603,8 +595,6 @@ static struct svc_xprt_class svc_udp_class = { static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) { - int err, level, optname, one = 1; - svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, &svsk->sk_xprt, serv); clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); @@ -624,19 +614,14 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) /* make sure we get destination address info */ switch (svsk->sk_sk->sk_family) { case AF_INET: - level = SOL_IP; - optname = IP_PKTINFO; + ip_sock_set_pktinfo(svsk->sk_sock->sk); break; case AF_INET6: - level = SOL_IPV6; - optname = IPV6_RECVPKTINFO; + ip6_sock_set_recvpktinfo(svsk->sk_sock->sk); break; default: BUG(); } - err = kernel_setsockopt(svsk->sk_sock, level, optname, - (char *)&one, sizeof(one)); - dprintk("svc: kernel_setsockopt returned %d\n", err); } /* @@ -1337,7 +1322,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, struct sockaddr *newsin = (struct sockaddr *)&addr; int newlen; int family; - int val; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk("svc: svc_create_socket(%s, %d, %s)\n", @@ -1373,11 +1357,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, * getting requests from IPv4 remotes. Those should * be shunted to a PF_INET listener via rpcbind. */ - val = 1; if (family == PF_INET6) - kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, - (char *)&val, sizeof(val)); - + ip6_sock_set_v6only(sock->sk); if (type == SOCK_STREAM) sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */ error = kernel_bind(sock, sin, len); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 845d0be805ec..3a143e250b9a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1594,21 +1594,6 @@ static int xs_get_random_port(void) return rand + min; } -/** - * xs_set_reuseaddr_port - set the socket's port and address reuse options - * @sock: socket - * - * Note that this function has to be called on all sockets that share the - * same port, and it must be called before binding. - */ -static void xs_sock_set_reuseport(struct socket *sock) -{ - int opt = 1; - - kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, - (char *)&opt, sizeof(opt)); -} - static unsigned short xs_sock_getport(struct socket *sock) { struct sockaddr_storage buf; @@ -1801,7 +1786,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, xs_reclassify_socket(family, sock); if (reuseport) - xs_sock_set_reuseport(sock); + sock_set_reuseport(sock->sk); err = xs_bind(transport, sock); if (err) { @@ -2110,7 +2095,6 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); unsigned int keepidle; unsigned int keepcnt; - unsigned int opt_on = 1; unsigned int timeo; spin_lock(&xprt->transport_lock); @@ -2122,18 +2106,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, spin_unlock(&xprt->transport_lock); /* TCP Keepalive options */ - kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&opt_on, sizeof(opt_on)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keepidle, sizeof(keepidle)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keepidle, sizeof(keepidle)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keepcnt, sizeof(keepcnt)); + sock_set_keepalive(sock->sk); + tcp_sock_set_keepidle(sock->sk, keepidle); + tcp_sock_set_keepintvl(sock->sk, keepidle); + tcp_sock_set_keepcnt(sock->sk, keepcnt); /* TCP user timeout (see RFC5482) */ - kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (char *)&timeo, sizeof(timeo)); + tcp_sock_set_user_timeout(sock->sk, timeo); } static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, @@ -2171,7 +2150,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!transport->inet) { struct sock *sk = sock->sk; - unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC; /* Avoid temporary address, they are bad for long-lived * connections such as NFS mounts. @@ -2180,8 +2158,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) * knowledge about the normal duration of connections, * MAY override this as appropriate. */ - kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES, - (char *)&addr_pref, sizeof(addr_pref)); + if (xs_addr(xprt)->sa_family == PF_INET6) { + ip6_sock_set_addr_preferences(sk, + IPV6_PREFER_SRC_PUBLIC); + } xs_tcp_set_socket_timeouts(xprt, sock); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 4c20be08b9c4..383f87bc1061 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -46,6 +46,7 @@ #define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; +unsigned long sysctl_tipc_bc_retruni __read_mostly; /** * struct tipc_bc_base - base structure for keeping broadcast send state @@ -474,7 +475,7 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); - tipc_link_bc_ack_rcv(l, acked, &xmitq); + tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL); tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); @@ -489,9 +490,11 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, * RCU is locked, no other locks set */ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr) + struct tipc_msg *hdr, + struct sk_buff_head *retrq) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct tipc_gap_ack_blks *ga; struct sk_buff_head xmitq; int rc = 0; @@ -501,8 +504,13 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, if (msg_type(hdr) != STATE_MSG) { tipc_link_bc_init_rcv(l, hdr); } else if (!msg_bc_ack_invalid(hdr)) { - tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); - rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq); + tipc_get_gap_ack_blks(&ga, l, hdr, false); + if (!sysctl_tipc_bc_retruni) + retrq = &xmitq; + rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), + msg_bc_gap(hdr), ga, &xmitq, + retrq); + rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq); } tipc_bcast_unlock(net); @@ -555,10 +563,8 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) tipc_sk_rcv(net, inputq); } -int tipc_bclink_reset_stats(struct net *net) +int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l) { - struct tipc_link *l = tipc_bc_sndlink(net); - if (!l) return -ENOPROTOOPT; @@ -686,7 +692,7 @@ int tipc_bcast_init(struct net *net) tn->bcbase = bb; spin_lock_init(&tipc_net(net)->bclock); - if (!tipc_link_bc_create(net, 0, 0, + if (!tipc_link_bc_create(net, 0, 0, NULL, FB_MTU, BCLINK_WIN_DEFAULT, BCLINK_WIN_DEFAULT, diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 9e847d9617d3..4240c95188b1 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -45,6 +45,7 @@ struct tipc_nl_msg; struct tipc_nlist; struct tipc_nitem; extern const char tipc_bclink_name[]; +extern unsigned long sysctl_tipc_bc_retruni; #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) @@ -93,10 +94,12 @@ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr); int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr); -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); + struct tipc_msg *hdr, + struct sk_buff_head *retrq); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *bcl); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); -int tipc_bclink_reset_stats(struct net *net); +int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l); u32 tipc_bcast_get_broadcast_mode(struct net *net); u32 tipc_bcast_get_broadcast_ratio(struct net *net); diff --git a/net/tipc/link.c b/net/tipc/link.c index d4675e922a8f..ee3b8d0576b8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -188,6 +188,8 @@ struct tipc_link { /* Broadcast */ u16 ackers; u16 acked; + u16 last_gap; + struct tipc_gap_ack_blks *last_ga; struct tipc_link *bc_rcvlink; struct tipc_link *bc_sndlink; u8 nack_state; @@ -249,11 +251,14 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); -static int tipc_link_release_pkts(struct tipc_link *l, u16 to); -static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap); -static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, +static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, + struct tipc_link *l, u8 start_index); +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr); +static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, + u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, - struct sk_buff_head *xmitq); + struct sk_buff_head *xmitq, + bool *retransmitted, int *rc); static void tipc_link_update_cwin(struct tipc_link *l, int released, bool retransmitted); /* @@ -370,7 +375,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l, snd_l->ackers--; rcv_l->bc_peer_is_up = true; rcv_l->state = LINK_ESTABLISHED; - tipc_link_bc_ack_rcv(rcv_l, ack, xmitq); + tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL); trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!"); tipc_link_reset(rcv_l); rcv_l->state = LINK_RESET; @@ -534,7 +539,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, * * Returns true if link was created, otherwise false */ -bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, int mtu, u32 min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, @@ -549,7 +554,18 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, return false; l = *link; - strcpy(l->name, tipc_bclink_name); + if (peer_id) { + char peer_str[NODE_ID_STR_LEN] = {0,}; + + tipc_nodeid2string(peer_str, peer_id); + if (strlen(peer_str) > 16) + sprintf(peer_str, "%x", peer); + /* Broadcast receiver link name: "broadcast-link:<peer>" */ + snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name, + peer_str); + } else { + strcpy(l->name, tipc_bclink_name); + } trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!"); tipc_link_reset(l); l->state = LINK_RESET; @@ -784,8 +800,6 @@ bool tipc_link_too_silent(struct tipc_link *l) return (l->silent_intv_cnt + 2 > l->abort_limit); } -static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, - u16 from, u16 to, struct sk_buff_head *xmitq); /* tipc_link_timeout - perform periodic task as instructed from node timeout */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) @@ -948,6 +962,9 @@ void tipc_link_reset(struct tipc_link *l) l->snd_nxt_state = 1; l->rcv_nxt_state = 1; l->acked = 0; + l->last_gap = 0; + kfree(l->last_ga); + l->last_ga = NULL; l->silent_intv_cnt = 0; l->rst_cnt = 0; l->bc_peer_is_up = false; @@ -1183,68 +1200,14 @@ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, if (link_is_bc_sndlink(l)) { r->state = LINK_RESET; - *rc = TIPC_LINK_DOWN_EVT; + *rc |= TIPC_LINK_DOWN_EVT; } else { - *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + *rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } return true; } -/* tipc_link_bc_retrans() - retransmit zero or more packets - * @l: the link to transmit on - * @r: the receiving link ordering the retransmit. Same as l if unicast - * @from: retransmit from (inclusive) this sequence number - * @to: retransmit to (inclusive) this sequence number - * xmitq: queue for accumulating the retransmitted packets - */ -static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r, - u16 from, u16 to, struct sk_buff_head *xmitq) -{ - struct sk_buff *_skb, *skb = skb_peek(&l->transmq); - u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; - u16 ack = l->rcv_nxt - 1; - int retransmitted = 0; - struct tipc_msg *hdr; - int rc = 0; - - if (!skb) - return 0; - if (less(to, from)) - return 0; - - trace_tipc_link_retrans(r, from, to, &l->transmq); - - if (link_retransmit_failure(l, r, &rc)) - return rc; - - skb_queue_walk(&l->transmq, skb) { - hdr = buf_msg(skb); - if (less(msg_seqno(hdr), from)) - continue; - if (more(msg_seqno(hdr), to)) - break; - if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) - continue; - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; - _skb = pskb_copy(skb, GFP_ATOMIC); - if (!_skb) - return 0; - hdr = buf_msg(_skb); - msg_set_ack(hdr, ack); - msg_set_bcast_ack(hdr, bc_ack); - _skb->priority = TC_PRIO_CONTROL; - __skb_queue_tail(xmitq, _skb); - l->stats.retransmitted++; - retransmitted++; - /* Increase actual retrans counter & mark first time */ - if (!TIPC_SKB_CB(skb)->retr_cnt++) - TIPC_SKB_CB(skb)->retr_stamp = jiffies; - } - tipc_link_update_cwin(l, 0, retransmitted); - return 0; -} - /* tipc_data_input - deliver data and name distr msgs to upper layer * * Consumes buffer if message is of right type @@ -1402,46 +1365,68 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, return rc; } -static int tipc_link_release_pkts(struct tipc_link *l, u16 acked) -{ - int released = 0; - struct sk_buff *skb, *tmp; - - skb_queue_walk_safe(&l->transmq, skb, tmp) { - if (more(buf_seqno(skb), acked)) - break; - __skb_unlink(skb, &l->transmq); - kfree_skb(skb); - released++; +/** + * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG + * @ga: returned pointer to the Gap ACK blocks if any + * @l: the tipc link + * @hdr: the PROTOCOL/STATE_MSG header + * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0) + * + * Return: the total Gap ACK blocks size + */ +u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, + struct tipc_msg *hdr, bool uc) +{ + struct tipc_gap_ack_blks *p; + u16 sz = 0; + + /* Does peer support the Gap ACK blocks feature? */ + if (l->peer_caps & TIPC_GAP_ACK_BLOCK) { + p = (struct tipc_gap_ack_blks *)msg_data(hdr); + sz = ntohs(p->len); + /* Sanity check */ + if (sz == tipc_gap_ack_blks_sz(p->ugack_cnt + p->bgack_cnt)) { + /* Good, check if the desired type exists */ + if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt)) + goto ok; + /* Backward compatible: peer might not support bc, but uc? */ + } else if (uc && sz == tipc_gap_ack_blks_sz(p->ugack_cnt)) { + if (p->ugack_cnt) { + p->bgack_cnt = 0; + goto ok; + } + } } - return released; + /* Other cases: ignore! */ + p = NULL; + +ok: + *ga = p; + return sz; } -/* tipc_build_gap_ack_blks - build Gap ACK blocks - * @l: tipc link that data have come with gaps in sequence if any - * @data: data buffer to store the Gap ACK blocks after built - * - * returns the actual allocated memory size - */ -static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap) +static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, + struct tipc_link *l, u8 start_index) { + struct tipc_gap_ack *gacks = &ga->gacks[start_index]; struct sk_buff *skb = skb_peek(&l->deferdq); - struct tipc_gap_ack_blks *ga = data; - u16 len, expect, seqno = 0; + u16 expect, seqno = 0; u8 n = 0; - if (!skb || !gap) - goto exit; + if (!skb) + return 0; expect = buf_seqno(skb); skb_queue_walk(&l->deferdq, skb) { seqno = buf_seqno(skb); if (unlikely(more(seqno, expect))) { - ga->gacks[n].ack = htons(expect - 1); - ga->gacks[n].gap = htons(seqno - expect); - if (++n >= MAX_GAP_ACK_BLKS) { - pr_info_ratelimited("Too few Gap ACK blocks!\n"); - goto exit; + gacks[n].ack = htons(expect - 1); + gacks[n].gap = htons(seqno - expect); + if (++n >= MAX_GAP_ACK_BLKS / 2) { + pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n", + l->name, n, + skb_queue_len(&l->deferdq)); + return n; } } else if (unlikely(less(seqno, expect))) { pr_warn("Unexpected skb in deferdq!\n"); @@ -1451,14 +1436,44 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap) } /* last block */ - ga->gacks[n].ack = htons(seqno); - ga->gacks[n].gap = 0; + gacks[n].ack = htons(seqno); + gacks[n].gap = 0; n++; + return n; +} -exit: - len = tipc_gap_ack_blks_sz(n); +/* tipc_build_gap_ack_blks - build Gap ACK blocks + * @l: tipc unicast link + * @hdr: the tipc message buffer to store the Gap ACK blocks after built + * + * The function builds Gap ACK blocks for both the unicast & broadcast receiver + * links of a certain peer, the buffer after built has the network data format + * as found at the struct tipc_gap_ack_blks definition. + * + * returns the actual allocated memory size + */ +static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr) +{ + struct tipc_link *bcl = l->bc_rcvlink; + struct tipc_gap_ack_blks *ga; + u16 len; + + ga = (struct tipc_gap_ack_blks *)msg_data(hdr); + + /* Start with broadcast link first */ + tipc_bcast_lock(bcl->net); + msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); + msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); + ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0); + tipc_bcast_unlock(bcl->net); + + /* Now for unicast link, but an explicit NACK only (???) */ + ga->ugack_cnt = (msg_seq_gap(hdr)) ? + __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0; + + /* Total len */ + len = tipc_gap_ack_blks_sz(ga->bgack_cnt + ga->ugack_cnt); ga->len = htons(len); - ga->gack_cnt = n; return len; } @@ -1466,47 +1481,111 @@ exit: * acked packets, also doing retransmissions if * gaps found * @l: tipc link with transmq queue to be advanced + * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast) * @acked: seqno of last packet acked by peer without any gaps before * @gap: # of gap packets * @ga: buffer pointer to Gap ACK blocks from peer * @xmitq: queue for accumulating the retransmitted packets if any + * @retransmitted: returned boolean value if a retransmission is really issued + * @rc: returned code e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failures + * happens (- unlikely case) * - * In case of a repeated retransmit failures, the call will return shortly - * with a returned code (e.g. TIPC_LINK_DOWN_EVT) + * Return: the number of packets released from the link transmq */ -static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap, +static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, + u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, - struct sk_buff_head *xmitq) + struct sk_buff_head *xmitq, + bool *retransmitted, int *rc) { + struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL; + struct tipc_gap_ack *gacks = NULL; struct sk_buff *skb, *_skb, *tmp; struct tipc_msg *hdr; + u32 qlen = skb_queue_len(&l->transmq); + u16 nacked = acked, ngap = gap, gack_cnt = 0; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; - bool retransmitted = false; u16 ack = l->rcv_nxt - 1; - bool passed = false; - u16 released = 0; u16 seqno, n = 0; - int rc = 0; + u16 end = r->acked, start = end, offset = r->last_gap; + u16 si = (last_ga) ? last_ga->start_index : 0; + bool is_uc = !link_is_bc_sndlink(l); + bool bc_has_acked = false; + + trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq); + + /* Determine Gap ACK blocks if any for the particular link */ + if (ga && is_uc) { + /* Get the Gap ACKs, uc part */ + gack_cnt = ga->ugack_cnt; + gacks = &ga->gacks[ga->bgack_cnt]; + } else if (ga) { + /* Copy the Gap ACKs, bc part, for later renewal if needed */ + this_ga = kmemdup(ga, tipc_gap_ack_blks_sz(ga->bgack_cnt), + GFP_ATOMIC); + if (likely(this_ga)) { + this_ga->start_index = 0; + /* Start with the bc Gap ACKs */ + gack_cnt = this_ga->bgack_cnt; + gacks = &this_ga->gacks[0]; + } else { + /* Hmm, we can get in trouble..., simply ignore it */ + pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n"); + } + } + /* Advance the link transmq */ skb_queue_walk_safe(&l->transmq, skb, tmp) { seqno = buf_seqno(skb); next_gap_ack: - if (less_eq(seqno, acked)) { + if (less_eq(seqno, nacked)) { + if (is_uc) + goto release; + /* Skip packets peer has already acked */ + if (!more(seqno, r->acked)) + continue; + /* Get the next of last Gap ACK blocks */ + while (more(seqno, end)) { + if (!last_ga || si >= last_ga->bgack_cnt) + break; + start = end + offset + 1; + end = ntohs(last_ga->gacks[si].ack); + offset = ntohs(last_ga->gacks[si].gap); + si++; + WARN_ONCE(more(start, end) || + (!offset && + si < last_ga->bgack_cnt) || + si > MAX_GAP_ACK_BLKS, + "Corrupted Gap ACK: %d %d %d %d %d\n", + start, end, offset, si, + last_ga->bgack_cnt); + } + /* Check against the last Gap ACK block */ + if (in_range(seqno, start, end)) + continue; + /* Update/release the packet peer is acking */ + bc_has_acked = true; + if (--TIPC_SKB_CB(skb)->ackers) + continue; +release: /* release skb */ __skb_unlink(skb, &l->transmq); kfree_skb(skb); - released++; - } else if (less_eq(seqno, acked + gap)) { - /* First, check if repeated retrans failures occurs? */ - if (!passed && link_retransmit_failure(l, l, &rc)) - return rc; - passed = true; - + } else if (less_eq(seqno, nacked + ngap)) { + /* First gap: check if repeated retrans failures? */ + if (unlikely(seqno == acked + 1 && + link_retransmit_failure(l, r, rc))) { + /* Ignore this bc Gap ACKs if any */ + kfree(this_ga); + this_ga = NULL; + break; + } /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; - TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; + TIPC_SKB_CB(skb)->nxt_retr = (is_uc) ? + TIPC_UC_RETR_TIME : TIPC_BC_RETR_LIM; _skb = pskb_copy(skb, GFP_ATOMIC); if (!_skb) continue; @@ -1516,25 +1595,53 @@ next_gap_ack: _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; - retransmitted = true; + if (!is_uc) + r->stats.retransmitted++; + *retransmitted = true; /* Increase actual retrans counter & mark first time */ if (!TIPC_SKB_CB(skb)->retr_cnt++) TIPC_SKB_CB(skb)->retr_stamp = jiffies; } else { /* retry with Gap ACK blocks if any */ - if (!ga || n >= ga->gack_cnt) + if (n >= gack_cnt) break; - acked = ntohs(ga->gacks[n].ack); - gap = ntohs(ga->gacks[n].gap); + nacked = ntohs(gacks[n].ack); + ngap = ntohs(gacks[n].gap); n++; goto next_gap_ack; } } - if (released || retransmitted) - tipc_link_update_cwin(l, released, retransmitted); - if (released) - tipc_link_advance_backlog(l, xmitq); - return 0; + + /* Renew last Gap ACK blocks for bc if needed */ + if (bc_has_acked) { + if (this_ga) { + kfree(last_ga); + r->last_ga = this_ga; + r->last_gap = gap; + } else if (last_ga) { + if (less(acked, start)) { + si--; + offset = start - acked - 1; + } else if (less(acked, end)) { + acked = end; + } + if (si < last_ga->bgack_cnt) { + last_ga->start_index = si; + r->last_gap = offset; + } else { + kfree(last_ga); + r->last_ga = NULL; + r->last_gap = 0; + } + } else { + r->last_gap = 0; + } + r->acked = acked; + } else { + kfree(this_ga); + } + + return qlen - skb_queue_len(&l->transmq); } /* tipc_link_build_state_msg: prepare link state message for transmission @@ -1651,11 +1758,13 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, kfree_skb(skb); break; } - released += tipc_link_release_pkts(l, msg_ack(hdr)); + released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0, + NULL, NULL, NULL, NULL); /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { - __tipc_skb_queue_sorted(defq, seqno, skb); + if (!__tipc_skb_queue_sorted(defq, seqno, skb)) + l->stats.duplicates++; rc |= tipc_link_build_nack_msg(l, xmitq); break; } @@ -1689,15 +1798,15 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, int tolerance, int priority, struct sk_buff_head *xmitq) { + struct tipc_mon_state *mstate = &l->mon_state; + struct sk_buff_head *dfq = &l->deferdq; struct tipc_link *bcl = l->bc_rcvlink; - struct sk_buff *skb; struct tipc_msg *hdr; - struct sk_buff_head *dfq = &l->deferdq; + struct sk_buff *skb; bool node_up = link_is_up(bcl); - struct tipc_mon_state *mstate = &l->mon_state; + u16 glen = 0, bc_rcvgap = 0; int dlen = 0; void *data; - u16 glen = 0; /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) @@ -1735,11 +1844,12 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (l->peer_caps & TIPC_LINK_PROTO_SEQNO) msg_set_seqno(hdr, l->snd_nxt_state++); msg_set_seq_gap(hdr, rcvgap); - msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); + bc_rcvgap = link_bc_rcv_gap(bcl); + msg_set_bc_gap(hdr, bc_rcvgap); msg_set_probe(hdr, probe); msg_set_is_keepalive(hdr, probe || probe_reply); if (l->peer_caps & TIPC_GAP_ACK_BLOCK) - glen = tipc_build_gap_ack_blks(l, data, rcvgap); + glen = tipc_build_gap_ack_blks(l, hdr); tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + glen + dlen); skb_trim(skb, INT_H_SIZE + glen + dlen); @@ -1760,6 +1870,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, l->stats.sent_probes++; if (rcvgap) l->stats.sent_nacks++; + if (bc_rcvgap) + bcl->stats.sent_nacks++; skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, skb); trace_tipc_proto_build(skb, false, l->name); @@ -2027,20 +2139,19 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, { struct tipc_msg *hdr = buf_msg(skb); struct tipc_gap_ack_blks *ga = NULL; - u16 rcvgap = 0; - u16 ack = msg_ack(hdr); - u16 gap = msg_seq_gap(hdr); + bool reply = msg_probe(hdr), retransmitted = false; + u16 dlen = msg_data_sz(hdr), glen = 0; u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); + u16 gap = msg_seq_gap(hdr); + u16 ack = msg_ack(hdr); u16 rcv_nxt = l->rcv_nxt; - u16 dlen = msg_data_sz(hdr); + u16 rcvgap = 0; int mtyp = msg_type(hdr); - bool reply = msg_probe(hdr); - u16 glen = 0; - void *data; + int rc = 0, released; char *if_name; - int rc = 0; + void *data; trace_tipc_proto_rcv(skb, false, l->name); if (tipc_link_is_blocked(l) || !xmitq) @@ -2137,13 +2248,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, } /* Receive Gap ACK blocks from peer if any */ - if (l->peer_caps & TIPC_GAP_ACK_BLOCK) { - ga = (struct tipc_gap_ack_blks *)data; - glen = ntohs(ga->len); - /* sanity check: if failed, ignore Gap ACK blocks */ - if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt)) - ga = NULL; - } + glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); @@ -2158,9 +2263,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, rcvgap, 0, 0, xmitq); - rc |= tipc_link_advance_transmq(l, ack, gap, ga, xmitq); + released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq, + &retransmitted, &rc); if (gap) l->stats.recv_nacks++; + if (released || retransmitted) + tipc_link_update_cwin(l, released, retransmitted); + if (released) + tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); } @@ -2246,10 +2356,7 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *xmitq) { - struct tipc_link *snd_l = l->bc_sndlink; u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); - u16 from = msg_bcast_ack(hdr) + 1; - u16 to = from + msg_bc_gap(hdr) - 1; int rc = 0; if (!link_is_up(l)) @@ -2265,14 +2372,10 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, if (!l->bc_peer_is_up) return rc; - l->stats.recv_nacks++; - /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) return rc; - rc = tipc_link_bc_retrans(snd_l, l, from, to, xmitq); - l->snd_nxt = peers_snd_nxt; if (link_bc_rcv_gap(l)) rc |= TIPC_LINK_SND_STATE; @@ -2307,38 +2410,34 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, return 0; } -void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, - struct sk_buff_head *xmitq) +int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + struct sk_buff_head *retrq) { - struct sk_buff *skb, *tmp; - struct tipc_link *snd_l = l->bc_sndlink; - - if (!link_is_up(l) || !l->bc_peer_is_up) - return; + struct tipc_link *l = r->bc_sndlink; + bool unused = false; + int rc = 0; - if (!more(acked, l->acked)) - return; + if (!link_is_up(r) || !r->bc_peer_is_up) + return 0; - trace_tipc_link_bc_ack(l, l->acked, acked, &snd_l->transmq); - /* Skip over packets peer has already acked */ - skb_queue_walk(&snd_l->transmq, skb) { - if (more(buf_seqno(skb), l->acked)) - break; + if (gap) { + l->stats.recv_nacks++; + r->stats.recv_nacks++; } - /* Update/release the packets peer is acking now */ - skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) { - if (more(buf_seqno(skb), acked)) - break; - if (!--TIPC_SKB_CB(skb)->ackers) { - __skb_unlink(skb, &snd_l->transmq); - kfree_skb(skb); - } - } - l->acked = acked; - tipc_link_advance_backlog(snd_l, xmitq); - if (unlikely(!skb_queue_empty(&snd_l->wakeupq))) - link_prepare_wakeup(snd_l); + if (less(acked, r->acked) || (acked == r->acked && !gap && !ga)) + return 0; + + trace_tipc_link_bc_ack(r, acked, gap, &l->transmq); + tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc); + + tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); + + return rc; } /* tipc_link_bc_nack_rcv(): receive broadcast nack message @@ -2366,8 +2465,8 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, return 0; if (dnode == tipc_own_addr(l->net)) { - tipc_link_bc_ack_rcv(l, acked, xmitq); - rc = tipc_link_bc_retrans(l->bc_sndlink, l, from, to, xmitq); + rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq, + xmitq); l->stats.recv_nacks++; return rc; } @@ -2639,16 +2738,15 @@ msg_full: return -EMSGSIZE; } -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *bcl) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; - struct tipc_net *tn = net_generic(net, tipc_net_id); u32 bc_mode = tipc_bcast_get_broadcast_mode(net); u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); - struct tipc_link *bcl = tn->bcl; if (!bcl) return 0; @@ -2735,21 +2833,6 @@ void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) l->abort_limit = limit; } -char *tipc_link_name_ext(struct tipc_link *l, char *buf) -{ - if (!l) - scnprintf(buf, TIPC_MAX_LINK_NAME, "null"); - else if (link_is_bc_sndlink(l)) - scnprintf(buf, TIPC_MAX_LINK_NAME, "broadcast-sender"); - else if (link_is_bc_rcvlink(l)) - scnprintf(buf, TIPC_MAX_LINK_NAME, - "broadcast-receiver, peer %x", l->addr); - else - memcpy(buf, l->name, TIPC_MAX_LINK_NAME); - - return buf; -} - /** * tipc_link_dump - dump TIPC link data * @l: tipc link to be dumped diff --git a/net/tipc/link.h b/net/tipc/link.h index d3c1c3fc1659..fc07232c9a12 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -80,7 +80,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link **link); -bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, int mtu, u32 min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, @@ -111,7 +111,6 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l); u16 tipc_link_acked(struct tipc_link *l); u32 tipc_link_id(struct tipc_link *l); char *tipc_link_name(struct tipc_link *l); -char *tipc_link_name_ext(struct tipc_link *l, char *buf); u32 tipc_link_state(struct tipc_link *l); char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); @@ -143,8 +142,12 @@ int tipc_link_bc_peers(struct tipc_link *l); void tipc_link_set_mtu(struct tipc_link *l, int mtu); int tipc_link_mtu(struct tipc_link *l); int tipc_link_mss(struct tipc_link *l); -void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, - struct sk_buff_head *xmitq); +u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, + struct tipc_msg *hdr, bool uc); +int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap, + struct tipc_gap_ack_blks *ga, + struct sk_buff_head *xmitq, + struct sk_buff_head *retrq); void tipc_link_build_bc_sync_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 4d0e0bdd997b..c0afcd627c5e 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -212,7 +212,7 @@ err: int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, int mss, struct sk_buff_head *txq) { - struct sk_buff *skb, *prev; + struct sk_buff *skb; int accounted, total, curr; int mlen, cpy, rem = dlen; struct tipc_msg *hdr; @@ -223,7 +223,6 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, while (rem) { if (!skb || skb->len >= mss) { - prev = skb; skb = tipc_buf_acquire(mss, GFP_KERNEL); if (unlikely(!skb)) return -ENOMEM; @@ -235,9 +234,6 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen, msg_set_size(hdr, MIN_H_SIZE); __skb_queue_tail(txq, skb); total += 1; - if (prev) - msg_set_ack_required(buf_msg(prev), 0); - msg_set_ack_required(hdr, 1); } hdr = buf_msg(skb); curr = msg_blocks(hdr); @@ -825,19 +821,19 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, * @seqno: sequence number of buffer to add * @skb: buffer to add */ -void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, +bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb) { struct sk_buff *_skb, *tmp; if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) { __skb_queue_head(list, skb); - return; + return true; } if (more(seqno, buf_seqno(skb_peek_tail(list)))) { __skb_queue_tail(list, skb); - return; + return true; } skb_queue_walk_safe(list, _skb, tmp) { @@ -846,9 +842,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, if (seqno == buf_seqno(_skb)) break; __skb_queue_before(list, _skb, skb); - return; + return true; } kfree_skb(skb); + return false; } void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 871feadbbc19..58660d56bc83 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -160,20 +160,39 @@ struct tipc_gap_ack { /* struct tipc_gap_ack_blks * @len: actual length of the record - * @gack_cnt: number of Gap ACK blocks in the record + * @ugack_cnt: number of Gap ACK blocks for unicast (following the broadcast + * ones) + * @start_index: starting index for "valid" broadcast Gap ACK blocks + * @bgack_cnt: number of Gap ACK blocks for broadcast in the record * @gacks: array of Gap ACK blocks + * + * 31 16 15 0 + * +-------------+-------------+-------------+-------------+ + * | bgack_cnt | ugack_cnt | len | + * +-------------+-------------+-------------+-------------+ - + * | gap | ack | | + * +-------------+-------------+-------------+-------------+ > bc gacks + * : : : | + * +-------------+-------------+-------------+-------------+ - + * | gap | ack | | + * +-------------+-------------+-------------+-------------+ > uc gacks + * : : : | + * +-------------+-------------+-------------+-------------+ - */ struct tipc_gap_ack_blks { __be16 len; - u8 gack_cnt; - u8 reserved; + union { + u8 ugack_cnt; + u8 start_index; + }; + u8 bgack_cnt; struct tipc_gap_ack gacks[]; }; #define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \ sizeof(struct tipc_gap_ack) * (n)) -#define MAX_GAP_ACK_BLKS 32 +#define MAX_GAP_ACK_BLKS 128 #define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS) static inline struct tipc_msg *buf_msg(struct sk_buff *skb) @@ -321,9 +340,19 @@ static inline int msg_ack_required(struct tipc_msg *m) return msg_bits(m, 0, 18, 1); } -static inline void msg_set_ack_required(struct tipc_msg *m, u32 d) +static inline void msg_set_ack_required(struct tipc_msg *m) { - msg_set_bits(m, 0, 18, 1, d); + msg_set_bits(m, 0, 18, 1, 1); +} + +static inline int msg_nagle_ack(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 1); +} + +static inline void msg_set_nagle_ack(struct tipc_msg *m) +{ + msg_set_bits(m, 0, 18, 1, 1); } static inline bool msg_is_rcast(struct tipc_msg *m) @@ -1126,7 +1155,7 @@ bool tipc_msg_assemble(struct sk_buff_head *list); bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, struct sk_buff_head *cpy); -void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, +bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb); bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index bb9862410e68..c4aee6247d55 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -188,7 +188,7 @@ static const struct genl_ops tipc_genl_v2_ops[] = { }, { .cmd = TIPC_NL_LINK_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT, .doit = tipc_nl_node_get_link, .dumpit = tipc_nl_node_dump_link, }, diff --git a/net/tipc/node.c b/net/tipc/node.c index 803a3a6d0f50..0312fb181d94 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1138,7 +1138,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, if (unlikely(!n->bc_entry.link)) { snd_l = tipc_bc_sndlink(net); if (!tipc_link_bc_create(net, tipc_own_addr(net), - addr, U16_MAX, + addr, peer_id, U16_MAX, tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), n->capabilities, @@ -1772,7 +1772,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, struct tipc_link *ucl; int rc; - rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr); + rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq); if (rc & TIPC_LINK_DOWN_EVT) { tipc_node_reset_links(n); @@ -2071,10 +2071,16 @@ rcv: le = &n->links[bearer_id]; /* Ensure broadcast reception is in synch with peer's send state */ - if (unlikely(usr == LINK_PROTOCOL)) + if (unlikely(usr == LINK_PROTOCOL)) { + if (unlikely(skb_linearize(skb))) { + tipc_node_put(n); + goto discard; + } + hdr = buf_msg(skb); tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); - else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) + } else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) { tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr); + } /* Receive packet directly if conditions permit */ tipc_node_read_lock(n); @@ -2429,7 +2435,7 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) return -ENOMEM; if (strcmp(name, tipc_bclink_name) == 0) { - err = tipc_nl_add_bc_link(net, &msg); + err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl); if (err) goto err_free; } else { @@ -2473,6 +2479,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) struct tipc_node *node; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); struct tipc_link_entry *le; if (!info->attrs[TIPC_NLA_LINK]) @@ -2489,11 +2496,26 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); - if (strcmp(link_name, tipc_bclink_name) == 0) { - err = tipc_bclink_reset_stats(net); + err = -EINVAL; + if (!strcmp(link_name, tipc_bclink_name)) { + err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net)); if (err) return err; return 0; + } else if (strstr(link_name, tipc_bclink_name)) { + rcu_read_lock(); + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + link = node->bc_entry.link; + if (link && !strcmp(link_name, tipc_link_name(link))) { + err = tipc_bclink_reset_stats(net, link); + tipc_node_read_unlock(node); + break; + } + tipc_node_read_unlock(node); + } + rcu_read_unlock(); + return err; } node = tipc_node_find_by_name(net, link_name, &bearer_id); @@ -2517,7 +2539,8 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) /* Caller should hold node lock */ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, - struct tipc_node *node, u32 *prev_link) + struct tipc_node *node, u32 *prev_link, + bool bc_link) { u32 i; int err; @@ -2533,6 +2556,14 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, if (err) return err; } + + if (bc_link) { + *prev_link = i; + err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link); + if (err) + return err; + } + *prev_link = 0; return 0; @@ -2541,17 +2572,36 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct nlattr **attrs = genl_dumpit_info(cb)->attrs; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_node *node; struct tipc_nl_msg msg; u32 prev_node = cb->args[0]; u32 prev_link = cb->args[1]; int done = cb->args[2]; + bool bc_link = cb->args[3]; int err; if (done) return 0; + if (!prev_node) { + /* Check if broadcast-receiver links dumping is needed */ + if (attrs && attrs[TIPC_NLA_LINK]) { + err = nla_parse_nested_deprecated(link, + TIPC_NLA_LINK_MAX, + attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, + NULL); + if (unlikely(err)) + return err; + if (unlikely(!link[TIPC_NLA_LINK_BROADCAST])) + return -EINVAL; + bc_link = true; + } + } + msg.skb = skb; msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; @@ -2575,7 +2625,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) list) { tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); + &prev_link, bc_link); tipc_node_read_unlock(node); if (err) goto out; @@ -2583,14 +2633,14 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) prev_node = node->addr; } } else { - err = tipc_nl_add_bc_link(net, &msg); + err = tipc_nl_add_bc_link(net, &msg, tn->bcl); if (err) goto out; list_for_each_entry_rcu(node, &tn->node_list, list) { tipc_node_read_lock(node); err = __tipc_nl_add_node_links(net, &msg, node, - &prev_link); + &prev_link, bc_link); tipc_node_read_unlock(node); if (err) goto out; @@ -2605,6 +2655,7 @@ out: cb->args[0] = prev_node; cb->args[1] = prev_link; cb->args[2] = done; + cb->args[3] = bc_link; return skb->len; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e370ad0edd76..3734cdbedc9c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -48,6 +48,8 @@ #include "group.h" #include "trace.h" +#define NAGLE_START_INIT 4 +#define NAGLE_START_MAX 1024 #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ #define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 @@ -119,7 +121,10 @@ struct tipc_sock { struct rcu_head rcu; struct tipc_group *group; u32 oneway; + u32 nagle_start; u16 snd_backlog; + u16 msg_acc; + u16 pkt_cnt; bool expect_ack; bool nodelay; bool group_is_open; @@ -143,7 +148,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz); static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); -static void tipc_sk_push_backlog(struct tipc_sock *tsk); +static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -191,17 +196,17 @@ static int tsk_importance(struct tipc_sock *tsk) return msg_importance(&tsk->phdr); } -static int tsk_set_importance(struct tipc_sock *tsk, int imp) +static struct tipc_sock *tipc_sk(const struct sock *sk) { - if (imp > TIPC_CRITICAL_IMPORTANCE) - return -EINVAL; - msg_set_importance(&tsk->phdr, (u32)imp); - return 0; + return container_of(sk, struct tipc_sock, sk); } -static struct tipc_sock *tipc_sk(const struct sock *sk) +int tsk_set_importance(struct sock *sk, int imp) { - return container_of(sk, struct tipc_sock, sk); + if (imp > TIPC_CRITICAL_IMPORTANCE) + return -EINVAL; + msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp); + return 0; } static bool tsk_conn_cong(struct tipc_sock *tsk) @@ -474,6 +479,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk = tipc_sk(sk); tsk->max_pkt = MAX_PKT_DEFAULT; tsk->maxnagle = 0; + tsk->nagle_start = NAGLE_START_INIT; INIT_LIST_HEAD(&tsk->publications); INIT_LIST_HEAD(&tsk->cong_links); msg = &tsk->phdr; @@ -541,7 +547,7 @@ static void __tipc_shutdown(struct socket *sock, int error) !tsk_conn_cong(tsk))); /* Push out delayed messages if in Nagle mode */ - tipc_sk_push_backlog(tsk); + tipc_sk_push_backlog(tsk, false); /* Remove pending SYN */ __skb_queue_purge(&sk->sk_write_queue); @@ -1252,14 +1258,37 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, /* tipc_sk_push_backlog(): send accumulated buffers in socket write queue * when socket is in Nagle mode */ -static void tipc_sk_push_backlog(struct tipc_sock *tsk) +static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack) { struct sk_buff_head *txq = &tsk->sk.sk_write_queue; + struct sk_buff *skb = skb_peek_tail(txq); struct net *net = sock_net(&tsk->sk); u32 dnode = tsk_peer_node(tsk); - struct sk_buff *skb = skb_peek(txq); int rc; + if (nagle_ack) { + tsk->pkt_cnt += skb_queue_len(txq); + if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) { + tsk->oneway = 0; + if (tsk->nagle_start < NAGLE_START_MAX) + tsk->nagle_start *= 2; + tsk->expect_ack = false; + pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n", + tsk->portid, tsk->msg_acc, tsk->pkt_cnt, + tsk->nagle_start); + } else { + tsk->nagle_start = NAGLE_START_INIT; + if (skb) { + msg_set_ack_required(buf_msg(skb)); + tsk->expect_ack = true; + } else { + tsk->expect_ack = false; + } + } + tsk->msg_acc = 0; + tsk->pkt_cnt = 0; + } + if (!skb || tsk->cong_link_cnt) return; @@ -1267,9 +1296,10 @@ static void tipc_sk_push_backlog(struct tipc_sock *tsk) if (msg_is_syn(buf_msg(skb))) return; + if (tsk->msg_acc) + tsk->pkt_cnt += skb_queue_len(txq); tsk->snt_unacked += tsk->snd_backlog; tsk->snd_backlog = 0; - tsk->expect_ack = true; rc = tipc_node_xmit(net, txq, dnode, tsk->portid); if (rc == -ELINKCONG) tsk->cong_link_cnt = 1; @@ -1322,8 +1352,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, return; } else if (mtyp == CONN_ACK) { was_cong = tsk_conn_cong(tsk); - tsk->expect_ack = false; - tipc_sk_push_backlog(tsk); + tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr)); tsk->snt_unacked -= msg_conn_ack(hdr); if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) tsk->snd_win = msg_adv_win(hdr); @@ -1516,6 +1545,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); + struct sk_buff *skb; u32 dnode = tsk_peer_node(tsk); int maxnagle = tsk->maxnagle; int maxpkt = tsk->max_pkt; @@ -1544,17 +1574,25 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) break; send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE); blocks = tsk->snd_backlog; - if (tsk->oneway++ >= 4 && send <= maxnagle) { + if (tsk->oneway++ >= tsk->nagle_start && send <= maxnagle) { rc = tipc_msg_append(hdr, m, send, maxnagle, txq); if (unlikely(rc < 0)) break; blocks += rc; + tsk->msg_acc++; if (blocks <= 64 && tsk->expect_ack) { tsk->snd_backlog = blocks; sent += send; break; + } else if (blocks > 64) { + tsk->pkt_cnt += skb_queue_len(txq); + } else { + skb = skb_peek_tail(txq); + msg_set_ack_required(buf_msg(skb)); + tsk->expect_ack = true; + tsk->msg_acc = 0; + tsk->pkt_cnt = 0; } - tsk->expect_ack = true; } else { rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq); if (unlikely(rc != send)) @@ -2091,7 +2129,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, smp_wmb(); tsk->cong_link_cnt--; wakeup = true; - tipc_sk_push_backlog(tsk); + tipc_sk_push_backlog(tsk, false); break; case GROUP_PROTOCOL: tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq); @@ -2180,7 +2218,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, return false; case TIPC_ESTABLISHED: if (!skb_queue_empty(&sk->sk_write_queue)) - tipc_sk_push_backlog(tsk); + tipc_sk_push_backlog(tsk, false); /* Accept only connection-based messages sent by peer */ if (likely(con_msg && !err && pport == oport && pnode == onode)) { @@ -2188,8 +2226,10 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, struct sk_buff *skb; skb = tipc_sk_build_ack(tsk); - if (skb) + if (skb) { + msg_set_nagle_ack(buf_msg(skb)); __skb_queue_tail(xmitq, skb); + } } return true; } @@ -2681,7 +2721,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, /* Connect new socket to it's peer */ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); - tsk_set_importance(new_tsock, msg_importance(msg)); + tsk_set_importance(new_sk, msg_importance(msg)); if (msg_named(msg)) { new_tsock->conn_type = msg_nametype(msg); new_tsock->conn_instance = msg_nameinst(msg); @@ -3099,7 +3139,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, switch (opt) { case TIPC_IMPORTANCE: - res = tsk_set_importance(tsk, value); + res = tsk_set_importance(sk, value); break; case TIPC_SRC_DROPPABLE: if (sock->type != SOCK_STREAM) diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 235b9679acee..b11575afc66f 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -75,4 +75,6 @@ u32 tipc_sock_get_portid(struct sock *sk); bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb); bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb); +int tsk_set_importance(struct sock *sk, int imp); + #endif diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 58ab3d6dcdce..97a6264a2993 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -36,7 +36,7 @@ #include "core.h" #include "trace.h" #include "crypto.h" - +#include "bcast.h" #include <linux/sysctl.h> static struct ctl_table_header *tipc_ctl_hdr; @@ -75,6 +75,13 @@ static struct ctl_table tipc_table[] = { .extra1 = SYSCTL_ONE, }, #endif + { + .procname = "bc_retruni", + .data = &sysctl_tipc_bc_retruni, + .maxlen = sizeof(sysctl_tipc_bc_retruni), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, {} }; diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 446af7bbd13e..1489cfb941d8 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -497,7 +497,6 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk) static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) { - int imp = TIPC_CRITICAL_IMPORTANCE; struct socket *lsock = NULL; struct sockaddr_tipc saddr; struct sock *sk; @@ -514,8 +513,9 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) sk->sk_user_data = srv; write_unlock_bh(&sk->sk_callback_lock); - rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE, - (char *)&imp, sizeof(imp)); + lock_sock(sk); + rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE); + release_sock(sk); if (rc < 0) goto err; diff --git a/net/tipc/trace.h b/net/tipc/trace.h index 4d8e00483afc..04af83f0500c 100644 --- a/net/tipc/trace.h +++ b/net/tipc/trace.h @@ -255,7 +255,7 @@ DECLARE_EVENT_CLASS(tipc_link_class, TP_fast_assign( __assign_str(header, header); - tipc_link_name_ext(l, __entry->name); + memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME); tipc_link_dump(l, dqueues, __get_str(buf)); ), @@ -295,12 +295,14 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class, ), TP_fast_assign( - tipc_link_name_ext(r, __entry->name); + memcpy(__entry->name, tipc_link_name(r), TIPC_MAX_LINK_NAME); __entry->from = f; __entry->to = t; __entry->len = skb_queue_len(tq); - __entry->fseqno = msg_seqno(buf_msg(skb_peek(tq))); - __entry->lseqno = msg_seqno(buf_msg(skb_peek_tail(tq))); + __entry->fseqno = __entry->len ? + msg_seqno(buf_msg(skb_peek(tq))) : 0; + __entry->lseqno = __entry->len ? + msg_seqno(buf_msg(skb_peek_tail(tq))) : 0; ), TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n", @@ -308,15 +310,16 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class, __entry->len, __entry->fseqno, __entry->lseqno) ); -DEFINE_EVENT(tipc_link_transmq_class, tipc_link_retrans, +DEFINE_EVENT_CONDITION(tipc_link_transmq_class, tipc_link_retrans, TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq), - TP_ARGS(r, f, t, tq) + TP_ARGS(r, f, t, tq), + TP_CONDITION(less_eq(f, t)) ); DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack, TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq), TP_ARGS(r, f, t, tq), - TP_printk("<%s> acked: [%u-%u] transmq: %u [%u-%u]\n", + TP_printk("<%s> acked: %u gap: %u transmq: %u [%u-%u]\n", __entry->name, __entry->from, __entry->to, __entry->len, __entry->fseqno, __entry->lseqno) ); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index d6620ad53546..28a283f26a8d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -161,9 +161,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, struct udp_bearer *ub, struct udp_media_addr *src, struct udp_media_addr *dst, struct dst_cache *cache) { - struct dst_entry *ndst = dst_cache_get(cache); + struct dst_entry *ndst; int ttl, err = 0; + local_bh_disable(); + ndst = dst_cache_get(cache); if (dst->proto == htons(ETH_P_IP)) { struct rtable *rt = (struct rtable *)ndst; @@ -210,9 +212,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, src->port, dst->port, false); #endif } + local_bh_enable(); return err; tx_error: + local_bh_enable(); kfree_skb(skb); return err; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a562ebaaa33c..0e55f8365ce2 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -694,10 +694,11 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_rx *rx_ctx; + bool is_req_pending, is_force_resync; u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE]; - u32 sock_data, is_req_pending; struct tls_prot_info *prot; s64 resync_req; + u32 sock_data; u32 req_seq; if (tls_ctx->rx_conf != TLS_HW) @@ -712,9 +713,11 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) resync_req = atomic64_read(&rx_ctx->resync_req); req_seq = resync_req >> 32; seq += TLS_HEADER_SIZE - 1; - is_req_pending = resync_req; + is_req_pending = resync_req & RESYNC_REQ; + is_force_resync = resync_req & RESYNC_REQ_FORCE; - if (likely(!is_req_pending) || req_seq != seq || + if (likely(!is_req_pending) || + (!is_force_resync && req_seq != seq) || !atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0)) return; break; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index e23f94a5549b..8c2763eb6aae 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -206,10 +206,12 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) kfree(aead_req); + spin_lock_bh(&ctx->decrypt_compl_lock); pending = atomic_dec_return(&ctx->decrypt_pending); - if (!pending && READ_ONCE(ctx->async_notify)) + if (!pending && ctx->async_notify) complete(&ctx->async_wait.completion); + spin_unlock_bh(&ctx->decrypt_compl_lock); } static int tls_do_decryption(struct sock *sk, @@ -467,10 +469,12 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) ready = true; } + spin_lock_bh(&ctx->encrypt_compl_lock); pending = atomic_dec_return(&ctx->encrypt_pending); - if (!pending && READ_ONCE(ctx->async_notify)) + if (!pending && ctx->async_notify) complete(&ctx->async_wait.completion); + spin_unlock_bh(&ctx->encrypt_compl_lock); if (!ready) return; @@ -780,7 +784,7 @@ static int tls_push_record(struct sock *sk, int flags, static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, bool full_record, u8 record_type, - size_t *copied, int flags) + ssize_t *copied, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); @@ -796,9 +800,10 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, psock = sk_psock_get(sk); if (!psock || !policy) { err = tls_push_record(sk, flags, record_type); - if (err && err != -EINPROGRESS) { + if (err && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); + err = -sk->sk_err; } if (psock) sk_psock_put(sk, psock); @@ -824,9 +829,10 @@ more_data: switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); - if (err && err != -EINPROGRESS) { + if (err && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); + err = -sk->sk_err; goto out_err; } break; @@ -916,7 +922,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) unsigned char record_type = TLS_RECORD_TYPE_DATA; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool eor = !(msg->msg_flags & MSG_MORE); - size_t try_to_copy, copied = 0; + size_t try_to_copy; + ssize_t copied = 0; struct sk_msg *msg_pl, *msg_en; struct tls_rec *rec; int required_size; @@ -926,6 +933,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int num_zc = 0; int orig_size; int ret = 0; + int pending; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) return -EOPNOTSUPP; @@ -1092,13 +1100,19 @@ trim_sgl: goto send_end; } else if (num_zc) { /* Wait for pending encryptions to get completed */ - smp_store_mb(ctx->async_notify, true); + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; - if (atomic_read(&ctx->encrypt_pending)) + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + if (pending) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); else reinit_completion(&ctx->async_wait.completion); + /* There can be no concurrent accesses, since we have no + * pending encrypt operations + */ WRITE_ONCE(ctx->async_notify, false); if (ctx->async_wait.err) { @@ -1118,7 +1132,7 @@ send_end: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); - return copied ? copied : ret; + return copied > 0 ? copied : ret; } static int tls_sw_do_sendpage(struct sock *sk, struct page *page, @@ -1132,7 +1146,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, struct sk_msg *msg_pl; struct tls_rec *rec; int num_async = 0; - size_t copied = 0; + ssize_t copied = 0; bool full_record; int record_room; int ret = 0; @@ -1234,7 +1248,7 @@ wait_for_memory: } sendpage_end: ret = sk_stream_error(sk, flags, ret); - return copied ? copied : ret; + return copied > 0 ? copied : ret; } int tls_sw_sendpage_locked(struct sock *sk, struct page *page, @@ -1729,6 +1743,7 @@ int tls_sw_recvmsg(struct sock *sk, bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; int num_async = 0; + int pending; flags |= nonblock; @@ -1891,8 +1906,11 @@ pick_next_record: recv_end: if (num_async) { /* Wait for all previously submitted records to be decrypted */ - smp_store_mb(ctx->async_notify, true); - if (atomic_read(&ctx->decrypt_pending)) { + spin_lock_bh(&ctx->decrypt_compl_lock); + ctx->async_notify = true; + pending = atomic_read(&ctx->decrypt_pending); + spin_unlock_bh(&ctx->decrypt_compl_lock); + if (pending) { err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); if (err) { /* one of async decrypt failed */ @@ -1904,6 +1922,10 @@ recv_end: } else { reinit_completion(&ctx->async_wait.completion); } + + /* There can be no concurrent accesses, since we have no + * pending decrypt operations + */ WRITE_ONCE(ctx->async_notify, false); /* Drain records from the rx_list & copy if required */ @@ -2290,6 +2312,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (tx) { crypto_init_wait(&sw_ctx_tx->async_wait); + spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; @@ -2298,6 +2321,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) sw_ctx_tx->tx_work.sk = sk; } else { crypto_init_wait(&sw_ctx_rx->async_wait); + spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; skb_queue_head_init(&sw_ctx_rx->rx_list); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index a5f28708e0e7..626bf9044418 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1408,7 +1408,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, /* Wait for children sockets to appear; these are the new sockets * created upon connection establishment. */ - timeout = sock_sndtimeo(listener, flags & O_NONBLOCK); + timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK); prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); while ((connected = vsock_dequeue_accept(listener)) == NULL && diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 69efc891885f..0edda1edf988 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1132,6 +1132,14 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, lock_sock(sk); + /* Check if sk has been released before lock_sock */ + if (sk->sk_shutdown == SHUTDOWN_MASK) { + (void)virtio_transport_reset_no_sock(t, pkt); + release_sock(sk); + sock_put(sk); + goto free_pkt; + } + /* Update CID in case it has changed after a transport reset event */ vsk->local_addr.svm_cid = dst.svm_cid; diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 63cf7131f601..813e93644ae7 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -181,8 +181,8 @@ config CFG80211_CRDA_SUPPORT default y help You should enable this option unless you know for sure you have no - need for it, for example when using internal regdb (above) or the - database loaded as a firmware file. + need for it, for example when using the regulatory database loaded as + a firmware file. If unsure, say Y. diff --git a/net/wireless/chan.c b/net/wireless/chan.c index fcac5c6366e1..cddf92c5d09e 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,7 +6,7 @@ * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2018 Intel Corporation + * Copyright 2018-2020 Intel Corporation */ #include <linux/export.h> @@ -27,6 +27,7 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, return; chandef->chan = chan; + chandef->freq1_offset = chan->freq_offset; chandef->center_freq2 = 0; chandef->edmg.bw_config = 0; chandef->edmg.channels = 0; @@ -146,6 +147,9 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) if (!chandef->chan) return false; + if (chandef->freq1_offset >= 1000) + return false; + control_freq = chandef->chan->center_freq; switch (chandef->width) { @@ -153,7 +157,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: - if (chandef->center_freq1 != control_freq) + if (ieee80211_chandef_to_khz(chandef) != + ieee80211_channel_to_khz(chandef->chan)) return false; if (chandef->center_freq2) return false; @@ -386,10 +391,11 @@ static u32 cfg80211_get_start_freq(u32 center_freq, { u32 start_freq; - if (bandwidth <= 20) + bandwidth = MHZ_TO_KHZ(bandwidth); + if (bandwidth <= MHZ_TO_KHZ(20)) start_freq = center_freq; else - start_freq = center_freq - bandwidth/2 + 10; + start_freq = center_freq - bandwidth / 2 + MHZ_TO_KHZ(10); return start_freq; } @@ -399,10 +405,11 @@ static u32 cfg80211_get_end_freq(u32 center_freq, { u32 end_freq; - if (bandwidth <= 20) + bandwidth = MHZ_TO_KHZ(bandwidth); + if (bandwidth <= MHZ_TO_KHZ(20)) end_freq = center_freq; else - end_freq = center_freq + bandwidth/2 - 10; + end_freq = center_freq + bandwidth / 2 - MHZ_TO_KHZ(10); return end_freq; } @@ -417,8 +424,8 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, start_freq = cfg80211_get_start_freq(center_freq, bandwidth); end_freq = cfg80211_get_end_freq(center_freq, bandwidth); - for (freq = start_freq; freq <= end_freq; freq += 20) { - c = ieee80211_get_channel(wiphy, freq); + for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) { + c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return -EINVAL; @@ -449,8 +456,8 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, return -EINVAL; ret = cfg80211_get_chans_dfs_required(wiphy, - chandef->center_freq1, - width); + ieee80211_chandef_to_khz(chandef), + width); if (ret < 0) return ret; else if (ret > 0) @@ -460,8 +467,8 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, return 0; ret = cfg80211_get_chans_dfs_required(wiphy, - chandef->center_freq2, - width); + MHZ_TO_KHZ(chandef->center_freq2), + width); if (ret < 0) return ret; else if (ret > 0) @@ -503,8 +510,8 @@ static int cfg80211_get_chans_dfs_usable(struct wiphy *wiphy, * DFS_AVAILABLE). Return number of usable channels * (require CAC). Allow DFS and non-DFS channel mix. */ - for (freq = start_freq; freq <= end_freq; freq += 20) { - c = ieee80211_get_channel(wiphy, freq); + for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) { + c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return -EINVAL; @@ -536,8 +543,9 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, if (width < 0) return false; - r1 = cfg80211_get_chans_dfs_usable(wiphy, chandef->center_freq1, - width); + r1 = cfg80211_get_chans_dfs_usable(wiphy, + MHZ_TO_KHZ(chandef->center_freq1), + width); if (r1 < 0) return false; @@ -546,8 +554,8 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, case NL80211_CHAN_WIDTH_80P80: WARN_ON(!chandef->center_freq2); r2 = cfg80211_get_chans_dfs_usable(wiphy, - chandef->center_freq2, - width); + MHZ_TO_KHZ(chandef->center_freq2), + width); if (r2 < 0) return false; break; @@ -694,8 +702,8 @@ static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy, * If any channel in between is disabled or has not * had gone through CAC return false */ - for (freq = start_freq; freq <= end_freq; freq += 20) { - c = ieee80211_get_channel(wiphy, freq); + for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) { + c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return false; @@ -724,7 +732,8 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy, if (width < 0) return false; - r = cfg80211_get_chans_dfs_available(wiphy, chandef->center_freq1, + r = cfg80211_get_chans_dfs_available(wiphy, + MHZ_TO_KHZ(chandef->center_freq1), width); /* If any of channels unavailable for cf1 just return */ @@ -735,8 +744,8 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy, case NL80211_CHAN_WIDTH_80P80: WARN_ON(!chandef->center_freq2); r = cfg80211_get_chans_dfs_available(wiphy, - chandef->center_freq2, - width); + MHZ_TO_KHZ(chandef->center_freq2), + width); break; default: WARN_ON(chandef->center_freq2); @@ -757,8 +766,8 @@ static unsigned int cfg80211_get_chans_dfs_cac_time(struct wiphy *wiphy, start_freq = cfg80211_get_start_freq(center_freq, bandwidth); end_freq = cfg80211_get_end_freq(center_freq, bandwidth); - for (freq = start_freq; freq <= end_freq; freq += 20) { - c = ieee80211_get_channel(wiphy, freq); + for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) { + c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return 0; @@ -790,14 +799,14 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, return 0; t1 = cfg80211_get_chans_dfs_cac_time(wiphy, - chandef->center_freq1, + MHZ_TO_KHZ(chandef->center_freq1), width); if (!chandef->center_freq2) return t1; t2 = cfg80211_get_chans_dfs_cac_time(wiphy, - chandef->center_freq2, + MHZ_TO_KHZ(chandef->center_freq2), width); return max(t1, t2); @@ -813,8 +822,8 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, start_freq = cfg80211_get_start_freq(center_freq, bandwidth); end_freq = cfg80211_get_end_freq(center_freq, bandwidth); - for (freq = start_freq; freq <= end_freq; freq += 20) { - c = ieee80211_get_channel(wiphy, freq); + for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) { + c = ieee80211_get_channel_khz(wiphy, freq); if (!c || c->flags & prohibited_flags) return false; } @@ -910,7 +919,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, width = 10; break; case NL80211_CHAN_WIDTH_20: - if (!ht_cap->ht_supported) + if (!ht_cap->ht_supported && + chandef->chan->band != NL80211_BAND_6GHZ) return false; /* fall through */ case NL80211_CHAN_WIDTH_20_NOHT: @@ -919,6 +929,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, break; case NL80211_CHAN_WIDTH_40: width = 40; + if (chandef->chan->band == NL80211_BAND_6GHZ) + break; if (!ht_cap->ht_supported) return false; if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || @@ -933,24 +945,29 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, break; case NL80211_CHAN_WIDTH_80P80: cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; - if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) + if (chandef->chan->band != NL80211_BAND_6GHZ && + cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) return false; /* fall through */ case NL80211_CHAN_WIDTH_80: - if (!vht_cap->vht_supported) - return false; prohibited_flags |= IEEE80211_CHAN_NO_80MHZ; width = 80; + if (chandef->chan->band == NL80211_BAND_6GHZ) + break; + if (!vht_cap->vht_supported) + return false; break; case NL80211_CHAN_WIDTH_160: + prohibited_flags |= IEEE80211_CHAN_NO_160MHZ; + width = 160; + if (chandef->chan->band == NL80211_BAND_6GHZ) + break; if (!vht_cap->vht_supported) return false; cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) return false; - prohibited_flags |= IEEE80211_CHAN_NO_160MHZ; - width = 160; break; default: WARN_ON_ONCE(1); @@ -976,13 +993,15 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, prohibited_flags |= IEEE80211_CHAN_NO_OFDM; - if (!cfg80211_secondary_chans_ok(wiphy, chandef->center_freq1, + if (!cfg80211_secondary_chans_ok(wiphy, + ieee80211_chandef_to_khz(chandef), width, prohibited_flags)) return false; if (!chandef->center_freq2) return true; - return cfg80211_secondary_chans_ok(wiphy, chandef->center_freq2, + return cfg80211_secondary_chans_ok(wiphy, + MHZ_TO_KHZ(chandef->center_freq2), width, prohibited_flags); } EXPORT_SYMBOL(cfg80211_chandef_usable); diff --git a/net/wireless/core.c b/net/wireless/core.c index 341402b4f178..f0226ae9561c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -142,7 +142,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, if (result) return result; - if (rdev->wiphy.debugfsdir) + if (!IS_ERR_OR_NULL(rdev->wiphy.debugfsdir)) debugfs_rename(rdev->wiphy.debugfsdir->d_parent, rdev->wiphy.debugfsdir, rdev->wiphy.debugfsdir->d_parent, newname); @@ -480,9 +480,6 @@ use_default_name: INIT_LIST_HEAD(&rdev->bss_list); INIT_LIST_HEAD(&rdev->sched_scan_req_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); - INIT_LIST_HEAD(&rdev->mlme_unreg); - spin_lock_init(&rdev->mlme_unreg_lock); - INIT_WORK(&rdev->mlme_unreg_wk, cfg80211_mlme_unreg_wk); INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk, cfg80211_dfs_channels_update_work); #ifdef CONFIG_CFG80211_WEXT @@ -794,6 +791,7 @@ int wiphy_register(struct wiphy *wiphy) /* sanity check supported bands/channels */ for (band = 0; band < NUM_NL80211_BANDS; band++) { u16 types = 0; + bool have_he = false; sband = wiphy->bands[band]; if (!sband) @@ -810,6 +808,11 @@ int wiphy_register(struct wiphy *wiphy) !sband->n_bitrates)) return -EINVAL; + if (WARN_ON(band == NL80211_BAND_6GHZ && + (sband->ht_cap.ht_supported || + sband->vht_cap.vht_supported))) + return -EINVAL; + /* * Since cfg80211_disable_40mhz_24ghz is global, we can * modify the sband's ht data even if the driver uses a @@ -837,6 +840,9 @@ int wiphy_register(struct wiphy *wiphy) sband->channels[i].orig_mpwr = sband->channels[i].max_power; sband->channels[i].band = band; + + if (WARN_ON(sband->channels[i].freq_offset >= 1000)) + return -EINVAL; } for (i = 0; i < sband->n_iftype_data; i++) { @@ -854,8 +860,17 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; types |= iftd->types_mask; + + if (i == 0) + have_he = iftd->he_cap.has_he; + else + have_he = have_he && + iftd->he_cap.has_he; } + if (WARN_ON(!have_he && band == NL80211_BAND_6GHZ)) + return -EINVAL; + have_band = true; } @@ -1030,7 +1045,6 @@ void wiphy_unregister(struct wiphy *wiphy) cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); flush_work(&rdev->destroy_work); flush_work(&rdev->sched_scan_stop_wk); - flush_work(&rdev->mlme_unreg_wk); flush_work(&rdev->propagate_radar_detect_wk); flush_work(&rdev->propagate_cac_done_wk); @@ -1094,6 +1108,7 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) rdev->devlist_generation++; cfg80211_mlme_purge_registrations(wdev); + flush_work(&wdev->mgmt_registrations_update_wk); switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: @@ -1238,6 +1253,8 @@ void cfg80211_init_wdev(struct cfg80211_registered_device *rdev, spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); + INIT_WORK(&wdev->mgmt_registrations_update_wk, + cfg80211_mgmt_registrations_update_wk); INIT_LIST_HEAD(&wdev->pmsr_list); spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk); diff --git a/net/wireless/core.h b/net/wireless/core.h index bb897a803ffe..e0e5b3ee9699 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -60,10 +60,6 @@ struct cfg80211_registered_device { struct list_head beacon_registrations; spinlock_t beacon_registrations_lock; - struct list_head mlme_unreg; - spinlock_t mlme_unreg_lock; - struct work_struct mlme_unreg_wk; - /* protected by RTNL only */ int num_running_ifaces; int num_running_monitor_ifaces; @@ -290,7 +286,7 @@ struct cfg80211_cqm_config { u32 rssi_hyst; s32 last_rssi_event_value; int n_rssi_thresholds; - s32 rssi_thresholds[0]; + s32 rssi_thresholds[]; }; void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); @@ -385,8 +381,9 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, - int match_len, struct netlink_ext_ack *extack); -void cfg80211_mlme_unreg_wk(struct work_struct *wk); + int match_len, bool multicast_rx, + struct netlink_ext_ack *extack); +void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk); void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index e4805a3bd310..189334314cba 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -426,58 +426,62 @@ struct cfg80211_mgmt_registration { __le16 frame_type; + bool multicast_rx; + u8 match[]; }; -static void -cfg80211_process_mlme_unregistrations(struct cfg80211_registered_device *rdev) +static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct wireless_dev *tmp; struct cfg80211_mgmt_registration *reg; + struct mgmt_frame_regs upd = {}; ASSERT_RTNL(); - spin_lock_bh(&rdev->mlme_unreg_lock); - while ((reg = list_first_entry_or_null(&rdev->mlme_unreg, - struct cfg80211_mgmt_registration, - list))) { - list_del(®->list); - spin_unlock_bh(&rdev->mlme_unreg_lock); + rcu_read_lock(); + list_for_each_entry_rcu(tmp, &rdev->wiphy.wdev_list, list) { + list_for_each_entry_rcu(reg, &tmp->mgmt_registrations, list) { + u32 mask = BIT(le16_to_cpu(reg->frame_type) >> 4); + u32 mcast_mask = 0; - if (rdev->ops->mgmt_frame_register) { - u16 frame_type = le16_to_cpu(reg->frame_type); + if (reg->multicast_rx) + mcast_mask = mask; - rdev_mgmt_frame_register(rdev, reg->wdev, - frame_type, false); - } + upd.global_stypes |= mask; + upd.global_mcast_stypes |= mcast_mask; - kfree(reg); - - spin_lock_bh(&rdev->mlme_unreg_lock); + if (tmp == wdev) { + upd.interface_stypes |= mask; + upd.interface_mcast_stypes |= mcast_mask; + } + } } - spin_unlock_bh(&rdev->mlme_unreg_lock); + rcu_read_unlock(); + + rdev_update_mgmt_frame_registrations(rdev, wdev, &upd); } -void cfg80211_mlme_unreg_wk(struct work_struct *wk) +void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk) { - struct cfg80211_registered_device *rdev; - - rdev = container_of(wk, struct cfg80211_registered_device, - mlme_unreg_wk); + struct wireless_dev *wdev = container_of(wk, struct wireless_dev, + mgmt_registrations_update_wk); rtnl_lock(); - cfg80211_process_mlme_unregistrations(rdev); + cfg80211_mgmt_registrations_update(wdev); rtnl_unlock(); } int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, u16 frame_type, const u8 *match_data, - int match_len, struct netlink_ext_ack *extack) + int match_len, bool multicast_rx, + struct netlink_ext_ack *extack) { - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; u16 mgmt_type; + bool update_multicast = false; if (!wdev->wiphy->mgmt_stypes) return -EOPNOTSUPP; @@ -528,34 +532,39 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, continue; if (memcmp(reg->match, match_data, mlen) == 0) { + if (reg->multicast_rx != multicast_rx) { + update_multicast = true; + reg->multicast_rx = multicast_rx; + break; + } NL_SET_ERR_MSG(extack, "Match already configured"); err = -EALREADY; break; } } - if (err) { - kfree(nreg); + if (err) goto out; - } - memcpy(nreg->match, match_data, match_len); - nreg->match_len = match_len; - nreg->nlportid = snd_portid; - nreg->frame_type = cpu_to_le16(frame_type); - nreg->wdev = wdev; - list_add(&nreg->list, &wdev->mgmt_registrations); + if (update_multicast) { + kfree(nreg); + } else { + memcpy(nreg->match, match_data, match_len); + nreg->match_len = match_len; + nreg->nlportid = snd_portid; + nreg->frame_type = cpu_to_le16(frame_type); + nreg->wdev = wdev; + nreg->multicast_rx = multicast_rx; + list_add(&nreg->list, &wdev->mgmt_registrations); + } spin_unlock_bh(&wdev->mgmt_registrations_lock); - /* process all unregistrations to avoid driver confusion */ - cfg80211_process_mlme_unregistrations(rdev); - - if (rdev->ops->mgmt_frame_register) - rdev_mgmt_frame_register(rdev, wdev, frame_type, true); + cfg80211_mgmt_registrations_update(wdev); return 0; out: + kfree(nreg); spin_unlock_bh(&wdev->mgmt_registrations_lock); return err; @@ -574,11 +583,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) continue; list_del(®->list); - spin_lock(&rdev->mlme_unreg_lock); - list_add_tail(®->list, &rdev->mlme_unreg); - spin_unlock(&rdev->mlme_unreg_lock); + kfree(reg); - schedule_work(&rdev->mlme_unreg_wk); + schedule_work(&wdev->mgmt_registrations_update_wk); } spin_unlock_bh(&wdev->mgmt_registrations_lock); @@ -594,15 +601,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct cfg80211_mgmt_registration *reg, *tmp; spin_lock_bh(&wdev->mgmt_registrations_lock); - spin_lock(&rdev->mlme_unreg_lock); - list_splice_tail_init(&wdev->mgmt_registrations, &rdev->mlme_unreg); - spin_unlock(&rdev->mlme_unreg_lock); + list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { + list_del(®->list); + kfree(reg); + } spin_unlock_bh(&wdev->mgmt_registrations_lock); - cfg80211_process_mlme_unregistrations(rdev); + cfg80211_mgmt_registrations_update(wdev); } int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, @@ -721,8 +729,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return rdev_mgmt_tx(rdev, wdev, params, cookie); } -bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm, - const u8 *buf, size_t len, u32 flags) +bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, + const u8 *buf, size_t len, u32 flags) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -777,7 +785,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm, trace_cfg80211_return_bool(result); return result; } -EXPORT_SYMBOL(cfg80211_rx_mgmt); +EXPORT_SYMBOL(cfg80211_rx_mgmt_khz); void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 519414468b5d..263ae395ad44 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -329,6 +329,15 @@ he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = { [NL80211_HE_BSS_COLOR_ATTR_PARTIAL] = { .type = NLA_FLAG }, }; +static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { + [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, + [NL80211_TXRATE_HT] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)), + [NL80211_TXRATE_GI] = { .type = NLA_U8 }, +}; + static const struct nla_policy nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { [NL80211_TID_CONFIG_ATTR_VIF_SUPP] = { .type = NLA_U64 }, @@ -343,6 +352,12 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), + [NL80211_TID_CONFIG_ATTR_AMSDU_CTRL] = + NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), + [NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE] = + NLA_POLICY_MAX(NLA_U8, NL80211_TX_RATE_FIXED), + [NL80211_TID_CONFIG_ATTR_TX_RATE] = + NLA_POLICY_NESTED(nl80211_txattr_policy), }; static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { @@ -363,6 +378,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 }, [NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 }, + [NL80211_ATTR_CENTER_FREQ1_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999), [NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1), @@ -635,6 +651,13 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_CONTROL_PORT_NO_PREAUTH] = { .type = NLA_FLAG }, [NL80211_ATTR_PMK_LIFETIME] = NLA_POLICY_MIN(NLA_U32, 1), [NL80211_ATTR_PMK_REAUTH_THRESHOLD] = NLA_POLICY_RANGE(NLA_U8, 1, 100), + [NL80211_ATTR_RECEIVE_MULTICAST] = { .type = NLA_FLAG }, + [NL80211_ATTR_WIPHY_FREQ_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999), + [NL80211_ATTR_SCAN_FREQ_KHZ] = { .type = NLA_NESTED }, + [NL80211_ATTR_HE_6GHZ_CAPABILITY] = { + .type = NLA_EXACT_LEN, + .len = sizeof(struct ieee80211_he_6ghz_capa), + }, }; /* policy for the key attributes */ @@ -707,9 +730,16 @@ nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = { /* policy for GTK rekey offload attributes */ static const struct nla_policy nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = { - [NL80211_REKEY_DATA_KEK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KEK_LEN), - [NL80211_REKEY_DATA_KCK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KCK_LEN), + [NL80211_REKEY_DATA_KEK] = { + .type = NLA_BINARY, + .len = NL80211_KEK_EXT_LEN + }, + [NL80211_REKEY_DATA_KCK] = { + .type = NLA_BINARY, + .len = NL80211_KCK_EXT_LEN + }, [NL80211_REKEY_DATA_REPLAY_CTR] = NLA_POLICY_EXACT_LEN_WARN(NL80211_REPLAY_CTR_LEN), + [NL80211_REKEY_DATA_AKM] = { .type = NLA_U32 }, }; static const struct nla_policy @@ -901,6 +931,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, chan->center_freq)) goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_OFFSET, chan->freq_offset)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_DISABLED) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED)) goto nla_put_failure; @@ -1306,13 +1339,11 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) } static struct ieee80211_channel *nl80211_get_valid_chan(struct wiphy *wiphy, - struct nlattr *tb) + u32 freq) { struct ieee80211_channel *chan; - if (tb == NULL) - return NULL; - chan = ieee80211_get_channel(wiphy, nla_get_u32(tb)); + chan = ieee80211_get_channel_khz(wiphy, freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) return NULL; return chan; @@ -1538,6 +1569,7 @@ static int nl80211_send_coalesce(struct sk_buff *msg, static int nl80211_send_iftype_data(struct sk_buff *msg, + const struct ieee80211_supported_band *sband, const struct ieee80211_sband_iftype_data *iftdata) { const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap; @@ -1561,6 +1593,12 @@ nl80211_send_iftype_data(struct sk_buff *msg, return -ENOBUFS; } + if (sband->band == NL80211_BAND_6GHZ && + nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, + sizeof(iftdata->he_6ghz_capa), + &iftdata->he_6ghz_capa)) + return -ENOBUFS; + return 0; } @@ -1609,7 +1647,7 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, if (!iftdata) return -ENOBUFS; - err = nl80211_send_iftype_data(msg, + err = nl80211_send_iftype_data(msg, sband, &sband->iftype_data[i]); if (err) return err; @@ -2767,13 +2805,17 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, if (!attrs[NL80211_ATTR_WIPHY_FREQ]) return -EINVAL; - control_freq = nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]); + control_freq = MHZ_TO_KHZ( + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) + control_freq += + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); memset(chandef, 0, sizeof(*chandef)); - - chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq); + chandef->chan = ieee80211_get_channel_khz(&rdev->wiphy, control_freq); chandef->width = NL80211_CHAN_WIDTH_20_NOHT; - chandef->center_freq1 = control_freq; + chandef->center_freq1 = KHZ_TO_MHZ(control_freq); + chandef->freq1_offset = control_freq % 1000; chandef->center_freq2 = 0; /* Primary channel not allowed */ @@ -2821,9 +2863,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) { chandef->width = nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]); - if (attrs[NL80211_ATTR_CENTER_FREQ1]) + if (attrs[NL80211_ATTR_CENTER_FREQ1]) { chandef->center_freq1 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]); + if (attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]) + chandef->freq1_offset = nla_get_u32( + attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]); + else + chandef->freq1_offset = 0; + } if (attrs[NL80211_ATTR_CENTER_FREQ2]) chandef->center_freq2 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]); @@ -3256,6 +3304,9 @@ static int nl80211_send_chandef(struct sk_buff *msg, if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chandef->chan->center_freq)) return -ENOBUFS; + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET, + chandef->chan->freq_offset)) + return -ENOBUFS; switch (chandef->width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: @@ -3860,14 +3911,25 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) }; void *hdr; struct sk_buff *msg; + bool bigtk_support = false; + + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION)) + bigtk_support = true; + + if ((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_STATION || + dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_CLIENT) && + wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) + bigtk_support = true; if (info->attrs[NL80211_ATTR_KEY_IDX]) { key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); - if (key_idx > 5 && - !wiphy_ext_feature_isset( - &rdev->wiphy, - NL80211_EXT_FEATURE_BEACON_PROTECTION)) + + if (key_idx >= 6 && key_idx <= 7 && !bigtk_support) { + GENL_SET_ERR_MSG(info, "BIGTK not supported"); return -EINVAL; + } } if (info->attrs[NL80211_ATTR_MAC]) @@ -4357,16 +4419,9 @@ static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, return true; } -static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { - [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_RATES }, - [NL80211_TXRATE_HT] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_HT_RATES }, - [NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)), - [NL80211_TXRATE_GI] = { .type = NLA_U8 }, -}; - static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, + struct nlattr *attrs[], + enum nl80211_attrs attr, struct cfg80211_bitrate_mask *mask) { struct nlattr *tb[NL80211_TXRATE_MAX + 1]; @@ -4397,14 +4452,14 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, } /* if no rates are given set it back to the defaults */ - if (!info->attrs[NL80211_ATTR_TX_RATES]) + if (!attrs[attr]) goto out; /* The nested attribute uses enum nl80211_band as the index. This maps * directly to the enum nl80211_band values used in cfg80211. */ BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); - nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { + nla_for_each_nested(tx_rates, attrs[attr], rem) { enum nl80211_band band = nla_type(tx_rates); int err; @@ -4679,6 +4734,8 @@ static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, params->ht_required = true; if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY) params->vht_required = true; + if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY) + params->he_required = true; } } @@ -4907,7 +4964,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (info->attrs[NL80211_ATTR_TX_RATES]) { - err = nl80211_parse_tx_bitrate_mask(info, ¶ms.beacon_rate); + err = nl80211_parse_tx_bitrate_mask(info, info->attrs, + NL80211_ATTR_TX_RATES, + ¶ms.beacon_rate); if (err) return err; @@ -5948,6 +6007,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); } + if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]) + params.he_6ghz_capa = + nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]); + if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]) params.airtime_weight = nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]); @@ -6082,6 +6145,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]) + params.he_6ghz_capa = + nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]); + if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { params.opmode_notif_used = true; params.opmode_notif = @@ -6126,10 +6193,14 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.vht_capa = NULL; /* HE requires WME */ - if (params.he_capa_len) + if (params.he_capa_len || params.he_6ghz_capa) return -EINVAL; } + /* Ensure that HT/VHT capabilities are not set for 6 GHz HE STA */ + if (params.he_6ghz_capa && (params.ht_capa || params.vht_capa)) + return -EINVAL; + /* When you run into this, adjust the code below for the new flag */ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); @@ -7687,6 +7758,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_scan_request *request; + struct nlattr *scan_freqs = NULL; + bool scan_freqs_khz = false; struct nlattr *attr; struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels, i; @@ -7705,9 +7778,17 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto unlock; } - if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { - n_channels = validate_scan_freqs( - info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); + if (info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]) { + if (!wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_SCAN_FREQ_KHZ)) + return -EOPNOTSUPP; + scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]; + scan_freqs_khz = true; + } else if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) + scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]; + + if (scan_freqs) { + n_channels = validate_scan_freqs(scan_freqs); if (!n_channels) { err = -EINVAL; goto unlock; @@ -7755,13 +7836,16 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } i = 0; - if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { + if (scan_freqs) { /* user specified, bail out if channel not found */ - nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) { + nla_for_each_nested(attr, scan_freqs, tmp) { struct ieee80211_channel *chan; + int freq = nla_get_u32(attr); - chan = ieee80211_get_channel(wiphy, nla_get_u32(attr)); + if (!scan_freqs_khz) + freq = MHZ_TO_KHZ(freq); + chan = ieee80211_get_channel_khz(wiphy, freq); if (!chan) { err = -EINVAL; goto out_free; @@ -8857,6 +8941,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, goto nla_put_failure; if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) || nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) || + nla_put_u32(msg, NL80211_BSS_FREQUENCY_OFFSET, + res->channel->freq_offset) || nla_put_u32(msg, NL80211_BSS_CHAN_WIDTH, res->scan_width) || nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO, jiffies_to_msecs(jiffies - intbss->ts))) @@ -9125,6 +9211,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) enum nl80211_auth_type auth_type; struct key_parse key; bool local_state_change; + u32 freq; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -9181,8 +9268,12 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); - chan = nl80211_get_valid_chan(&rdev->wiphy, - info->attrs[NL80211_ATTR_WIPHY_FREQ]); + freq = MHZ_TO_KHZ(nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) + freq += + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); + + chan = nl80211_get_valid_chan(&rdev->wiphy, freq); if (!chan) return -EINVAL; @@ -9372,6 +9463,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_assoc_request req = {}; const u8 *bssid, *ssid; int err, ssid_len = 0; + u32 freq; if (dev->ieee80211_ptr->conn_owner_nlportid && dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid) @@ -9391,8 +9483,11 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); - chan = nl80211_get_valid_chan(&rdev->wiphy, - info->attrs[NL80211_ATTR_WIPHY_FREQ]); + freq = MHZ_TO_KHZ(nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) + freq += + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); + chan = nl80211_get_valid_chan(&rdev->wiphy, freq); if (!chan) return -EINVAL; @@ -10072,6 +10167,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) struct cfg80211_connect_params connect; struct wiphy *wiphy; struct cfg80211_cached_keys *connkeys = NULL; + u32 freq = 0; int err; memset(&connect, 0, sizeof(connect)); @@ -10142,14 +10238,21 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); - if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - connect.channel = nl80211_get_valid_chan( - wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ]); + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) + freq = MHZ_TO_KHZ(nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) + freq += + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); + + if (freq) { + connect.channel = nl80211_get_valid_chan(wiphy, freq); if (!connect.channel) return -EINVAL; } else if (info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]) { - connect.channel_hint = nl80211_get_valid_chan( - wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]); + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]); + freq = MHZ_TO_KHZ(freq); + connect.channel_hint = nl80211_get_valid_chan(wiphy, freq); if (!connect.channel_hint) return -EINVAL; } @@ -10688,7 +10791,8 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - err = nl80211_parse_tx_bitrate_mask(info, &mask); + err = nl80211_parse_tx_bitrate_mask(info, info->attrs, + NL80211_ATTR_TX_RATES, &mask); if (err) return err; @@ -10726,9 +10830,18 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_RECEIVE_MULTICAST] && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS)) { + GENL_SET_ERR_MSG(info, + "multicast RX registrations are not supported"); + return -EOPNOTSUPP; + } + return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]), + info->attrs[NL80211_ATTR_RECEIVE_MULTICAST], info->extack); } @@ -11285,7 +11398,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_TX_RATES]) { - err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate); + err = nl80211_parse_tx_bitrate_mask(info, info->attrs, + NL80211_ATTR_TX_RATES, + &setup.beacon_rate); if (err) return err; @@ -12239,14 +12354,22 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) return -EINVAL; if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN) return -ERANGE; - if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN) + if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN && + !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK && + nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KEK_EXT_LEN)) return -ERANGE; - if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN) + if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN && + !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK && + nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KCK_EXT_LEN)) return -ERANGE; rekey_data.kek = nla_data(tb[NL80211_REKEY_DATA_KEK]); rekey_data.kck = nla_data(tb[NL80211_REKEY_DATA_KCK]); rekey_data.replay_ctr = nla_data(tb[NL80211_REKEY_DATA_REPLAY_CTR]); + rekey_data.kek_len = nla_len(tb[NL80211_REKEY_DATA_KEK]); + rekey_data.kck_len = nla_len(tb[NL80211_REKEY_DATA_KCK]); + if (tb[NL80211_REKEY_DATA_AKM]) + rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]); wdev_lock(wdev); if (!wdev->current_bss) { @@ -13792,6 +13915,7 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info) static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) { + bool dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK]; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -13800,6 +13924,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) u8 *dest; u16 proto; bool noencrypt; + u64 cookie = 0; int err; if (!wiphy_ext_feature_isset(&rdev->wiphy, @@ -13844,9 +13969,12 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) noencrypt = nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]); - return rdev_tx_control_port(rdev, dev, buf, len, - dest, cpu_to_be16(proto), noencrypt); - + err = rdev_tx_control_port(rdev, dev, buf, len, + dest, cpu_to_be16(proto), noencrypt, + dont_wait_for_ack ? NULL : &cookie); + if (!err && !dont_wait_for_ack) + nl_set_extack_cookie_u64(info->extack, cookie); + return err; out: wdev_unlock(wdev); return err; @@ -14011,10 +14139,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, if (rdev->ops->reset_tid_config) { err = rdev_reset_tid_config(rdev, dev, peer, tid_conf->tids); - /* If peer is there no other configuration will be - * allowed - */ - if (err || peer) + if (err) return err; } else { return -EINVAL; @@ -14057,6 +14182,29 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]); } + if (attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]) { + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_AMSDU_CTRL); + tid_conf->amsdu = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]); + } + + if (attrs[NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE]) { + u32 idx = NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE, attr; + + tid_conf->txrate_type = nla_get_u8(attrs[idx]); + + if (tid_conf->txrate_type != NL80211_TX_RATE_AUTOMATIC) { + attr = NL80211_TID_CONFIG_ATTR_TX_RATE; + err = nl80211_parse_tx_bitrate_mask(info, attrs, attr, + &tid_conf->txrate_mask); + if (err) + return err; + + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE); + } + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE); + } + if (peer) mask = rdev->wiphy.tid_config_support.peer; else @@ -15168,14 +15316,27 @@ static int nl80211_add_scan_req(struct sk_buff *msg, } nla_nest_end(msg, nest); - nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES); - if (!nest) - goto nla_put_failure; - for (i = 0; i < req->n_channels; i++) { - if (nla_put_u32(msg, i, req->channels[i]->center_freq)) + if (req->flags & NL80211_SCAN_FLAG_FREQ_KHZ) { + nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQ_KHZ); + if (!nest) goto nla_put_failure; + for (i = 0; i < req->n_channels; i++) { + if (nla_put_u32(msg, i, + ieee80211_channel_to_khz(req->channels[i]))) + goto nla_put_failure; + } + nla_nest_end(msg, nest); + } else { + nest = nla_nest_start_noflag(msg, + NL80211_ATTR_SCAN_FREQUENCIES); + if (!nest) + goto nla_put_failure; + for (i = 0; i < req->n_channels; i++) { + if (nla_put_u32(msg, i, req->channels[i]->center_freq)) + goto nla_put_failure; + } + nla_nest_end(msg, nest); } - nla_nest_end(msg, nest); if (req->ie && nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) @@ -15495,10 +15656,19 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, if (WARN_ON(len < 2)) return; - if (ieee80211_is_deauth(mgmt->frame_control)) + if (ieee80211_is_deauth(mgmt->frame_control)) { cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE; - else + } else if (ieee80211_is_disassoc(mgmt->frame_control)) { cmd = NL80211_CMD_UNPROT_DISASSOCIATE; + } else if (ieee80211_is_beacon(mgmt->frame_control)) { + if (wdev->unprot_beacon_reported && + elapsed_jiffies_msecs(wdev->unprot_beacon_reported) < 10000) + return; + cmd = NL80211_CMD_UNPROT_BEACON; + wdev->unprot_beacon_reported = jiffies; + } else { + return; + } trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len); nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1, @@ -16177,7 +16347,8 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, netdev->ifindex)) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, KHZ_TO_MHZ(freq)) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET, freq % 1000) || (sig_dbm && nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) || nla_put(msg, NL80211_ATTR_FRAME, len, buf) || @@ -16194,8 +16365,9 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, return -ENOBUFS; } -void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp) +static void nl80211_frame_tx_status(struct wireless_dev *wdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp, enum nl80211_commands command) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -16203,13 +16375,16 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, struct sk_buff *msg; void *hdr; - trace_cfg80211_mgmt_tx_status(wdev, cookie, ack); + if (command == NL80211_CMD_FRAME_TX_STATUS) + trace_cfg80211_mgmt_tx_status(wdev, cookie, ack); + else + trace_cfg80211_control_port_tx_status(wdev, cookie, ack); msg = nlmsg_new(100 + len, gfp); if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS); + hdr = nl80211hdr_put(msg, 0, 0, 0, command); if (!hdr) { nlmsg_free(msg); return; @@ -16232,9 +16407,25 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, NL80211_MCGRP_MLME, gfp); return; - nla_put_failure: +nla_put_failure: nlmsg_free(msg); } + +void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp) +{ + nl80211_frame_tx_status(wdev, cookie, buf, len, ack, gfp, + NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS); +} +EXPORT_SYMBOL(cfg80211_control_port_tx_status); + +void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp) +{ + nl80211_frame_tx_status(wdev, cookie, buf, len, ack, gfp, + NL80211_CMD_FRAME_TX_STATUS); +} EXPORT_SYMBOL(cfg80211_mgmt_tx_status); static int __nl80211_rx_control_port(struct net_device *dev, @@ -16803,9 +16994,8 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, } EXPORT_SYMBOL(cfg80211_probe_status); -void cfg80211_report_obss_beacon(struct wiphy *wiphy, - const u8 *frame, size_t len, - int freq, int sig_dbm) +void cfg80211_report_obss_beacon_khz(struct wiphy *wiphy, const u8 *frame, + size_t len, int freq, int sig_dbm) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; @@ -16828,7 +17018,10 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (freq && - nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq)) || + (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, + KHZ_TO_MHZ(freq)) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET, + freq % 1000))) || (sig_dbm && nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) || nla_put(msg, NL80211_ATTR_FRAME, len, frame)) @@ -16845,7 +17038,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, spin_unlock_bh(&rdev->beacon_registrations_lock); nlmsg_free(msg); } -EXPORT_SYMBOL(cfg80211_report_obss_beacon); +EXPORT_SYMBOL(cfg80211_report_obss_beacon_khz); #ifdef CONFIG_PM static int cfg80211_net_detect_results(struct sk_buff *msg, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 99462f0c4e08..950d57494168 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -748,14 +748,17 @@ static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev, struct net_device *dev, const void *buf, size_t len, const u8 *dest, __be16 proto, - const bool noencrypt) + const bool noencrypt, u64 *cookie) { int ret; trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len, dest, proto, noencrypt); ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len, - dest, proto, noencrypt); - trace_rdev_return_int(&rdev->wiphy, ret); + dest, proto, noencrypt, cookie); + if (cookie) + trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); + else + trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -819,13 +822,16 @@ rdev_set_cqm_txe_config(struct cfg80211_registered_device *rdev, } static inline void -rdev_mgmt_frame_register(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u16 frame_type, bool reg) +rdev_update_mgmt_frame_registrations(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct mgmt_frame_regs *upd) { might_sleep(); - trace_rdev_mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg); - rdev->ops->mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg); + trace_rdev_update_mgmt_frame_registrations(&rdev->wiphy, wdev, upd); + if (rdev->ops->update_mgmt_frame_registrations) + rdev->ops->update_mgmt_frame_registrations(&rdev->wiphy, wdev, + upd); trace_rdev_return_void(&rdev->wiphy); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d476d4da0d09..0d74a31ef0ab 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1658,22 +1658,23 @@ static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd const struct ieee80211_channel *chan) { const struct ieee80211_freq_range *freq_range = NULL; - u32 max_bandwidth_khz, bw_flags = 0; + u32 max_bandwidth_khz, center_freq_khz, bw_flags = 0; freq_range = ®_rule->freq_range; max_bandwidth_khz = freq_range->max_bandwidth_khz; + center_freq_khz = ieee80211_channel_to_khz(chan); /* Check if auto calculation requested */ if (reg_rule->flags & NL80211_RRF_AUTO_BW) max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule); /* If we get a reg_rule we can assume that at least 5Mhz fit */ if (!cfg80211_does_bw_fit_range(freq_range, - MHZ_TO_KHZ(chan->center_freq), + center_freq_khz, MHZ_TO_KHZ(10))) bw_flags |= IEEE80211_CHAN_NO_10MHZ; if (!cfg80211_does_bw_fit_range(freq_range, - MHZ_TO_KHZ(chan->center_freq), + center_freq_khz, MHZ_TO_KHZ(20))) bw_flags |= IEEE80211_CHAN_NO_20MHZ; @@ -1710,7 +1711,7 @@ static void handle_channel(struct wiphy *wiphy, flags = chan->orig_flags; - reg_rule = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq)); + reg_rule = freq_reg_info(wiphy, ieee80211_channel_to_khz(chan)); if (IS_ERR(reg_rule)) { /* * We will disable all channels that do not match our @@ -1729,13 +1730,13 @@ static void handle_channel(struct wiphy *wiphy, if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) { - pr_debug("Disabling freq %d MHz for good\n", - chan->center_freq); + pr_debug("Disabling freq %d.%03d MHz for good\n", + chan->center_freq, chan->freq_offset); chan->orig_flags |= IEEE80211_CHAN_DISABLED; chan->flags = chan->orig_flags; } else { - pr_debug("Disabling freq %d MHz\n", - chan->center_freq); + pr_debug("Disabling freq %d.%03d MHz\n", + chan->center_freq, chan->freq_offset); chan->flags |= IEEE80211_CHAN_DISABLED; } return; @@ -1936,7 +1937,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, sband = wiphy->bands[reg_beacon->chan.band]; chan = &sband->channels[chan_idx]; - if (likely(chan->center_freq != reg_beacon->chan.center_freq)) + if (likely(!ieee80211_channel_equal(chan, ®_beacon->chan))) return; if (chan->beacon_found) @@ -2269,18 +2270,18 @@ static void handle_channel_custom(struct wiphy *wiphy, u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; - u32 bw; + u32 bw, center_freq_khz; + center_freq_khz = ieee80211_channel_to_khz(chan); for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) { - reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq), - regd, bw); + reg_rule = freq_reg_info_regd(center_freq_khz, regd, bw); if (!IS_ERR(reg_rule)) break; } if (IS_ERR_OR_NULL(reg_rule)) { - pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n", - chan->center_freq); + pr_debug("Disabling freq %d.%03d MHz as custom regd has no rule that fits it\n", + chan->center_freq, chan->freq_offset); if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { chan->flags |= IEEE80211_CHAN_DISABLED; } else { @@ -3337,8 +3338,8 @@ static bool pending_reg_beacon(struct ieee80211_channel *beacon_chan) struct reg_beacon *pending_beacon; list_for_each_entry(pending_beacon, ®_pending_beacons, list) - if (beacon_chan->center_freq == - pending_beacon->chan.center_freq) + if (ieee80211_channel_equal(beacon_chan, + &pending_beacon->chan)) return true; return false; } @@ -3367,9 +3368,10 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, if (!reg_beacon) return -ENOMEM; - pr_debug("Found new beacon on frequency: %d MHz (Ch %d) on %s\n", - beacon_chan->center_freq, - ieee80211_frequency_to_channel(beacon_chan->center_freq), + pr_debug("Found new beacon on frequency: %d.%03d MHz (Ch %d) on %s\n", + beacon_chan->center_freq, beacon_chan->freq_offset, + ieee80211_freq_khz_to_channel( + ieee80211_channel_to_khz(beacon_chan)), wiphy_name(wiphy)); memcpy(®_beacon->chan, beacon_chan, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4000382aef48..74ea4cfb39fb 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1322,8 +1322,8 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, return channel; } - freq = ieee80211_channel_to_frequency(channel_number, channel->band); - alt_channel = ieee80211_get_channel(wiphy, freq); + freq = ieee80211_channel_to_freq_khz(channel_number, channel->band); + alt_channel = ieee80211_get_channel_khz(wiphy, freq); if (!alt_channel) { if (channel->band == NL80211_BAND_2GHZ) { /* diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ac3e60aa1fc8..15595cf401de 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2009 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020 Intel Corporation. All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ @@ -694,6 +694,7 @@ void __cfg80211_connect_result(struct net_device *dev, return; } + wdev->unprot_beacon_reported = 0; nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, cr, GFP_KERNEL); @@ -921,6 +922,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, cfg80211_hold_bss(bss_from_pub(info->bss)); wdev->current_bss = bss_from_pub(info->bss); + wdev->unprot_beacon_reported = 0; nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy), wdev->netdev, info, GFP_KERNEL); @@ -1116,7 +1118,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, if (wiphy_ext_feature_isset( wdev->wiphy, - NL80211_EXT_FEATURE_BEACON_PROTECTION)) + NL80211_EXT_FEATURE_BEACON_PROTECTION) || + wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) max_key_idx = 7; for (i = 0; i <= max_key_idx; i++) rdev_del_key(rdev, dev, i, false, NULL); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 839df54cee21..b23cab016521 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -112,24 +112,29 @@ } while (0) #define CHAN_ENTRY __field(enum nl80211_band, band) \ - __field(u32, center_freq) + __field(u32, center_freq) \ + __field(u16, freq_offset) #define CHAN_ASSIGN(chan) \ do { \ if (chan) { \ __entry->band = chan->band; \ __entry->center_freq = chan->center_freq; \ + __entry->freq_offset = chan->freq_offset; \ } else { \ __entry->band = 0; \ __entry->center_freq = 0; \ + __entry->freq_offset = 0; \ } \ } while (0) -#define CHAN_PR_FMT "band: %d, freq: %u" -#define CHAN_PR_ARG __entry->band, __entry->center_freq +#define CHAN_PR_FMT "band: %d, freq: %u.%03u" +#define CHAN_PR_ARG __entry->band, __entry->center_freq, __entry->freq_offset #define CHAN_DEF_ENTRY __field(enum nl80211_band, band) \ __field(u32, control_freq) \ + __field(u32, freq_offset) \ __field(u32, width) \ __field(u32, center_freq1) \ + __field(u32, freq1_offset) \ __field(u32, center_freq2) #define CHAN_DEF_ASSIGN(chandef) \ do { \ @@ -137,21 +142,27 @@ __entry->band = (chandef)->chan->band; \ __entry->control_freq = \ (chandef)->chan->center_freq; \ + __entry->freq_offset = \ + (chandef)->chan->freq_offset; \ __entry->width = (chandef)->width; \ __entry->center_freq1 = (chandef)->center_freq1;\ + __entry->freq1_offset = (chandef)->freq1_offset;\ __entry->center_freq2 = (chandef)->center_freq2;\ } else { \ __entry->band = 0; \ __entry->control_freq = 0; \ + __entry->freq_offset = 0; \ __entry->width = 0; \ __entry->center_freq1 = 0; \ + __entry->freq1_offset = 0; \ __entry->center_freq2 = 0; \ } \ } while (0) #define CHAN_DEF_PR_FMT \ - "band: %d, control freq: %u, width: %d, cf1: %u, cf2: %u" + "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u" #define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq, \ - __entry->width, __entry->center_freq1, \ + __entry->freq_offset, __entry->width, \ + __entry->center_freq1, __entry->freq1_offset, \ __entry->center_freq2 #define SINFO_ENTRY __field(int, generation) \ @@ -1582,25 +1593,25 @@ TRACE_EVENT(rdev_set_bitrate_mask, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer)) ); -TRACE_EVENT(rdev_mgmt_frame_register, +TRACE_EVENT(rdev_update_mgmt_frame_registrations, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, - u16 frame_type, bool reg), - TP_ARGS(wiphy, wdev, frame_type, reg), + struct mgmt_frame_regs *upd), + TP_ARGS(wiphy, wdev, upd), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY - __field(u16, frame_type) - __field(bool, reg) + __field(u16, global_stypes) + __field(u16, interface_stypes) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; - __entry->frame_type = frame_type; - __entry->reg = reg; + __entry->global_stypes = upd->global_stypes; + __entry->interface_stypes = upd->interface_stypes; ), - TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", frame_type: 0x%.2x, reg: %s ", - WIPHY_PR_ARG, WDEV_PR_ARG, __entry->frame_type, - __entry->reg ? "true" : "false") + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", global: 0x%.2x, intf: 0x%.2x", + WIPHY_PR_ARG, WDEV_PR_ARG, + __entry->global_stypes, __entry->interface_stypes) ); TRACE_EVENT(rdev_return_int_tx_rx, @@ -2829,8 +2840,8 @@ TRACE_EVENT(cfg80211_rx_mgmt, __entry->freq = freq; __entry->sig_dbm = sig_dbm; ), - TP_printk(WDEV_PR_FMT ", freq: %d, sig dbm: %d", - WDEV_PR_ARG, __entry->freq, __entry->sig_dbm) + TP_printk(WDEV_PR_FMT ", freq: "KHZ_F", sig dbm: %d", + WDEV_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm) ); TRACE_EVENT(cfg80211_mgmt_tx_status, @@ -2850,6 +2861,23 @@ TRACE_EVENT(cfg80211_mgmt_tx_status, WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack)) ); +TRACE_EVENT(cfg80211_control_port_tx_status, + TP_PROTO(struct wireless_dev *wdev, u64 cookie, bool ack), + TP_ARGS(wdev, cookie, ack), + TP_STRUCT__entry( + WDEV_ENTRY + __field(u64, cookie) + __field(bool, ack) + ), + TP_fast_assign( + WDEV_ASSIGN; + __entry->cookie = cookie; + __entry->ack = ack; + ), + TP_printk(WDEV_PR_FMT", cookie: %llu, ack: %s", + WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack)) +); + TRACE_EVENT(cfg80211_rx_control_port, TP_PROTO(struct net_device *netdev, struct sk_buff *skb, bool unencrypted), @@ -3110,8 +3138,8 @@ TRACE_EVENT(cfg80211_report_obss_beacon, __entry->freq = freq; __entry->sig_dbm = sig_dbm; ), - TP_printk(WIPHY_PR_FMT ", freq: %d, sig_dbm: %d", - WIPHY_PR_ARG, __entry->freq, __entry->sig_dbm) + TP_printk(WIPHY_PR_FMT ", freq: "KHZ_F", sig_dbm: %d", + WIPHY_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm) ); TRACE_EVENT(cfg80211_tdls_oper_request, diff --git a/net/wireless/util.c b/net/wireless/util.c index 6590efbbcbb9..4d3b76f94f55 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #include <linux/export.h> #include <linux/bitops.h> @@ -72,7 +72,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_mandatory_rates); -int ieee80211_channel_to_frequency(int chan, enum nl80211_band band) +u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band) { /* see 802.11 17.3.8.3.2 and Annex J * there are overlapping channel numbers in 5GHz and 2GHz bands */ @@ -81,34 +81,39 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band) switch (band) { case NL80211_BAND_2GHZ: if (chan == 14) - return 2484; + return MHZ_TO_KHZ(2484); else if (chan < 14) - return 2407 + chan * 5; + return MHZ_TO_KHZ(2407 + chan * 5); break; case NL80211_BAND_5GHZ: if (chan >= 182 && chan <= 196) - return 4000 + chan * 5; + return MHZ_TO_KHZ(4000 + chan * 5); else - return 5000 + chan * 5; + return MHZ_TO_KHZ(5000 + chan * 5); break; case NL80211_BAND_6GHZ: - /* see 802.11ax D4.1 27.3.22.2 */ + /* see 802.11ax D6.1 27.3.23.2 */ + if (chan == 2) + return MHZ_TO_KHZ(5935); if (chan <= 253) - return 5940 + chan * 5; + return MHZ_TO_KHZ(5950 + chan * 5); break; case NL80211_BAND_60GHZ: if (chan < 7) - return 56160 + chan * 2160; + return MHZ_TO_KHZ(56160 + chan * 2160); break; default: ; } return 0; /* not supported */ } -EXPORT_SYMBOL(ieee80211_channel_to_frequency); +EXPORT_SYMBOL(ieee80211_channel_to_freq_khz); -int ieee80211_frequency_to_channel(int freq) +int ieee80211_freq_khz_to_channel(u32 freq) { + /* TODO: just handle MHz for now */ + freq = KHZ_TO_MHZ(freq); + /* see 802.11 17.3.8.3.2 and Annex J */ if (freq == 2484) return 14; @@ -126,9 +131,10 @@ int ieee80211_frequency_to_channel(int freq) else return 0; } -EXPORT_SYMBOL(ieee80211_frequency_to_channel); +EXPORT_SYMBOL(ieee80211_freq_khz_to_channel); -struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq) +struct ieee80211_channel *ieee80211_get_channel_khz(struct wiphy *wiphy, + u32 freq) { enum nl80211_band band; struct ieee80211_supported_band *sband; @@ -141,14 +147,16 @@ struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq) continue; for (i = 0; i < sband->n_channels; i++) { - if (sband->channels[i].center_freq == freq) - return &sband->channels[i]; + struct ieee80211_channel *chan = &sband->channels[i]; + + if (ieee80211_channel_to_khz(chan) == freq) + return chan; } } return NULL; } -EXPORT_SYMBOL(ieee80211_get_channel); +EXPORT_SYMBOL(ieee80211_get_channel_khz); static void set_mandatory_flags_band(struct ieee80211_supported_band *sband) { @@ -234,7 +242,9 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, int max_key_idx = 5; if (wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_BEACON_PROTECTION)) + NL80211_EXT_FEATURE_BEACON_PROTECTION) || + wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) max_key_idx = 7; if (key_idx < 0 || key_idx > max_key_idx) return -EINVAL; @@ -2030,10 +2040,10 @@ EXPORT_SYMBOL(cfg80211_send_layer2_update); int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, enum ieee80211_vht_chanwidth bw, - int mcs, bool ext_nss_bw_capable) + int mcs, bool ext_nss_bw_capable, + unsigned int max_vht_nss) { u16 map = le16_to_cpu(cap->supp_mcs.rx_mcs_map); - int max_vht_nss = 0; int ext_nss_bw; int supp_width; int i, mcs_encoding; @@ -2041,7 +2051,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, if (map == 0xffff) return 0; - if (WARN_ON(mcs > 9)) + if (WARN_ON(mcs > 9 || max_vht_nss > 8)) return 0; if (mcs <= 7) mcs_encoding = 0; @@ -2050,16 +2060,18 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, else mcs_encoding = 2; - /* find max_vht_nss for the given MCS */ - for (i = 7; i >= 0; i--) { - int supp = (map >> (2 * i)) & 3; + if (!max_vht_nss) { + /* find max_vht_nss for the given MCS */ + for (i = 7; i >= 0; i--) { + int supp = (map >> (2 * i)) & 3; - if (supp == 3) - continue; + if (supp == 3) + continue; - if (supp >= mcs_encoding) { - max_vht_nss = i + 1; - break; + if (supp >= mcs_encoding) { + max_vht_nss = i + 1; + break; + } } } diff --git a/net/xdp/Makefile b/net/xdp/Makefile index 71e2bdafb2ce..30cdc4315f42 100644 --- a/net/xdp/Makefile +++ b/net/xdp/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o +obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o xskmap.o +obj-$(CONFIG_XDP_SOCKETS) += xsk_buff_pool.o obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 37ace3bc0d48..1bbaf1747e4f 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -179,37 +179,6 @@ void xdp_umem_clear_dev(struct xdp_umem *umem) umem->zc = false; } -static void xdp_umem_unmap_pages(struct xdp_umem *umem) -{ - unsigned int i; - - for (i = 0; i < umem->npgs; i++) - if (PageHighMem(umem->pgs[i])) - vunmap(umem->pages[i].addr); -} - -static int xdp_umem_map_pages(struct xdp_umem *umem) -{ - unsigned int i; - void *addr; - - for (i = 0; i < umem->npgs; i++) { - if (PageHighMem(umem->pgs[i])) - addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL); - else - addr = page_address(umem->pgs[i]); - - if (!addr) { - xdp_umem_unmap_pages(umem); - return -ENOMEM; - } - - umem->pages[i].addr = addr; - } - - return 0; -} - static void xdp_umem_unpin_pages(struct xdp_umem *umem) { unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true); @@ -244,14 +213,9 @@ static void xdp_umem_release(struct xdp_umem *umem) umem->cq = NULL; } - xsk_reuseq_destroy(umem); - - xdp_umem_unmap_pages(umem); + xp_destroy(umem->pool); xdp_umem_unpin_pages(umem); - kvfree(umem->pages); - umem->pages = NULL; - xdp_umem_unaccount_pages(umem); kfree(umem); } @@ -341,8 +305,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; u32 chunk_size = mr->chunk_size, headroom = mr->headroom; + u64 npgs, addr = mr->addr, size = mr->len; unsigned int chunks, chunks_per_page; - u64 addr = mr->addr, size = mr->len; int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { @@ -372,6 +336,10 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if ((addr + size) < addr) return -EINVAL; + npgs = div_u64(size, PAGE_SIZE); + if (npgs > U32_MAX) + return -EINVAL; + chunks = (unsigned int)div_u64(size, chunk_size); if (chunks == 0) return -EINVAL; @@ -385,12 +353,10 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; - umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK - : ~((u64)chunk_size - 1); umem->size = size; umem->headroom = headroom; - umem->chunk_size_nohr = chunk_size - headroom; - umem->npgs = size / PAGE_SIZE; + umem->chunk_size = chunk_size; + umem->npgs = (u32)npgs; umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; @@ -407,18 +373,13 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (err) goto out_account; - umem->pages = kvcalloc(umem->npgs, sizeof(*umem->pages), - GFP_KERNEL_ACCOUNT); - if (!umem->pages) { + umem->pool = xp_create(umem->pgs, umem->npgs, chunks, chunk_size, + headroom, size, unaligned_chunks); + if (!umem->pool) { err = -ENOMEM; goto out_pin; } - - err = xdp_umem_map_pages(umem); - if (!err) - return 0; - - kvfree(umem->pages); + return 0; out_pin: xdp_umem_unpin_pages(umem); diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index a63a9fb251f5..32067fe98f65 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -6,7 +6,7 @@ #ifndef XDP_UMEM_H_ #define XDP_UMEM_H_ -#include <net/xdp_sock.h> +#include <net/xdp_sock_drv.h> int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, u16 queue_id, u16 flags); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 45ffd67b367d..b6c0f08bd80d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -22,7 +22,7 @@ #include <linux/net.h> #include <linux/netdevice.h> #include <linux/rculist.h> -#include <net/xdp_sock.h> +#include <net/xdp_sock_drv.h> #include <net/xdp.h> #include "xsk_queue.h" @@ -39,24 +39,6 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) READ_ONCE(xs->umem->fq); } -bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt) -{ - return xskq_cons_has_entries(umem->fq, cnt); -} -EXPORT_SYMBOL(xsk_umem_has_addrs); - -bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) -{ - return xskq_cons_peek_addr(umem->fq, addr, umem); -} -EXPORT_SYMBOL(xsk_umem_peek_addr); - -void xsk_umem_release_addr(struct xdp_umem *umem) -{ - xskq_cons_release(umem->fq); -} -EXPORT_SYMBOL(xsk_umem_release_addr); - void xsk_set_rx_need_wakeup(struct xdp_umem *umem) { if (umem->need_wakeup & XDP_WAKEUP_RX) @@ -117,76 +99,82 @@ bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) } EXPORT_SYMBOL(xsk_umem_uses_need_wakeup); -/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for - * each page. This is only required in copy mode. - */ -static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf, - u32 len, u32 metalen) +void xp_release(struct xdp_buff_xsk *xskb) { - void *to_buf = xdp_umem_get_data(umem, addr); - - addr = xsk_umem_add_offset_to_addr(addr); - if (xskq_cons_crosses_non_contig_pg(umem, addr, len + metalen)) { - void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr; - u64 page_start = addr & ~(PAGE_SIZE - 1); - u64 first_len = PAGE_SIZE - (addr - page_start); - - memcpy(to_buf, from_buf, first_len); - memcpy(next_pg_addr, from_buf + first_len, - len + metalen - first_len); + xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; +} - return; - } +static u64 xp_get_handle(struct xdp_buff_xsk *xskb) +{ + u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; - memcpy(to_buf, from_buf, len + metalen); + offset += xskb->pool->headroom; + if (!xskb->pool->unaligned) + return xskb->orig_addr + offset; + return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } -static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) +static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { - u64 offset = xs->umem->headroom; - u64 addr, memcpy_addr; - void *from_buf; - u32 metalen; + struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); + u64 addr; int err; - if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) || - len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { + addr = xp_get_handle(xskb); + err = xskq_prod_reserve_desc(xs->rx, addr, len); + if (err) { xs->rx_dropped++; - return -ENOSPC; + return err; } - if (unlikely(xdp_data_meta_unsupported(xdp))) { - from_buf = xdp->data; - metalen = 0; - } else { - from_buf = xdp->data_meta; - metalen = xdp->data - xdp->data_meta; - } + xp_release(xskb); + return 0; +} - memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset); - __xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen); +static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len) +{ + void *from_buf, *to_buf; + u32 metalen; - offset += metalen; - addr = xsk_umem_adjust_offset(xs->umem, addr, offset); - err = xskq_prod_reserve_desc(xs->rx, addr, len); - if (!err) { - xskq_cons_release(xs->umem->fq); - xdp_return_buff(xdp); - return 0; + if (unlikely(xdp_data_meta_unsupported(from))) { + from_buf = from->data; + to_buf = to->data; + metalen = 0; + } else { + from_buf = from->data_meta; + metalen = from->data - from->data_meta; + to_buf = to->data - metalen; } - xs->rx_dropped++; - return err; + memcpy(to_buf, from_buf, len + metalen); } -static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) +static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len, + bool explicit_free) { - int err = xskq_prod_reserve_desc(xs->rx, xdp->handle, len); + struct xdp_buff *xsk_xdp; + int err; - if (err) + if (len > xsk_umem_get_rx_frame_size(xs->umem)) { xs->rx_dropped++; + return -ENOSPC; + } - return err; + xsk_xdp = xsk_buff_alloc(xs->umem); + if (!xsk_xdp) { + xs->rx_dropped++; + return -ENOSPC; + } + + xsk_copy_xdp(xsk_xdp, xdp, len); + err = __xsk_rcv_zc(xs, xsk_xdp, len); + if (err) { + xsk_buff_free(xsk_xdp); + return err; + } + if (explicit_free) + xdp_return_buff(xdp); + return 0; } static bool xsk_is_bound(struct xdp_sock *xs) @@ -199,7 +187,8 @@ static bool xsk_is_bound(struct xdp_sock *xs) return false; } -static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) +static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, + bool explicit_free) { u32 len; @@ -211,8 +200,9 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) len = xdp->data_end - xdp->data; - return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ? - __xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len); + return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ? + __xsk_rcv_zc(xs, xdp, len) : + __xsk_rcv(xs, xdp, len, explicit_free); } static void xsk_flush(struct xdp_sock *xs) @@ -224,46 +214,11 @@ static void xsk_flush(struct xdp_sock *xs) int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { - u32 metalen = xdp->data - xdp->data_meta; - u32 len = xdp->data_end - xdp->data; - u64 offset = xs->umem->headroom; - void *buffer; - u64 addr; int err; spin_lock_bh(&xs->rx_lock); - - if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) { - err = -EINVAL; - goto out_unlock; - } - - if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) || - len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { - err = -ENOSPC; - goto out_drop; - } - - addr = xsk_umem_adjust_offset(xs->umem, addr, offset); - buffer = xdp_umem_get_data(xs->umem, addr); - memcpy(buffer, xdp->data_meta, len + metalen); - - addr = xsk_umem_adjust_offset(xs->umem, addr, metalen); - err = xskq_prod_reserve_desc(xs->rx, addr, len); - if (err) - goto out_drop; - - xskq_cons_release(xs->umem->fq); - xskq_prod_submit(xs->rx); - - spin_unlock_bh(&xs->rx_lock); - - xs->sk.sk_data_ready(&xs->sk); - return 0; - -out_drop: - xs->rx_dropped++; -out_unlock: + err = xsk_rcv(xs, xdp, false); + xsk_flush(xs); spin_unlock_bh(&xs->rx_lock); return err; } @@ -273,7 +228,7 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); int err; - err = xsk_rcv(xs, xdp); + err = xsk_rcv(xs, xdp, true); if (err) return err; @@ -404,7 +359,7 @@ static int xsk_generic_xmit(struct sock *sk) skb_put(skb, len); addr = desc.addr; - buffer = xdp_umem_get_data(xs->umem, addr); + buffer = xsk_buff_raw_get_data(xs->umem, addr); err = skb_store_bits(skb, 0, buffer, len); /* This is the backpressure mechanism for the Tx path. * Reserve space in the completion queue and only proceed @@ -629,24 +584,6 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd) return sock; } -/* Check if umem pages are contiguous. - * If zero-copy mode, use the DMA address to do the page contiguity check - * For all other modes we use addr (kernel virtual address) - * Store the result in the low bits of addr. - */ -static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags) -{ - struct xdp_umem_page *pgs = umem->pages; - int i, is_contig; - - for (i = 0; i < umem->npgs - 1; i++) { - is_contig = (flags & XDP_ZEROCOPY) ? - (pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) : - (pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr); - pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT; - } -} - static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; @@ -729,23 +666,14 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) goto out_unlock; } else { /* This xsk has its own umem. */ - xskq_set_umem(xs->umem->fq, xs->umem->size, - xs->umem->chunk_mask); - xskq_set_umem(xs->umem->cq, xs->umem->size, - xs->umem->chunk_mask); - err = xdp_umem_assign_dev(xs->umem, dev, qid, flags); if (err) goto out_unlock; - - xsk_check_page_contiguity(xs->umem, flags); } xs->dev = dev; xs->zc = xs->umem->zc; xs->queue_id = qid; - xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask); - xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask); xdp_add_sk_umem(xs->umem, xs); out_unlock: @@ -860,6 +788,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq : &xs->umem->cq; err = xsk_init_queue(entries, q, true); + if (optname == XDP_UMEM_FILL_RING) + xp_set_fq(xs->umem->pool, *q); mutex_unlock(&xs->mutex); return err; } diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h index 4cfd106bdb53..455ddd480f3d 100644 --- a/net/xdp/xsk.h +++ b/net/xdp/xsk.h @@ -4,6 +4,20 @@ #ifndef XSK_H_ #define XSK_H_ +/* Masks for xdp_umem_page flags. + * The low 12-bits of the addr will be 0 since this is the page address, so we + * can use them for flags. + */ +#define XSK_NEXT_PG_CONTIG_SHIFT 0 +#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) + +/* Flags for the umem flags field. + * + * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public + * flags. See inlude/uapi/include/linux/if_xdp.h. + */ +#define XDP_UMEM_USES_NEED_WAKEUP BIT(1) + struct xdp_ring_offset_v1 { __u64 producer; __u64 consumer; @@ -17,9 +31,25 @@ struct xdp_mmap_offsets_v1 { struct xdp_ring_offset_v1 cr; }; +/* Nodes are linked in the struct xdp_sock map_list field, and used to + * track which maps a certain socket reside in. + */ + +struct xsk_map_node { + struct list_head node; + struct xsk_map *map; + struct xdp_sock **map_entry; +}; + static inline struct xdp_sock *xdp_sk(struct sock *sk) { return (struct xdp_sock *)sk; } +bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); +void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, + struct xdp_sock **map_entry); +int xsk_map_inc(struct xsk_map *map); +void xsk_map_put(struct xsk_map *map); + #endif /* XSK_H_ */ diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c new file mode 100644 index 000000000000..540ed75e4482 --- /dev/null +++ b/net/xdp/xsk_buff_pool.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <net/xsk_buff_pool.h> +#include <net/xdp_sock.h> +#include <linux/dma-direct.h> +#include <linux/dma-noncoherent.h> +#include <linux/swiotlb.h> + +#include "xsk_queue.h" + +static void xp_addr_unmap(struct xsk_buff_pool *pool) +{ + vunmap(pool->addrs); +} + +static int xp_addr_map(struct xsk_buff_pool *pool, + struct page **pages, u32 nr_pages) +{ + pool->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); + if (!pool->addrs) + return -ENOMEM; + return 0; +} + +void xp_destroy(struct xsk_buff_pool *pool) +{ + if (!pool) + return; + + xp_addr_unmap(pool); + kvfree(pool->heads); + kvfree(pool); +} + +struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks, + u32 chunk_size, u32 headroom, u64 size, + bool unaligned) +{ + struct xsk_buff_pool *pool; + struct xdp_buff_xsk *xskb; + int err; + u32 i; + + pool = kvzalloc(struct_size(pool, free_heads, chunks), GFP_KERNEL); + if (!pool) + goto out; + + pool->heads = kvcalloc(chunks, sizeof(*pool->heads), GFP_KERNEL); + if (!pool->heads) + goto out; + + pool->chunk_mask = ~((u64)chunk_size - 1); + pool->addrs_cnt = size; + pool->heads_cnt = chunks; + pool->free_heads_cnt = chunks; + pool->headroom = headroom; + pool->chunk_size = chunk_size; + pool->cheap_dma = true; + pool->unaligned = unaligned; + pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM; + INIT_LIST_HEAD(&pool->free_list); + + for (i = 0; i < pool->free_heads_cnt; i++) { + xskb = &pool->heads[i]; + xskb->pool = pool; + xskb->xdp.frame_sz = chunk_size - headroom; + pool->free_heads[i] = xskb; + } + + err = xp_addr_map(pool, pages, nr_pages); + if (!err) + return pool; + +out: + xp_destroy(pool); + return NULL; +} + +void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq) +{ + pool->fq = fq; +} + +void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) +{ + u32 i; + + for (i = 0; i < pool->heads_cnt; i++) + pool->heads[i].xdp.rxq = rxq; +} +EXPORT_SYMBOL(xp_set_rxq_info); + +void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) +{ + dma_addr_t *dma; + u32 i; + + if (pool->dma_pages_cnt == 0) + return; + + for (i = 0; i < pool->dma_pages_cnt; i++) { + dma = &pool->dma_pages[i]; + if (*dma) { + dma_unmap_page_attrs(pool->dev, *dma, PAGE_SIZE, + DMA_BIDIRECTIONAL, attrs); + *dma = 0; + } + } + + kvfree(pool->dma_pages); + pool->dma_pages_cnt = 0; + pool->dev = NULL; +} +EXPORT_SYMBOL(xp_dma_unmap); + +static void xp_check_dma_contiguity(struct xsk_buff_pool *pool) +{ + u32 i; + + for (i = 0; i < pool->dma_pages_cnt - 1; i++) { + if (pool->dma_pages[i] + PAGE_SIZE == pool->dma_pages[i + 1]) + pool->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK; + else + pool->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK; + } +} + +static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool) +{ +#if defined(CONFIG_SWIOTLB) + phys_addr_t paddr; + u32 i; + + for (i = 0; i < pool->dma_pages_cnt; i++) { + paddr = dma_to_phys(pool->dev, pool->dma_pages[i]); + if (is_swiotlb_buffer(paddr)) + return false; + } +#endif + return true; +} + +static bool xp_check_cheap_dma(struct xsk_buff_pool *pool) +{ +#if defined(CONFIG_HAS_DMA) + const struct dma_map_ops *ops = get_dma_ops(pool->dev); + + if (ops) { + return !ops->sync_single_for_cpu && + !ops->sync_single_for_device; + } + + if (!dma_is_direct(ops)) + return false; + + if (!xp_check_swiotlb_dma(pool)) + return false; + + if (!dev_is_dma_coherent(pool->dev)) { +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) + return false; +#endif + } +#endif + return true; +} + +int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, + unsigned long attrs, struct page **pages, u32 nr_pages) +{ + dma_addr_t dma; + u32 i; + + pool->dma_pages = kvcalloc(nr_pages, sizeof(*pool->dma_pages), + GFP_KERNEL); + if (!pool->dma_pages) + return -ENOMEM; + + pool->dev = dev; + pool->dma_pages_cnt = nr_pages; + + for (i = 0; i < pool->dma_pages_cnt; i++) { + dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL, attrs); + if (dma_mapping_error(dev, dma)) { + xp_dma_unmap(pool, attrs); + return -ENOMEM; + } + pool->dma_pages[i] = dma; + } + + if (pool->unaligned) + xp_check_dma_contiguity(pool); + + pool->dev = dev; + pool->cheap_dma = xp_check_cheap_dma(pool); + return 0; +} +EXPORT_SYMBOL(xp_dma_map); + +static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool, + u64 addr) +{ + return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size); +} + +static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr) +{ + *addr = xp_unaligned_extract_addr(*addr); + if (*addr >= pool->addrs_cnt || + *addr + pool->chunk_size > pool->addrs_cnt || + xp_addr_crosses_non_contig_pg(pool, *addr)) + return false; + return true; +} + +static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr) +{ + *addr = xp_aligned_extract_addr(pool, *addr); + return *addr < pool->addrs_cnt; +} + +static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) +{ + struct xdp_buff_xsk *xskb; + u64 addr; + bool ok; + + if (pool->free_heads_cnt == 0) + return NULL; + + xskb = pool->free_heads[--pool->free_heads_cnt]; + + for (;;) { + if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) { + xp_release(xskb); + return NULL; + } + + ok = pool->unaligned ? xp_check_unaligned(pool, &addr) : + xp_check_aligned(pool, &addr); + if (!ok) { + pool->fq->invalid_descs++; + xskq_cons_release(pool->fq); + continue; + } + break; + } + xskq_cons_release(pool->fq); + + xskb->orig_addr = addr; + xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; + if (pool->dma_pages_cnt) { + xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] & + ~XSK_NEXT_PG_CONTIG_MASK) + + (addr & ~PAGE_MASK); + xskb->dma = xskb->frame_dma + pool->headroom + + XDP_PACKET_HEADROOM; + } + return xskb; +} + +struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) +{ + struct xdp_buff_xsk *xskb; + + if (!pool->free_list_cnt) { + xskb = __xp_alloc(pool); + if (!xskb) + return NULL; + } else { + pool->free_list_cnt--; + xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, + free_list_node); + list_del(&xskb->free_list_node); + } + + xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; + xskb->xdp.data_meta = xskb->xdp.data; + + if (!pool->cheap_dma) { + dma_sync_single_range_for_device(pool->dev, xskb->dma, 0, + pool->frame_len, + DMA_BIDIRECTIONAL); + } + return &xskb->xdp; +} +EXPORT_SYMBOL(xp_alloc); + +bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count) +{ + if (pool->free_list_cnt >= count) + return true; + return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt); +} +EXPORT_SYMBOL(xp_can_alloc); + +void xp_free(struct xdp_buff_xsk *xskb) +{ + xskb->pool->free_list_cnt++; + list_add(&xskb->free_list_node, &xskb->pool->free_list); +} +EXPORT_SYMBOL(xp_free); + +void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr) +{ + addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; + return pool->addrs + addr; +} +EXPORT_SYMBOL(xp_raw_get_data); + +dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) +{ + addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; + return (pool->dma_pages[addr >> PAGE_SHIFT] & + ~XSK_NEXT_PG_CONTIG_MASK) + + (addr & ~PAGE_MASK); +} +EXPORT_SYMBOL(xp_raw_get_dma); + +void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb) +{ + dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0, + xskb->pool->frame_len, DMA_BIDIRECTIONAL); +} +EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow); + +void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma, + size_t size) +{ + dma_sync_single_range_for_device(pool->dev, dma, 0, + size, DMA_BIDIRECTIONAL); +} +EXPORT_SYMBOL(xp_dma_sync_for_device_slow); diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index f59791ba43a0..0163b26aaf63 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) du.id = umem->id; du.size = umem->size; du.num_pages = umem->npgs; - du.chunk_size = umem->chunk_size_nohr + umem->headroom; + du.chunk_size = umem->chunk_size; du.headroom = umem->headroom; du.ifindex = umem->dev ? umem->dev->ifindex : 0; du.queue_id = umem->queue_id; diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c index 57fb81bd593c..6cf9586e5027 100644 --- a/net/xdp/xsk_queue.c +++ b/net/xdp/xsk_queue.c @@ -6,18 +6,10 @@ #include <linux/log2.h> #include <linux/slab.h> #include <linux/overflow.h> +#include <net/xdp_sock_drv.h> #include "xsk_queue.h" -void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask) -{ - if (!q) - return; - - q->umem_size = umem_size; - q->chunk_mask = chunk_mask; -} - static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue) { struct xdp_umem_ring *umem_ring; @@ -63,56 +55,3 @@ void xskq_destroy(struct xsk_queue *q) page_frag_free(q->ring); kfree(q); } - -struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries) -{ - struct xdp_umem_fq_reuse *newq; - - /* Check for overflow */ - if (nentries > (u32)roundup_pow_of_two(nentries)) - return NULL; - nentries = roundup_pow_of_two(nentries); - - newq = kvmalloc(struct_size(newq, handles, nentries), GFP_KERNEL); - if (!newq) - return NULL; - memset(newq, 0, offsetof(typeof(*newq), handles)); - - newq->nentries = nentries; - return newq; -} -EXPORT_SYMBOL_GPL(xsk_reuseq_prepare); - -struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, - struct xdp_umem_fq_reuse *newq) -{ - struct xdp_umem_fq_reuse *oldq = umem->fq_reuse; - - if (!oldq) { - umem->fq_reuse = newq; - return NULL; - } - - if (newq->nentries < oldq->length) - return newq; - - memcpy(newq->handles, oldq->handles, - array_size(oldq->length, sizeof(u64))); - newq->length = oldq->length; - - umem->fq_reuse = newq; - return oldq; -} -EXPORT_SYMBOL_GPL(xsk_reuseq_swap); - -void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq) -{ - kvfree(rq); -} -EXPORT_SYMBOL_GPL(xsk_reuseq_free); - -void xsk_reuseq_destroy(struct xdp_umem *umem) -{ - xsk_reuseq_free(umem->fq_reuse); - umem->fq_reuse = NULL; -} diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 648733ec24ac..5b5d24d2dd37 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -9,6 +9,9 @@ #include <linux/types.h> #include <linux/if_xdp.h> #include <net/xdp_sock.h> +#include <net/xsk_buff_pool.h> + +#include "xsk.h" struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; @@ -29,8 +32,6 @@ struct xdp_umem_ring { }; struct xsk_queue { - u64 chunk_mask; - u64 umem_size; u32 ring_mask; u32 nentries; u32 cached_prod; @@ -103,98 +104,73 @@ struct xsk_queue { /* Functions that read and validate content from consumer rings. */ -static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem, - u64 addr, - u64 length) +static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) { - bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE; - bool next_pg_contig = - (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr & - XSK_NEXT_PG_CONTIG_MASK; - - return cross_pg && !next_pg_contig; -} + struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; -static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q, - u64 addr, - u64 length, - struct xdp_umem *umem) -{ - u64 base_addr = xsk_umem_extract_addr(addr); + if (q->cached_cons != q->cached_prod) { + u32 idx = q->cached_cons & q->ring_mask; - addr = xsk_umem_add_offset_to_addr(addr); - if (base_addr >= q->umem_size || addr >= q->umem_size || - xskq_cons_crosses_non_contig_pg(umem, addr, length)) { - q->invalid_descs++; - return false; + *addr = ring->desc[idx]; + return true; } - return true; + return false; } -static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr) +static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, + struct xdp_desc *desc) { - if (addr >= q->umem_size) { - q->invalid_descs++; + u64 chunk, chunk_end; + + chunk = xp_aligned_extract_addr(pool, desc->addr); + chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len); + if (chunk != chunk_end) + return false; + + if (chunk >= pool->addrs_cnt) return false; - } + if (desc->options) + return false; return true; } -static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr, - struct xdp_umem *umem) +static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, + struct xdp_desc *desc) { - struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; - - while (q->cached_cons != q->cached_prod) { - u32 idx = q->cached_cons & q->ring_mask; + u64 addr, base_addr; - *addr = ring->desc[idx] & q->chunk_mask; + base_addr = xp_unaligned_extract_addr(desc->addr); + addr = xp_unaligned_add_offset_to_addr(desc->addr); - if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { - if (xskq_cons_is_valid_unaligned(q, *addr, - umem->chunk_size_nohr, - umem)) - return true; - goto out; - } + if (desc->len > pool->chunk_size) + return false; - if (xskq_cons_is_valid_addr(q, *addr)) - return true; + if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt || + xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) + return false; -out: - q->cached_cons++; - } + if (desc->options) + return false; + return true; +} - return false; +static inline bool xp_validate_desc(struct xsk_buff_pool *pool, + struct xdp_desc *desc) +{ + return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) : + xp_aligned_validate_desc(pool, desc); } static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d, struct xdp_umem *umem) { - if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { - if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem)) - return false; - - if (d->len > umem->chunk_size_nohr || d->options) { - q->invalid_descs++; - return false; - } - - return true; - } - - if (!xskq_cons_is_valid_addr(q, d->addr)) - return false; - - if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) || - d->options) { + if (!xp_validate_desc(umem->pool, d)) { q->invalid_descs++; return false; } - return true; } @@ -250,12 +226,11 @@ static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt) return entries >= cnt; } -static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr, - struct xdp_umem *umem) +static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr) { if (q->cached_prod == q->cached_cons) xskq_cons_get_entries(q); - return xskq_cons_read_addr(q, addr, umem); + return xskq_cons_read_addr_unchecked(q, addr); } static inline bool xskq_cons_peek_desc(struct xsk_queue *q, @@ -379,11 +354,7 @@ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) return q ? q->invalid_descs : 0; } -void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask); struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); void xskq_destroy(struct xsk_queue *q_ops); -/* Executed by the core when the entire UMEM gets freed */ -void xsk_reuseq_destroy(struct xdp_umem *umem); - #endif /* _LINUX_XSK_QUEUE_H */ diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c new file mode 100644 index 000000000000..1dc7208c71ba --- /dev/null +++ b/net/xdp/xskmap.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 +/* XSKMAP used for AF_XDP sockets + * Copyright(c) 2018 Intel Corporation. + */ + +#include <linux/bpf.h> +#include <linux/capability.h> +#include <net/xdp_sock.h> +#include <linux/slab.h> +#include <linux/sched.h> + +#include "xsk.h" + +int xsk_map_inc(struct xsk_map *map) +{ + bpf_map_inc(&map->map); + return 0; +} + +void xsk_map_put(struct xsk_map *map) +{ + bpf_map_put(&map->map); +} + +static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, + struct xdp_sock **map_entry) +{ + struct xsk_map_node *node; + int err; + + node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN); + if (!node) + return ERR_PTR(-ENOMEM); + + err = xsk_map_inc(map); + if (err) { + kfree(node); + return ERR_PTR(err); + } + + node->map = map; + node->map_entry = map_entry; + return node; +} + +static void xsk_map_node_free(struct xsk_map_node *node) +{ + xsk_map_put(node->map); + kfree(node); +} + +static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node) +{ + spin_lock_bh(&xs->map_list_lock); + list_add_tail(&node->node, &xs->map_list); + spin_unlock_bh(&xs->map_list_lock); +} + +static void xsk_map_sock_delete(struct xdp_sock *xs, + struct xdp_sock **map_entry) +{ + struct xsk_map_node *n, *tmp; + + spin_lock_bh(&xs->map_list_lock); + list_for_each_entry_safe(n, tmp, &xs->map_list, node) { + if (map_entry == n->map_entry) { + list_del(&n->node); + xsk_map_node_free(n); + } + } + spin_unlock_bh(&xs->map_list_lock); +} + +static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) +{ + struct bpf_map_memory mem; + int err, numa_node; + struct xsk_map *m; + u64 size; + + if (!capable(CAP_NET_ADMIN)) + return ERR_PTR(-EPERM); + + if (attr->max_entries == 0 || attr->key_size != 4 || + attr->value_size != 4 || + attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) + return ERR_PTR(-EINVAL); + + numa_node = bpf_map_attr_numa_node(attr); + size = struct_size(m, xsk_map, attr->max_entries); + + err = bpf_map_charge_init(&mem, size); + if (err < 0) + return ERR_PTR(err); + + m = bpf_map_area_alloc(size, numa_node); + if (!m) { + bpf_map_charge_finish(&mem); + return ERR_PTR(-ENOMEM); + } + + bpf_map_init_from_attr(&m->map, attr); + bpf_map_charge_move(&m->map.memory, &mem); + spin_lock_init(&m->lock); + + return &m->map; +} + +static void xsk_map_free(struct bpf_map *map) +{ + struct xsk_map *m = container_of(map, struct xsk_map, map); + + bpf_clear_redirect_map(map); + synchronize_net(); + bpf_map_area_free(m); +} + +static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + struct xsk_map *m = container_of(map, struct xsk_map, map); + u32 index = key ? *(u32 *)key : U32_MAX; + u32 *next = next_key; + + if (index >= m->map.max_entries) { + *next = 0; + return 0; + } + + if (index == m->map.max_entries - 1) + return -ENOENT; + *next = index + 1; + return 0; +} + +static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) +{ + const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2; + struct bpf_insn *insn = insn_buf; + + *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); + *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *))); + *insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map)); + *insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0); + *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *insn++ = BPF_MOV64_IMM(ret, 0); + return insn - insn_buf; +} + +static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return __xsk_map_lookup_elem(map, *(u32 *)key); +} + +static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + struct xsk_map *m = container_of(map, struct xsk_map, map); + struct xdp_sock *xs, *old_xs, **map_entry; + u32 i = *(u32 *)key, fd = *(u32 *)value; + struct xsk_map_node *node; + struct socket *sock; + int err; + + if (unlikely(map_flags > BPF_EXIST)) + return -EINVAL; + if (unlikely(i >= m->map.max_entries)) + return -E2BIG; + + sock = sockfd_lookup(fd, &err); + if (!sock) + return err; + + if (sock->sk->sk_family != PF_XDP) { + sockfd_put(sock); + return -EOPNOTSUPP; + } + + xs = (struct xdp_sock *)sock->sk; + + if (!xsk_is_setup_for_bpf_map(xs)) { + sockfd_put(sock); + return -EOPNOTSUPP; + } + + map_entry = &m->xsk_map[i]; + node = xsk_map_node_alloc(m, map_entry); + if (IS_ERR(node)) { + sockfd_put(sock); + return PTR_ERR(node); + } + + spin_lock_bh(&m->lock); + old_xs = READ_ONCE(*map_entry); + if (old_xs == xs) { + err = 0; + goto out; + } else if (old_xs && map_flags == BPF_NOEXIST) { + err = -EEXIST; + goto out; + } else if (!old_xs && map_flags == BPF_EXIST) { + err = -ENOENT; + goto out; + } + xsk_map_sock_add(xs, node); + WRITE_ONCE(*map_entry, xs); + if (old_xs) + xsk_map_sock_delete(old_xs, map_entry); + spin_unlock_bh(&m->lock); + sockfd_put(sock); + return 0; + +out: + spin_unlock_bh(&m->lock); + sockfd_put(sock); + xsk_map_node_free(node); + return err; +} + +static int xsk_map_delete_elem(struct bpf_map *map, void *key) +{ + struct xsk_map *m = container_of(map, struct xsk_map, map); + struct xdp_sock *old_xs, **map_entry; + int k = *(u32 *)key; + + if (k >= map->max_entries) + return -EINVAL; + + spin_lock_bh(&m->lock); + map_entry = &m->xsk_map[k]; + old_xs = xchg(map_entry, NULL); + if (old_xs) + xsk_map_sock_delete(old_xs, map_entry); + spin_unlock_bh(&m->lock); + + return 0; +} + +void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, + struct xdp_sock **map_entry) +{ + spin_lock_bh(&map->lock); + if (READ_ONCE(*map_entry) == xs) { + WRITE_ONCE(*map_entry, NULL); + xsk_map_sock_delete(xs, map_entry); + } + spin_unlock_bh(&map->lock); +} + +const struct bpf_map_ops xsk_map_ops = { + .map_alloc = xsk_map_alloc, + .map_free = xsk_map_free, + .map_get_next_key = xsk_map_get_next_key, + .map_lookup_elem = xsk_map_lookup_elem, + .map_gen_lookup = xsk_map_gen_lookup, + .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only, + .map_update_elem = xsk_map_update_elem, + .map_delete_elem = xsk_map_delete_elem, + .map_check_btf = map_check_no_btf, +}; diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 6921a18201a0..b7fd9c838416 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -99,4 +99,7 @@ config NET_KEY_MIGRATE If unsure, say N. +config XFRM_ESPINTCP + bool + endif # INET diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 212a4fcb4a88..2d4bb4b9f75e 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -11,4 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o -obj-$(CONFIG_INET_ESPINTCP) += espintcp.o +obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 037ea156d2f9..100e29682b48 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -6,6 +6,9 @@ #include <net/espintcp.h> #include <linux/skmsg.h> #include <net/inet_common.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6_stubs.h> +#endif static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, struct sock *sk) @@ -31,7 +34,12 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk) rcu_read_lock(); skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); local_bh_disable(); - xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); + else +#endif + xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); local_bh_enable(); rcu_read_unlock(); } @@ -347,6 +355,9 @@ unlock: static struct proto espintcp_prot __ro_after_init; static struct proto_ops espintcp_ops __ro_after_init; +static struct proto espintcp6_prot; +static struct proto_ops espintcp6_ops; +static DEFINE_MUTEX(tcpv6_prot_mutex); static void espintcp_data_ready(struct sock *sk) { @@ -379,15 +390,20 @@ static void espintcp_destruct(struct sock *sk) { struct espintcp_ctx *ctx = espintcp_getctx(sk); + ctx->saved_destruct(sk); kfree(ctx); } bool tcp_is_ulp_esp(struct sock *sk) { - return sk->sk_prot == &espintcp_prot; + return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot; } EXPORT_SYMBOL_GPL(tcp_is_ulp_esp); +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops); static int espintcp_init_sk(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -415,10 +431,22 @@ static int espintcp_init_sk(struct sock *sk) strp_check_rcv(&ctx->strp); skb_queue_head_init(&ctx->ike_queue); skb_queue_head_init(&ctx->out_queue); - sk->sk_prot = &espintcp_prot; - sk->sk_socket->ops = &espintcp_ops; + + if (sk->sk_family == AF_INET) { + sk->sk_prot = &espintcp_prot; + sk->sk_socket->ops = &espintcp_ops; + } else { + mutex_lock(&tcpv6_prot_mutex); + if (!espintcp6_prot.recvmsg) + build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops); + mutex_unlock(&tcpv6_prot_mutex); + + sk->sk_prot = &espintcp6_prot; + sk->sk_socket->ops = &espintcp6_ops; + } ctx->saved_data_ready = sk->sk_data_ready; ctx->saved_write_space = sk->sk_write_space; + ctx->saved_destruct = sk->sk_destruct; sk->sk_data_ready = espintcp_data_ready; sk->sk_write_space = espintcp_write_space; sk->sk_destruct = espintcp_destruct; @@ -489,6 +517,20 @@ static __poll_t espintcp_poll(struct file *file, struct socket *sock, return mask; } +static void build_protos(struct proto *espintcp_prot, + struct proto_ops *espintcp_ops, + const struct proto *orig_prot, + const struct proto_ops *orig_ops) +{ + memcpy(espintcp_prot, orig_prot, sizeof(struct proto)); + memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops)); + espintcp_prot->sendmsg = espintcp_sendmsg; + espintcp_prot->recvmsg = espintcp_recvmsg; + espintcp_prot->close = espintcp_close; + espintcp_prot->release_cb = espintcp_release; + espintcp_ops->poll = espintcp_poll; +} + static struct tcp_ulp_ops espintcp_ulp __read_mostly = { .name = "espintcp", .owner = THIS_MODULE, @@ -497,13 +539,7 @@ static struct tcp_ulp_ops espintcp_ulp __read_mostly = { void __init espintcp_init(void) { - memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot)); - memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops)); - espintcp_prot.sendmsg = espintcp_sendmsg; - espintcp_prot.recvmsg = espintcp_recvmsg; - espintcp_prot.close = espintcp_close; - espintcp_prot.release_cb = espintcp_release; - espintcp_ops.poll = espintcp_poll; + build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops); tcp_register_ulp(&espintcp_ulp); } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 6cc7f7f1dd68..f50d1f97cf8e 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -25,12 +25,10 @@ static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb, struct xfrm_offload *xo = xfrm_offload(skb); skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + hsize + x->props.header_len); - - if (xo->flags & XFRM_GSO_SEGMENT) { - skb_reset_transport_header(skb); + if (xo->flags & XFRM_GSO_SEGMENT) skb->transport_header -= x->props.header_len; - } + + pskb_pull(skb, skb_transport_offset(skb) + x->props.header_len); } static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h index c7b0318938e2..efc5e6b2e87b 100644 --- a/net/xfrm/xfrm_inout.h +++ b/net/xfrm/xfrm_inout.h @@ -6,6 +6,38 @@ #ifndef XFRM_INOUT_H #define XFRM_INOUT_H 1 +static inline void xfrm4_extract_header(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); + XFRM_MODE_SKB_CB(skb)->id = iph->id; + XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; + XFRM_MODE_SKB_CB(skb)->tos = iph->tos; + XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; + XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); + memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); +} + +static inline void xfrm6_extract_header(struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *iph = ipv6_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); + XFRM_MODE_SKB_CB(skb)->id = 0; + XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); + XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); + XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->optlen = 0; + memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); +#else + WARN_ON_ONCE(1); +#endif +} + static inline void xfrm6_beet_make_header(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index aa35f23c4912..bd984ff17c2d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -353,17 +353,18 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { const struct xfrm_mode *inner_mode = &x->inner_mode; - const struct xfrm_state_afinfo *afinfo; - int err = -EAFNOSUPPORT; - - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family); - if (likely(afinfo)) - err = afinfo->extract_input(x, skb); - rcu_read_unlock(); - if (err) - return err; + switch (x->outer_mode.family) { + case AF_INET: + xfrm4_extract_header(skb); + break; + case AF_INET6: + xfrm6_extract_header(skb); + break; + default: + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; + } if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@ -644,7 +645,7 @@ resume: dev_put(skb->dev); spin_lock(&x->lock); - if (nexthdr <= 0) { + if (nexthdr < 0) { if (nexthdr == -EBADMSG) { xfrm_audit_state_icvfail(x, skb, x->type->proto); diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 3361e3ac5714..c407ecbc5d46 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -145,7 +145,6 @@ static int xfrmi_create(struct net_device *dev) if (err < 0) goto out; - dev_hold(dev); xfrmi_link(xfrmn, xi); return 0; @@ -175,7 +174,6 @@ static void xfrmi_dev_uninit(struct net_device *dev) struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); xfrmi_unlink(xfrmn, xi); - dev_put(dev); } static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) @@ -750,13 +748,35 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { .get_link_net = xfrmi_get_link_net, }; +static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) +{ + struct net *net; + LIST_HEAD(list); + + rtnl_lock(); + list_for_each_entry(net, net_exit_list, exit_list) { + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); + struct xfrm_if __rcu **xip; + struct xfrm_if *xi; + + for (xip = &xfrmn->xfrmi[0]; + (xi = rtnl_dereference(*xip)) != NULL; + xip = &xi->next) + unregister_netdevice_queue(xi->dev, &list); + } + unregister_netdevice_many(&list); + rtnl_unlock(); +} + static struct pernet_operations xfrmi_net_ops = { + .exit_batch = xfrmi_exit_batch_net, .id = &xfrmi_net_id, .size = sizeof(struct xfrmi_net), }; static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -764,6 +784,7 @@ static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -771,6 +792,7 @@ static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { .handler = xfrm6_rcv, + .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 2fd3d990d992..e4c23f69f69f 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -13,9 +13,15 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> +#include <net/icmp.h> #include <net/inet_ecn.h> #include <net/xfrm.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_route.h> +#include <net/ipv6_stubs.h> +#endif + #include "xfrm_inout.h" static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -565,6 +571,22 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) struct xfrm_state *x = skb_dst(skb)->xfrm; int err; + switch (x->outer_mode.family) { + case AF_INET: + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); +#ifdef CONFIG_NETFILTER + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; +#endif + break; + case AF_INET6: + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + +#ifdef CONFIG_NETFILTER + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; +#endif + break; + } + secpath_reset(skb); if (xfrm_dev_offload_ok(skb, x)) { @@ -583,18 +605,20 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) xfrm_state_hold(x); if (skb_is_gso(skb)) { - skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; + if (skb->inner_protocol) + return xfrm_output_gso(net, sk, skb); - return xfrm_output2(net, sk, skb); + skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; + goto out; } if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM) goto out; + } else { + if (skb_is_gso(skb)) + return xfrm_output_gso(net, sk, skb); } - if (skb_is_gso(skb)) - return xfrm_output_gso(net, sk, skb); - if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); if (err) { @@ -609,11 +633,101 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output); +static int xfrm4_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + + if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) + goto out; + + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) + goto out; + + mtu = dst_mtu(skb_dst(skb)); + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { + skb->protocol = htons(ETH_P_IP); + + if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + ret = -EMSGSIZE; + } +out: + return ret; +} + +static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + err = xfrm4_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; + + xfrm4_extract_header(skb); + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + struct dst_entry *dst = skb_dst(skb); + + if (skb->ignore_df) + goto out; + + mtu = dst_mtu(dst); + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + + if (xfrm6_local_dontfrag(skb->sk)) + ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); + else if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ret = -EMSGSIZE; + } +out: + return ret; +} +#endif + +static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + int err; + + err = xfrm6_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; + + xfrm6_extract_header(skb); + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_state_afinfo *afinfo; const struct xfrm_mode *inner_mode; - int err = -EAFNOSUPPORT; if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, @@ -624,13 +738,14 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) if (inner_mode == NULL) return -EAFNOSUPPORT; - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); - if (likely(afinfo)) - err = afinfo->extract_output(x, skb); - rcu_read_unlock(); + switch (inner_mode->family) { + case AF_INET: + return xfrm4_extract_output(x, skb); + case AF_INET6: + return xfrm6_extract_output(x, skb); + } - return err; + return -EAFNOSUPPORT; } void xfrm_local_error(struct sk_buff *skb, int mtu) @@ -640,7 +755,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu) if (skb->protocol == htons(ETH_P_IP)) proto = AF_INET; - else if (skb->protocol == htons(ETH_P_IPV6)) + else if (skb->protocol == htons(ETH_P_IPV6) && + skb->sk->sk_family == AF_INET6) proto = AF_INET6; else return; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 297b2fdb3c29..564aa6492e7c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1436,12 +1436,7 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, static bool xfrm_policy_mark_match(struct xfrm_policy *policy, struct xfrm_policy *pol) { - u32 mark = policy->mark.v & policy->mark.m; - - if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) - return true; - - if ((mark & pol->mark.m) == pol->mark.v && + if (policy->mark.v == pol->mark.v && policy->priority == pol->priority) return true; |