summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Makefile2
-rw-r--r--net/appletalk/ddp.c82
-rw-r--r--net/atm/ioctl.c96
-rw-r--r--net/atm/resources.c108
-rw-r--r--net/atm/resources.h5
-rw-r--r--net/ax25/af_ax25.c6
-rw-r--r--net/batman-adv/bat_v_elp.c15
-rw-r--r--net/batman-adv/gateway_client.c4
-rw-r--r--net/batman-adv/hard-interface.c4
-rw-r--r--net/batman-adv/icmp_socket.c3
-rw-r--r--net/batman-adv/routing.c4
-rw-r--r--net/batman-adv/soft-interface.c1
-rw-r--r--net/bpf/test_run.c8
-rw-r--r--net/bridge/br_mrp.c46
-rw-r--r--net/bridge/br_mrp_netlink.c272
-rw-r--r--net/bridge/br_multicast.c3
-rw-r--r--net/bridge/br_private_mrp.h2
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c6
-rw-r--r--net/ceph/messenger.c11
-rw-r--r--net/ceph/osd_client.c4
-rw-r--r--net/compat.c194
-rw-r--r--net/core/dev.c20
-rw-r--r--net/core/filter.c4
-rw-r--r--net/core/flow_dissector.c75
-rw-r--r--net/core/flow_offload.c6
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/sock.c189
-rw-r--r--net/core/xdp.c51
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/devres.c95
-rw-r--r--net/dsa/slave.c1
-rw-r--r--net/dsa/tag_8021q.c61
-rw-r--r--net/dsa/tag_mtk.c15
-rw-r--r--net/ethernet/eth.c28
-rw-r--r--net/ethtool/cabletest.c246
-rw-r--r--net/ethtool/channels.c2
-rw-r--r--net/ethtool/ioctl.c7
-rw-r--r--net/ethtool/linkstate.c75
-rw-r--r--net/ethtool/netlink.c9
-rw-r--r--net/ethtool/netlink.h1
-rw-r--r--net/ethtool/strset.c1
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/af_inet.c46
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/esp4_offload.c30
-rw-r--r--net/ipv4/fib_frontend.c22
-rw-r--r--net/ipv4/fib_trie.c51
-rw-r--r--net/ipv4/igmp.c18
-rw-r--r--net/ipv4/inet_connection_sock.c43
-rw-r--r--net/ipv4/ip_gre.c35
-rw-r--r--net/ipv4/ip_sockglue.c394
-rw-r--r--net/ipv4/ip_tunnel.c16
-rw-r--r--net/ipv4/ip_vti.c55
-rw-r--r--net/ipv4/ipip.c32
-rw-r--r--net/ipv4/ipmr.c124
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c7
-rw-r--r--net/ipv4/nexthop.c264
-rw-r--r--net/ipv4/route.c14
-rw-r--r--net/ipv4/tcp.c222
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c104
-rw-r--r--net/ipv4/udp.c10
-rw-r--r--net/ipv4/udp_tunnel.c4
-rw-r--r--net/ipv4/xfrm4_input.c5
-rw-r--r--net/ipv4/xfrm4_output.c63
-rw-r--r--net/ipv4/xfrm4_state.c24
-rw-r--r--net/ipv6/Kconfig12
-rw-r--r--net/ipv6/addrconf.c85
-rw-r--r--net/ipv6/af_inet6.c84
-rw-r--r--net/ipv6/ah6.c1
-rw-r--r--net/ipv6/esp6.c414
-rw-r--r--net/ipv6/esp6_offload.c44
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_tunnel.c247
-rw-r--r--net/ipv6/ip6_udp_tunnel.c9
-rw-r--r--net/ipv6/ip6_vti.c18
-rw-r--r--net/ipv6/ip6mr.c5
-rw-r--r--net/ipv6/ipcomp6.c1
-rw-r--r--net/ipv6/ipv6_sockglue.c292
-rw-r--r--net/ipv6/mcast.c17
-rw-r--r--net/ipv6/raw.c1
-rw-r--r--net/ipv6/route.c49
-rw-r--r--net/ipv6/sit.c363
-rw-r--r--net/ipv6/tcp_ipv6.c11
-rw-r--r--net/ipv6/tunnel6.c87
-rw-r--r--net/ipv6/xfrm6_input.c111
-rw-r--r--net/ipv6/xfrm6_output.c96
-rw-r--r--net/ipv6/xfrm6_protocol.c48
-rw-r--r--net/ipv6/xfrm6_state.c26
-rw-r--r--net/iucv/af_iucv.c253
-rw-r--r--net/iucv/iucv.c188
-rw-r--r--net/l2tp/l2tp_core.c3
-rw-r--r--net/l2tp/l2tp_ip.c29
-rw-r--r--net/l2tp/l2tp_ip6.c31
-rw-r--r--net/mac80211/agg-rx.c5
-rw-r--r--net/mac80211/agg-tx.c82
-rw-r--r--net/mac80211/cfg.c106
-rw-r--r--net/mac80211/chan.c1
-rw-r--r--net/mac80211/debugfs_netdev.c2
-rw-r--r--net/mac80211/driver-ops.h4
-rw-r--r--net/mac80211/he.c61
-rw-r--r--net/mac80211/ibss.c16
-rw-r--r--net/mac80211/ieee80211_i.h34
-rw-r--r--net/mac80211/iface.c5
-rw-r--r--net/mac80211/main.c15
-rw-r--r--net/mac80211/mesh.c55
-rw-r--r--net/mac80211/mesh.h2
-rw-r--r--net/mac80211/mesh_hwmp.c7
-rw-r--r--net/mac80211/mesh_plink.c9
-rw-r--r--net/mac80211/mlme.c182
-rw-r--r--net/mac80211/offchannel.c4
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c3
-rw-r--r--net/mac80211/rx.c122
-rw-r--r--net/mac80211/scan.c27
-rw-r--r--net/mac80211/spectmgmt.c4
-rw-r--r--net/mac80211/sta_info.h4
-rw-r--r--net/mac80211/status.c9
-rw-r--r--net/mac80211/tdls.c9
-rw-r--r--net/mac80211/trace.h41
-rw-r--r--net/mac80211/tx.c72
-rw-r--r--net/mac80211/util.c298
-rw-r--r--net/mac80211/vht.c10
-rw-r--r--net/mpls/af_mpls.c3
-rw-r--r--net/mpls/internal.h11
-rw-r--r--net/mptcp/crypto.c24
-rw-r--r--net/mptcp/options.c9
-rw-r--r--net/mptcp/protocol.c89
-rw-r--r--net/mptcp/protocol.h1
-rw-r--r--net/mptcp/subflow.c35
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c80
-rw-r--r--net/netfilter/nf_conntrack_pptp.c62
-rw-r--r--net/netfilter/nfnetlink_cthelper.c3
-rw-r--r--net/psample/psample.c165
-rw-r--r--net/qrtr/ns.c10
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rds/info.c3
-rw-r--r--net/rds/tcp.c11
-rw-r--r--net/rds/tcp.h4
-rw-r--r--net/rds/tcp_connect.c2
-rw-r--r--net/rds/tcp_listen.c47
-rw-r--r--net/rds/tcp_send.c9
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c13
-rw-r--r--net/rxrpc/ar-internal.h25
-rw-r--r--net/rxrpc/call_accept.c2
-rw-r--r--net/rxrpc/call_event.c22
-rw-r--r--net/rxrpc/input.c44
-rw-r--r--net/rxrpc/local_object.c34
-rw-r--r--net/rxrpc/misc.c5
-rw-r--r--net/rxrpc/output.c23
-rw-r--r--net/rxrpc/peer_event.c46
-rw-r--r--net/rxrpc/peer_object.c12
-rw-r--r--net/rxrpc/proc.c8
-rw-r--r--net/rxrpc/rtt.c195
-rw-r--r--net/rxrpc/rxkad.c3
-rw-r--r--net/rxrpc/sendmsg.c26
-rw-r--r--net/rxrpc/sysctl.c9
-rw-r--r--net/sched/act_ct.c3
-rw-r--r--net/sched/cls_flower.c295
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_cake.c65
-rw-r--r--net/sched/sch_fq_pie.c4
-rw-r--r--net/sched/sch_generic.c75
-rw-r--r--net/sctp/Kconfig2
-rw-r--r--net/sctp/associola.c8
-rw-r--r--net/sctp/ipv6.c1
-rw-r--r--net/sctp/sm_sideeffect.c14
-rw-r--r--net/sctp/sm_statefuns.c9
-rw-r--r--net/sctp/socket.c73
-rw-r--r--net/sctp/ulpevent.c5
-rw-r--r--net/smc/smc_cdc.c10
-rw-r--r--net/smc/smc_cdc.h4
-rw-r--r--net/smc/smc_core.c18
-rw-r--r--net/smc/smc_pnet.c2
-rw-r--r--net/socket.c156
-rw-r--r--net/sunrpc/clnt.c9
-rw-r--r--net/sunrpc/svcsock.c29
-rw-r--r--net/sunrpc/xprtsock.c40
-rw-r--r--net/tipc/bcast.c22
-rw-r--r--net/tipc/bcast.h9
-rw-r--r--net/tipc/link.c487
-rw-r--r--net/tipc/link.h11
-rw-r--r--net/tipc/msg.c15
-rw-r--r--net/tipc/msg.h43
-rw-r--r--net/tipc/netlink.c2
-rw-r--r--net/tipc/node.c73
-rw-r--r--net/tipc/socket.c82
-rw-r--r--net/tipc/socket.h2
-rw-r--r--net/tipc/sysctl.c9
-rw-r--r--net/tipc/topsrv.c6
-rw-r--r--net/tipc/trace.h17
-rw-r--r--net/tipc/udp_media.c6
-rw-r--r--net/tls/tls_device.c9
-rw-r--r--net/tls/tls_sw.c50
-rw-r--r--net/vmw_vsock/af_vsock.c2
-rw-r--r--net/vmw_vsock/virtio_transport_common.c8
-rw-r--r--net/wireless/Kconfig4
-rw-r--r--net/wireless/chan.c93
-rw-r--r--net/wireless/core.c29
-rw-r--r--net/wireless/core.h11
-rw-r--r--net/wireless/mlme.c112
-rw-r--r--net/wireless/nl80211.c341
-rw-r--r--net/wireless/rdev-ops.h20
-rw-r--r--net/wireless/reg.c40
-rw-r--r--net/wireless/scan.c4
-rw-r--r--net/wireless/sme.c9
-rw-r--r--net/wireless/trace.h66
-rw-r--r--net/wireless/util.c68
-rw-r--r--net/xdp/Makefile3
-rw-r--r--net/xdp/xdp_umem.c63
-rw-r--r--net/xdp/xdp_umem.h2
-rw-r--r--net/xdp/xsk.c204
-rw-r--r--net/xdp/xsk.h30
-rw-r--r--net/xdp/xsk_buff_pool.c336
-rw-r--r--net/xdp/xsk_diag.c2
-rw-r--r--net/xdp/xsk_queue.c63
-rw-r--r--net/xdp/xsk_queue.h117
-rw-r--r--net/xdp/xskmap.c267
-rw-r--r--net/xfrm/Kconfig3
-rw-r--r--net/xfrm/Makefile2
-rw-r--r--net/xfrm/espintcp.c58
-rw-r--r--net/xfrm/xfrm_device.c8
-rw-r--r--net/xfrm/xfrm_inout.h32
-rw-r--r--net/xfrm/xfrm_input.c23
-rw-r--r--net/xfrm/xfrm_interface.c26
-rw-r--r--net/xfrm/xfrm_output.c144
-rw-r--r--net/xfrm/xfrm_policy.c7
229 files changed, 7861 insertions, 4118 deletions
diff --git a/net/Makefile b/net/Makefile
index 07ea48160874..5744bf1997fd 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -6,7 +6,7 @@
# Rewritten to use lists instead of if-statements.
#
-obj-$(CONFIG_NET) := socket.o core/
+obj-$(CONFIG_NET) := devres.o socket.o core/
tmp-$(CONFIG_COMPAT) := compat.o
obj-$(CONFIG_NET) += $(tmp-y)
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index b41375d4d295..15787e8c0629 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -57,6 +57,7 @@
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/route.h>
+#include <net/compat.h>
#include <linux/atalk.h>
#include <linux/highmem.h>
@@ -867,6 +868,24 @@ static int atif_ioctl(int cmd, void __user *arg)
return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0;
}
+static int atrtr_ioctl_addrt(struct rtentry *rt)
+{
+ struct net_device *dev = NULL;
+
+ if (rt->rt_dev) {
+ char name[IFNAMSIZ];
+
+ if (copy_from_user(name, rt->rt_dev, IFNAMSIZ-1))
+ return -EFAULT;
+ name[IFNAMSIZ-1] = '\0';
+
+ dev = __dev_get_by_name(&init_net, name);
+ if (!dev)
+ return -ENODEV;
+ }
+ return atrtr_create(rt, dev);
+}
+
/* Routing ioctl() calls */
static int atrtr_ioctl(unsigned int cmd, void __user *arg)
{
@@ -882,19 +901,8 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg)
return atrtr_delete(&((struct sockaddr_at *)
&rt.rt_dst)->sat_addr);
- case SIOCADDRT: {
- struct net_device *dev = NULL;
- if (rt.rt_dev) {
- char name[IFNAMSIZ];
- if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1))
- return -EFAULT;
- name[IFNAMSIZ-1] = '\0';
- dev = __dev_get_by_name(&init_net, name);
- if (!dev)
- return -ENODEV;
- }
- return atrtr_create(&rt, dev);
- }
+ case SIOCADDRT:
+ return atrtr_ioctl_addrt(&rt);
}
return -EINVAL;
}
@@ -1832,20 +1840,58 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
#ifdef CONFIG_COMPAT
+static int atalk_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
+ struct compat_rtentry __user *ur)
+{
+ compat_uptr_t rtdev;
+ struct rtentry rt;
+
+ if (copy_from_user(&rt.rt_dst, &ur->rt_dst,
+ 3 * sizeof(struct sockaddr)) ||
+ get_user(rt.rt_flags, &ur->rt_flags) ||
+ get_user(rt.rt_metric, &ur->rt_metric) ||
+ get_user(rt.rt_mtu, &ur->rt_mtu) ||
+ get_user(rt.rt_window, &ur->rt_window) ||
+ get_user(rt.rt_irtt, &ur->rt_irtt) ||
+ get_user(rtdev, &ur->rt_dev))
+ return -EFAULT;
+
+ switch (cmd) {
+ case SIOCDELRT:
+ if (rt.rt_dst.sa_family != AF_APPLETALK)
+ return -EINVAL;
+ return atrtr_delete(&((struct sockaddr_at *)
+ &rt.rt_dst)->sat_addr);
+
+ case SIOCADDRT:
+ rt.rt_dev = compat_ptr(rtdev);
+ return atrtr_ioctl_addrt(&rt);
+ default:
+ return -EINVAL;
+ }
+}
static int atalk_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
+ void __user *argp = compat_ptr(arg);
+ struct sock *sk = sock->sk;
+
+ switch (cmd) {
+ case SIOCADDRT:
+ case SIOCDELRT:
+ return atalk_compat_routing_ioctl(sk, cmd, argp);
/*
* SIOCATALKDIFADDR is a SIOCPROTOPRIVATE ioctl number, so we
* cannot handle it in common code. The data we access if ifreq
* here is compatible, so we can simply call the native
* handler.
*/
- if (cmd == SIOCATALKDIFADDR)
- return atalk_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
-
- return -ENOIOCTLCMD;
+ case SIOCATALKDIFADDR:
+ return atalk_ioctl(sock, cmd, (unsigned long)argp);
+ default:
+ return -ENOIOCTLCMD;
+ }
}
-#endif
+#endif /* CONFIG_COMPAT */
static const struct net_proto_family atalk_family_ops = {
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index d955b683aa7c..838ebf0cabbf 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -56,6 +56,8 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
int error;
struct list_head *pos;
void __user *argp = (void __user *)arg;
+ void __user *buf;
+ int __user *len;
vcc = ATM_SD(sock);
switch (cmd) {
@@ -162,7 +164,49 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
if (error != -ENOIOCTLCMD)
goto done;
- error = atm_dev_ioctl(cmd, argp, compat);
+ if (cmd == ATM_GETNAMES) {
+ if (IS_ENABLED(CONFIG_COMPAT) && compat) {
+#ifdef CONFIG_COMPAT
+ struct compat_atm_iobuf __user *ciobuf = argp;
+ compat_uptr_t cbuf;
+ len = &ciobuf->length;
+ if (get_user(cbuf, &ciobuf->buffer))
+ return -EFAULT;
+ buf = compat_ptr(cbuf);
+#endif
+ } else {
+ struct atm_iobuf __user *iobuf = argp;
+ len = &iobuf->length;
+ if (get_user(buf, &iobuf->buffer))
+ return -EFAULT;
+ }
+ error = atm_getnames(buf, len);
+ } else {
+ int number;
+
+ if (IS_ENABLED(CONFIG_COMPAT) && compat) {
+#ifdef CONFIG_COMPAT
+ struct compat_atmif_sioc __user *csioc = argp;
+ compat_uptr_t carg;
+
+ len = &csioc->length;
+ if (get_user(carg, &csioc->arg))
+ return -EFAULT;
+ buf = compat_ptr(carg);
+ if (get_user(number, &csioc->number))
+ return -EFAULT;
+#endif
+ } else {
+ struct atmif_sioc __user *sioc = argp;
+
+ len = &sioc->length;
+ if (get_user(buf, &sioc->arg))
+ return -EFAULT;
+ if (get_user(number, &sioc->number))
+ return -EFAULT;
+ }
+ error = atm_dev_ioctl(cmd, buf, len, number, compat);
+ }
done:
return error;
@@ -230,61 +274,25 @@ static struct {
static int do_atm_iobuf(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
- struct atm_iobuf __user *iobuf;
- struct compat_atm_iobuf __user *iobuf32;
+ struct compat_atm_iobuf __user *iobuf32 = compat_ptr(arg);
u32 data;
- void __user *datap;
- int len, err;
-
- iobuf = compat_alloc_user_space(sizeof(*iobuf));
- iobuf32 = compat_ptr(arg);
- if (get_user(len, &iobuf32->length) ||
- get_user(data, &iobuf32->buffer))
+ if (get_user(data, &iobuf32->buffer))
return -EFAULT;
- datap = compat_ptr(data);
- if (put_user(len, &iobuf->length) ||
- put_user(datap, &iobuf->buffer))
- return -EFAULT;
-
- err = do_vcc_ioctl(sock, cmd, (unsigned long) iobuf, 0);
- if (!err) {
- if (copy_in_user(&iobuf32->length, &iobuf->length,
- sizeof(int)))
- err = -EFAULT;
- }
-
- return err;
+ return atm_getnames(&iobuf32->length, compat_ptr(data));
}
static int do_atmif_sioc(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
- struct atmif_sioc __user *sioc;
- struct compat_atmif_sioc __user *sioc32;
+ struct compat_atmif_sioc __user *sioc32 = compat_ptr(arg);
+ int number;
u32 data;
- void __user *datap;
- int err;
-
- sioc = compat_alloc_user_space(sizeof(*sioc));
- sioc32 = compat_ptr(arg);
- if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) ||
- get_user(data, &sioc32->arg))
+ if (get_user(data, &sioc32->arg) || get_user(number, &sioc32->number))
return -EFAULT;
- datap = compat_ptr(data);
- if (put_user(datap, &sioc->arg))
- return -EFAULT;
-
- err = do_vcc_ioctl(sock, cmd, (unsigned long) sioc, 0);
-
- if (!err) {
- if (copy_in_user(&sioc32->length, &sioc->length,
- sizeof(int)))
- err = -EFAULT;
- }
- return err;
+ return atm_dev_ioctl(cmd, compat_ptr(data), &sioc32->length, number, 0);
}
static int do_atm_ioctl(struct socket *sock, unsigned int cmd32,
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 889349c6d90d..94bdc6527ee8 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -193,88 +193,48 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg,
return error ? -EFAULT : 0;
}
-int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
+int atm_getnames(void __user *buf, int __user *iobuf_len)
{
- void __user *buf;
- int error, len, number, size = 0;
+ int error, len, size = 0;
struct atm_dev *dev;
struct list_head *p;
int *tmp_buf, *tmp_p;
- int __user *sioc_len;
- int __user *iobuf_len;
- switch (cmd) {
- case ATM_GETNAMES:
- if (IS_ENABLED(CONFIG_COMPAT) && compat) {
-#ifdef CONFIG_COMPAT
- struct compat_atm_iobuf __user *ciobuf = arg;
- compat_uptr_t cbuf;
- iobuf_len = &ciobuf->length;
- if (get_user(cbuf, &ciobuf->buffer))
- return -EFAULT;
- buf = compat_ptr(cbuf);
-#endif
- } else {
- struct atm_iobuf __user *iobuf = arg;
- iobuf_len = &iobuf->length;
- if (get_user(buf, &iobuf->buffer))
- return -EFAULT;
- }
- if (get_user(len, iobuf_len))
- return -EFAULT;
- mutex_lock(&atm_dev_mutex);
- list_for_each(p, &atm_devs)
- size += sizeof(int);
- if (size > len) {
- mutex_unlock(&atm_dev_mutex);
- return -E2BIG;
- }
- tmp_buf = kmalloc(size, GFP_ATOMIC);
- if (!tmp_buf) {
- mutex_unlock(&atm_dev_mutex);
- return -ENOMEM;
- }
- tmp_p = tmp_buf;
- list_for_each(p, &atm_devs) {
- dev = list_entry(p, struct atm_dev, dev_list);
- *tmp_p++ = dev->number;
- }
+ if (get_user(len, iobuf_len))
+ return -EFAULT;
+ mutex_lock(&atm_dev_mutex);
+ list_for_each(p, &atm_devs)
+ size += sizeof(int);
+ if (size > len) {
mutex_unlock(&atm_dev_mutex);
- error = ((copy_to_user(buf, tmp_buf, size)) ||
- put_user(size, iobuf_len))
- ? -EFAULT : 0;
- kfree(tmp_buf);
- return error;
- default:
- break;
+ return -E2BIG;
}
-
- if (IS_ENABLED(CONFIG_COMPAT) && compat) {
-#ifdef CONFIG_COMPAT
- struct compat_atmif_sioc __user *csioc = arg;
- compat_uptr_t carg;
-
- sioc_len = &csioc->length;
- if (get_user(carg, &csioc->arg))
- return -EFAULT;
- buf = compat_ptr(carg);
-
- if (get_user(len, &csioc->length))
- return -EFAULT;
- if (get_user(number, &csioc->number))
- return -EFAULT;
-#endif
- } else {
- struct atmif_sioc __user *sioc = arg;
-
- sioc_len = &sioc->length;
- if (get_user(buf, &sioc->arg))
- return -EFAULT;
- if (get_user(len, &sioc->length))
- return -EFAULT;
- if (get_user(number, &sioc->number))
- return -EFAULT;
+ tmp_buf = kmalloc(size, GFP_ATOMIC);
+ if (!tmp_buf) {
+ mutex_unlock(&atm_dev_mutex);
+ return -ENOMEM;
+ }
+ tmp_p = tmp_buf;
+ list_for_each(p, &atm_devs) {
+ dev = list_entry(p, struct atm_dev, dev_list);
+ *tmp_p++ = dev->number;
}
+ mutex_unlock(&atm_dev_mutex);
+ error = ((copy_to_user(buf, tmp_buf, size)) ||
+ put_user(size, iobuf_len))
+ ? -EFAULT : 0;
+ kfree(tmp_buf);
+ return error;
+}
+
+int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len,
+ int number, int compat)
+{
+ int error, len, size = 0;
+ struct atm_dev *dev;
+
+ if (get_user(len, sioc_len))
+ return -EFAULT;
dev = try_then_request_module(atm_dev_lookup(number), "atm-device-%d",
number);
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 048232e4d4c6..4a0839e92ff3 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -14,8 +14,9 @@
extern struct list_head atm_devs;
extern struct mutex atm_dev_mutex;
-int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat);
-
+int atm_getnames(void __user *buf, int __user *iobuf_len);
+int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len,
+ int number, int compat);
#ifdef CONFIG_PROC_FS
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index ff57ea89c27e..fd91cd34f25e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -635,8 +635,10 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_BINDTODEVICE:
- if (optlen > IFNAMSIZ)
- optlen = IFNAMSIZ;
+ if (optlen > IFNAMSIZ - 1)
+ optlen = IFNAMSIZ - 1;
+
+ memset(devname, 0, sizeof(devname));
if (copy_from_user(devname, optval, optlen)) {
res = -EFAULT;
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 353e49c40e7f..0bdefa35da98 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -127,20 +127,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
rtnl_lock();
ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
rtnl_unlock();
-
- /* Virtual interface drivers such as tun / tap interfaces, VLAN, etc
- * tend to initialize the interface throughput with some value for the
- * sake of having a throughput number to export via ethtool. This
- * exported throughput leaves batman-adv to conclude the interface
- * throughput is genuine (reflecting reality), thus no measurements
- * are necessary.
- *
- * Based on the observation that those interface types also tend to set
- * the link auto-negotiation to 'off', batman-adv shall check this
- * setting to differentiate between genuine link throughput information
- * and placeholders installed by virtual interfaces.
- */
- if (ret == 0 && link_settings.base.autoneg == AUTONEG_ENABLE) {
+ if (ret == 0) {
/* link characteristics might change over time */
if (link_settings.base.duplex == DUPLEX_FULL)
hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index e22e49289677..a18dcc686dc3 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -146,8 +146,8 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
if (new_gw_node)
kref_get(&new_gw_node->refcount);
- curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1);
- rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node);
+ curr_gw_node = rcu_replace_pointer(bat_priv->gw.curr_gw, new_gw_node,
+ true);
if (curr_gw_node)
batadv_gw_node_put(curr_gw_node);
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c7e98a40dd33..3a256af92784 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -473,8 +473,8 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
if (new_hard_iface)
kref_get(&new_hard_iface->refcount);
- curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1);
- rcu_assign_pointer(bat_priv->primary_if, new_hard_iface);
+ curr_hard_iface = rcu_replace_pointer(bat_priv->primary_if,
+ new_hard_iface, 1);
if (!new_hard_iface)
goto out;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index ccb535c77e5d..8bdabc03b0b2 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -135,9 +135,6 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf,
if (!buf || count < sizeof(struct batadv_icmp_packet))
return -EINVAL;
- if (!access_ok(buf, count))
- return -EFAULT;
-
error = wait_event_interruptible(socket_client->queue_wait,
socket_client->queue_len);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 3632bd976c56..d343382e9664 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -71,13 +71,13 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
* the code needs to ensure the curr_router variable contains a pointer
* to the replaced best neighbor.
*/
- curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
/* increase refcount of new best neighbor */
if (neigh_node)
kref_get(&neigh_node->refcount);
- rcu_assign_pointer(orig_ifinfo->router, neigh_node);
+ curr_router = rcu_replace_pointer(orig_ifinfo->router, neigh_node,
+ true);
spin_unlock_bh(&orig_node->neigh_list_lock);
batadv_orig_ifinfo_put(orig_ifinfo);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 822af540b854..0ddd80130ea3 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -22,6 +22,7 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/percpu.h>
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 30ba7d38941d..bfd4ccd80847 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -160,16 +160,20 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
{
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+ u32 user_size = kattr->test.data_size_in;
void *data;
if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
return ERR_PTR(-EINVAL);
+ if (user_size > size)
+ return ERR_PTR(-EMSGSIZE);
+
data = kzalloc(size + headroom + tailroom, GFP_USER);
if (!data)
return ERR_PTR(-ENOMEM);
- if (copy_from_user(data + headroom, data_in, size)) {
+ if (copy_from_user(data + headroom, data_in, user_size)) {
kfree(data);
return ERR_PTR(-EFAULT);
}
@@ -486,8 +490,6 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
/* XDP have extra tailroom as (most) drivers use full page */
max_data_sz = 4096 - headroom - tailroom;
- if (size > max_data_sz)
- return -EINVAL;
data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
if (IS_ERR(data))
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index d7bc09de4c13..8ea59504ef47 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -37,6 +37,26 @@ static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id)
return res;
}
+static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex)
+{
+ struct br_mrp *mrp;
+
+ list_for_each_entry_rcu(mrp, &br->mrp_list, list,
+ lockdep_rtnl_is_held()) {
+ struct net_bridge_port *p;
+
+ p = rtnl_dereference(mrp->p_port);
+ if (p && p->dev->ifindex == ifindex)
+ return false;
+
+ p = rtnl_dereference(mrp->s_port);
+ if (p && p->dev->ifindex == ifindex)
+ return false;
+ }
+
+ return true;
+}
+
static struct br_mrp *br_mrp_find_port(struct net_bridge *br,
struct net_bridge_port *p)
{
@@ -203,6 +223,7 @@ out:
static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
{
struct net_bridge_port *p;
+ u8 state;
/* Stop sending MRP_Test frames */
cancel_delayed_work_sync(&mrp->test_work);
@@ -214,20 +235,24 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
p = rtnl_dereference(mrp->p_port);
if (p) {
spin_lock_bh(&br->lock);
- p->state = BR_STATE_FORWARDING;
+ state = netif_running(br->dev) ?
+ BR_STATE_FORWARDING : BR_STATE_DISABLED;
+ p->state = state;
p->flags &= ~BR_MRP_AWARE;
spin_unlock_bh(&br->lock);
- br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING);
+ br_mrp_port_switchdev_set_state(p, state);
rcu_assign_pointer(mrp->p_port, NULL);
}
p = rtnl_dereference(mrp->s_port);
if (p) {
spin_lock_bh(&br->lock);
- p->state = BR_STATE_FORWARDING;
+ state = netif_running(br->dev) ?
+ BR_STATE_FORWARDING : BR_STATE_DISABLED;
+ p->state = state;
p->flags &= ~BR_MRP_AWARE;
spin_unlock_bh(&br->lock);
- br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING);
+ br_mrp_port_switchdev_set_state(p, state);
rcu_assign_pointer(mrp->s_port, NULL);
}
@@ -255,6 +280,11 @@ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance)
!br_mrp_get_port(br, instance->s_ifindex))
return -EINVAL;
+ /* It is not possible to have the same port part of multiple rings */
+ if (!br_mrp_unique_ifindex(br, instance->p_ifindex) ||
+ !br_mrp_unique_ifindex(br, instance->s_ifindex))
+ return -EINVAL;
+
mrp = kzalloc(sizeof(*mrp), GFP_KERNEL);
if (!mrp)
return -ENOMEM;
@@ -346,24 +376,24 @@ int br_mrp_set_port_state(struct net_bridge_port *p,
* note: already called with rtnl_lock
*/
int br_mrp_set_port_role(struct net_bridge_port *p,
- struct br_mrp_port_role *role)
+ enum br_mrp_port_role_type role)
{
struct br_mrp *mrp;
if (!p || !(p->flags & BR_MRP_AWARE))
return -EINVAL;
- mrp = br_mrp_find_id(p->br, role->ring_id);
+ mrp = br_mrp_find_port(p->br, p);
if (!mrp)
return -EINVAL;
- if (role->role == BR_MRP_PORT_ROLE_PRIMARY)
+ if (role == BR_MRP_PORT_ROLE_PRIMARY)
rcu_assign_pointer(mrp->p_port, p);
else
rcu_assign_pointer(mrp->s_port, p);
- br_mrp_port_switchdev_set_role(p, role->role);
+ br_mrp_port_switchdev_set_role(p, role);
return 0;
}
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index 397e7f710772..d9de780d2ce0 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -8,25 +8,234 @@
static const struct nla_policy br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = {
[IFLA_BRIDGE_MRP_UNSPEC] = { .type = NLA_REJECT },
- [IFLA_BRIDGE_MRP_INSTANCE] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct br_mrp_instance)},
- [IFLA_BRIDGE_MRP_PORT_STATE] = { .type = NLA_U32 },
- [IFLA_BRIDGE_MRP_PORT_ROLE] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct br_mrp_port_role)},
- [IFLA_BRIDGE_MRP_RING_STATE] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct br_mrp_ring_state)},
- [IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct br_mrp_ring_role)},
- [IFLA_BRIDGE_MRP_START_TEST] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct br_mrp_start_test)},
+ [IFLA_BRIDGE_MRP_INSTANCE] = { .type = NLA_NESTED },
+ [IFLA_BRIDGE_MRP_PORT_STATE] = { .type = NLA_NESTED },
+ [IFLA_BRIDGE_MRP_PORT_ROLE] = { .type = NLA_NESTED },
+ [IFLA_BRIDGE_MRP_RING_STATE] = { .type = NLA_NESTED },
+ [IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_NESTED },
+ [IFLA_BRIDGE_MRP_START_TEST] = { .type = NLA_NESTED },
};
+static const struct nla_policy
+br_mrp_instance_policy[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_INSTANCE_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_INSTANCE_RING_ID] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX] = { .type = NLA_U32 },
+};
+
+static int br_mrp_instance_parse(struct net_bridge *br, struct nlattr *attr,
+ int cmd, struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1];
+ struct br_mrp_instance inst;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_INSTANCE_MAX, attr,
+ br_mrp_instance_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID] ||
+ !tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] ||
+ !tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Missing attribute: RING_ID or P_IFINDEX or S_IFINDEX");
+ return -EINVAL;
+ }
+
+ memset(&inst, 0, sizeof(inst));
+
+ inst.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID]);
+ inst.p_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX]);
+ inst.s_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]);
+
+ if (cmd == RTM_SETLINK)
+ return br_mrp_add(br, &inst);
+ else
+ return br_mrp_del(br, &inst);
+
+ return 0;
+}
+
+static const struct nla_policy
+br_mrp_port_state_policy[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_PORT_STATE_STATE] = { .type = NLA_U32 },
+};
+
+static int br_mrp_port_state_parse(struct net_bridge_port *p,
+ struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1];
+ enum br_mrp_port_state_type state;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_STATE_MAX, attr,
+ br_mrp_port_state_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing attribute: STATE");
+ return -EINVAL;
+ }
+
+ state = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]);
+
+ return br_mrp_set_port_state(p, state);
+}
+
+static const struct nla_policy
+br_mrp_port_role_policy[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_PORT_ROLE_ROLE] = { .type = NLA_U32 },
+};
+
+static int br_mrp_port_role_parse(struct net_bridge_port *p,
+ struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1];
+ enum br_mrp_port_role_type role;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_ROLE_MAX, attr,
+ br_mrp_port_role_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing attribute: ROLE");
+ return -EINVAL;
+ }
+
+ role = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]);
+
+ return br_mrp_set_port_role(p, role);
+}
+
+static const struct nla_policy
+br_mrp_ring_state_policy[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_RING_STATE_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_RING_STATE_RING_ID] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_RING_STATE_STATE] = { .type = NLA_U32 },
+};
+
+static int br_mrp_ring_state_parse(struct net_bridge *br, struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1];
+ struct br_mrp_ring_state state;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_STATE_MAX, attr,
+ br_mrp_ring_state_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID] ||
+ !tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Missing attribute: RING_ID or STATE");
+ return -EINVAL;
+ }
+
+ memset(&state, 0x0, sizeof(state));
+
+ state.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID]);
+ state.ring_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]);
+
+ return br_mrp_set_ring_state(br, &state);
+}
+
+static const struct nla_policy
+br_mrp_ring_role_policy[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_RING_ROLE_ROLE] = { .type = NLA_U32 },
+};
+
+static int br_mrp_ring_role_parse(struct net_bridge *br, struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1];
+ struct br_mrp_ring_role role;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_ROLE_MAX, attr,
+ br_mrp_ring_role_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] ||
+ !tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Missing attribute: RING_ID or ROLE");
+ return -EINVAL;
+ }
+
+ memset(&role, 0x0, sizeof(role));
+
+ role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID]);
+ role.ring_role = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]);
+
+ return br_mrp_set_ring_role(br, &role);
+}
+
+static const struct nla_policy
+br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_START_TEST_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_START_TEST_RING_ID] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_START_TEST_INTERVAL] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_START_TEST_PERIOD] = { .type = NLA_U32 },
+};
+
+static int br_mrp_start_test_parse(struct net_bridge *br, struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_START_TEST_MAX + 1];
+ struct br_mrp_start_test test;
+ int err;
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_START_TEST_MAX, attr,
+ br_mrp_start_test_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID] ||
+ !tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL] ||
+ !tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] ||
+ !tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Missing attribute: RING_ID or INTERVAL or MAX_MISS or PERIOD");
+ return -EINVAL;
+ }
+
+ memset(&test, 0x0, sizeof(test));
+
+ test.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID]);
+ test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL]);
+ test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS]);
+ test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]);
+
+ return br_mrp_start_test(br, &test);
+}
+
int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
struct nlattr *attr, int cmd, struct netlink_ext_ack *extack)
{
struct nlattr *tb[IFLA_BRIDGE_MRP_MAX + 1];
int err;
+ /* When this function is called for a port then the br pointer is
+ * invalid, therefor set the br to point correctly
+ */
+ if (p)
+ br = p->br;
+
if (br->stp_enabled != BR_NO_STP) {
NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled");
return -EINVAL;
@@ -38,58 +247,45 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
return err;
if (tb[IFLA_BRIDGE_MRP_INSTANCE]) {
- struct br_mrp_instance *instance =
- nla_data(tb[IFLA_BRIDGE_MRP_INSTANCE]);
-
- if (cmd == RTM_SETLINK)
- err = br_mrp_add(br, instance);
- else
- err = br_mrp_del(br, instance);
+ err = br_mrp_instance_parse(br, tb[IFLA_BRIDGE_MRP_INSTANCE],
+ cmd, extack);
if (err)
return err;
}
if (tb[IFLA_BRIDGE_MRP_PORT_STATE]) {
- enum br_mrp_port_state_type state =
- nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE]);
-
- err = br_mrp_set_port_state(p, state);
+ err = br_mrp_port_state_parse(p, tb[IFLA_BRIDGE_MRP_PORT_STATE],
+ extack);
if (err)
return err;
}
if (tb[IFLA_BRIDGE_MRP_PORT_ROLE]) {
- struct br_mrp_port_role *role =
- nla_data(tb[IFLA_BRIDGE_MRP_PORT_ROLE]);
-
- err = br_mrp_set_port_role(p, role);
+ err = br_mrp_port_role_parse(p, tb[IFLA_BRIDGE_MRP_PORT_ROLE],
+ extack);
if (err)
return err;
}
if (tb[IFLA_BRIDGE_MRP_RING_STATE]) {
- struct br_mrp_ring_state *state =
- nla_data(tb[IFLA_BRIDGE_MRP_RING_STATE]);
-
- err = br_mrp_set_ring_state(br, state);
+ err = br_mrp_ring_state_parse(br,
+ tb[IFLA_BRIDGE_MRP_RING_STATE],
+ extack);
if (err)
return err;
}
if (tb[IFLA_BRIDGE_MRP_RING_ROLE]) {
- struct br_mrp_ring_role *role =
- nla_data(tb[IFLA_BRIDGE_MRP_RING_ROLE]);
-
- err = br_mrp_set_ring_role(br, role);
+ err = br_mrp_ring_role_parse(br, tb[IFLA_BRIDGE_MRP_RING_ROLE],
+ extack);
if (err)
return err;
}
if (tb[IFLA_BRIDGE_MRP_START_TEST]) {
- struct br_mrp_start_test *test =
- nla_data(tb[IFLA_BRIDGE_MRP_START_TEST]);
-
- err = br_mrp_start_test(br, test);
+ err = br_mrp_start_test_parse(br,
+ tb[IFLA_BRIDGE_MRP_START_TEST],
+ extack);
if (err)
return err;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index ad12fe3fca8c..83490bf73a13 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2413,7 +2413,8 @@ void br_multicast_uninit_stats(struct net_bridge *br)
free_percpu(br->mcast_stats);
}
-static void mcast_stats_add_dir(u64 *dst, u64 *src)
+/* noinline for https://bugs.llvm.org/show_bug.cgi?id=45802#c9 */
+static noinline_for_stack void mcast_stats_add_dir(u64 *dst, u64 *src)
{
dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX];
dst[BR_MCAST_DIR_TX] += src[BR_MCAST_DIR_TX];
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index 2921a4b59f8e..a0f53cc3ab85 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -37,7 +37,7 @@ int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance);
int br_mrp_set_port_state(struct net_bridge_port *p,
enum br_mrp_port_state_type state);
int br_mrp_set_port_role(struct net_bridge_port *p,
- struct br_mrp_port_role *role);
+ enum br_mrp_port_role_type role);
int br_mrp_set_ring_state(struct net_bridge *br,
struct br_mrp_ring_state *state);
int br_mrp_set_ring_role(struct net_bridge *br, struct br_mrp_ring_role *role);
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index b325b569e761..f48cf4cfb80f 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -31,6 +31,12 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source);
eth->h_proto = eth_hdr(oldskb)->h_proto;
skb_pull(nskb, ETH_HLEN);
+
+ if (skb_vlan_tag_present(oldskb)) {
+ u16 vid = skb_vlan_tag_get(oldskb);
+
+ __vlan_hwaccel_put_tag(nskb, oldskb->vlan_proto, vid);
+ }
}
static int nft_bridge_iphdr_validate(struct sk_buff *skb)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index f8ca5edc5f2c..27d6ab11f9ee 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -490,15 +490,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return ret;
}
- if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
- int optval = 1;
-
- ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)&optval, sizeof(optval));
- if (ret)
- pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d",
- ret);
- }
+ if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY))
+ tcp_sock_set_nodelay(sock->sk);
con->sock = sock;
return 0;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 998e26b75a78..1d4973f8cd7a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -3649,7 +3649,9 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
* supported.
*/
req->r_t.target_oloc.pool = m.redirect.oloc.pool;
- req->r_flags |= CEPH_OSD_FLAG_REDIRECTED;
+ req->r_flags |= CEPH_OSD_FLAG_REDIRECTED |
+ CEPH_OSD_FLAG_IGNORE_OVERLAY |
+ CEPH_OSD_FLAG_IGNORE_CACHE;
req->r_tid = 0;
__submit_request(req, false);
goto out_unlock_osdc;
diff --git a/net/compat.c b/net/compat.c
index 69fc6d1e4e6e..afd7b444e0bf 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -448,200 +448,6 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
return __compat_sys_getsockopt(fd, level, optname, optval, optlen);
}
-struct compat_group_req {
- __u32 gr_interface;
- struct __kernel_sockaddr_storage gr_group
- __aligned(4);
-} __packed;
-
-struct compat_group_source_req {
- __u32 gsr_interface;
- struct __kernel_sockaddr_storage gsr_group
- __aligned(4);
- struct __kernel_sockaddr_storage gsr_source
- __aligned(4);
-} __packed;
-
-struct compat_group_filter {
- __u32 gf_interface;
- struct __kernel_sockaddr_storage gf_group
- __aligned(4);
- __u32 gf_fmode;
- __u32 gf_numsrc;
- struct __kernel_sockaddr_storage gf_slist[1]
- __aligned(4);
-} __packed;
-
-#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
- sizeof(struct __kernel_sockaddr_storage))
-
-
-int compat_mc_setsockopt(struct sock *sock, int level, int optname,
- char __user *optval, unsigned int optlen,
- int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int))
-{
- char __user *koptval = optval;
- int koptlen = optlen;
-
- switch (optname) {
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- {
- struct compat_group_req __user *gr32 = (void __user *)optval;
- struct group_req __user *kgr =
- compat_alloc_user_space(sizeof(struct group_req));
- u32 interface;
-
- if (!access_ok(gr32, sizeof(*gr32)) ||
- !access_ok(kgr, sizeof(struct group_req)) ||
- __get_user(interface, &gr32->gr_interface) ||
- __put_user(interface, &kgr->gr_interface) ||
- copy_in_user(&kgr->gr_group, &gr32->gr_group,
- sizeof(kgr->gr_group)))
- return -EFAULT;
- koptval = (char __user *)kgr;
- koptlen = sizeof(struct group_req);
- break;
- }
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- {
- struct compat_group_source_req __user *gsr32 = (void __user *)optval;
- struct group_source_req __user *kgsr = compat_alloc_user_space(
- sizeof(struct group_source_req));
- u32 interface;
-
- if (!access_ok(gsr32, sizeof(*gsr32)) ||
- !access_ok(kgsr,
- sizeof(struct group_source_req)) ||
- __get_user(interface, &gsr32->gsr_interface) ||
- __put_user(interface, &kgsr->gsr_interface) ||
- copy_in_user(&kgsr->gsr_group, &gsr32->gsr_group,
- sizeof(kgsr->gsr_group)) ||
- copy_in_user(&kgsr->gsr_source, &gsr32->gsr_source,
- sizeof(kgsr->gsr_source)))
- return -EFAULT;
- koptval = (char __user *)kgsr;
- koptlen = sizeof(struct group_source_req);
- break;
- }
- case MCAST_MSFILTER:
- {
- struct compat_group_filter __user *gf32 = (void __user *)optval;
- struct group_filter __user *kgf;
- u32 interface, fmode, numsrc;
-
- if (!access_ok(gf32, __COMPAT_GF0_SIZE) ||
- __get_user(interface, &gf32->gf_interface) ||
- __get_user(fmode, &gf32->gf_fmode) ||
- __get_user(numsrc, &gf32->gf_numsrc))
- return -EFAULT;
- koptlen = optlen + sizeof(struct group_filter) -
- sizeof(struct compat_group_filter);
- if (koptlen < GROUP_FILTER_SIZE(numsrc))
- return -EINVAL;
- kgf = compat_alloc_user_space(koptlen);
- if (!access_ok(kgf, koptlen) ||
- __put_user(interface, &kgf->gf_interface) ||
- __put_user(fmode, &kgf->gf_fmode) ||
- __put_user(numsrc, &kgf->gf_numsrc) ||
- copy_in_user(&kgf->gf_group, &gf32->gf_group,
- sizeof(kgf->gf_group)) ||
- (numsrc && copy_in_user(kgf->gf_slist, gf32->gf_slist,
- numsrc * sizeof(kgf->gf_slist[0]))))
- return -EFAULT;
- koptval = (char __user *)kgf;
- break;
- }
-
- default:
- break;
- }
- return setsockopt(sock, level, optname, koptval, koptlen);
-}
-EXPORT_SYMBOL(compat_mc_setsockopt);
-
-int compat_mc_getsockopt(struct sock *sock, int level, int optname,
- char __user *optval, int __user *optlen,
- int (*getsockopt)(struct sock *, int, int, char __user *, int __user *))
-{
- struct compat_group_filter __user *gf32 = (void __user *)optval;
- struct group_filter __user *kgf;
- int __user *koptlen;
- u32 interface, fmode, numsrc;
- int klen, ulen, err;
-
- if (optname != MCAST_MSFILTER)
- return getsockopt(sock, level, optname, optval, optlen);
-
- koptlen = compat_alloc_user_space(sizeof(*koptlen));
- if (!access_ok(optlen, sizeof(*optlen)) ||
- __get_user(ulen, optlen))
- return -EFAULT;
-
- /* adjust len for pad */
- klen = ulen + sizeof(*kgf) - sizeof(*gf32);
-
- if (klen < GROUP_FILTER_SIZE(0))
- return -EINVAL;
-
- if (!access_ok(koptlen, sizeof(*koptlen)) ||
- __put_user(klen, koptlen))
- return -EFAULT;
-
- /* have to allow space for previous compat_alloc_user_space, too */
- kgf = compat_alloc_user_space(klen+sizeof(*optlen));
-
- if (!access_ok(gf32, __COMPAT_GF0_SIZE) ||
- __get_user(interface, &gf32->gf_interface) ||
- __get_user(fmode, &gf32->gf_fmode) ||
- __get_user(numsrc, &gf32->gf_numsrc) ||
- __put_user(interface, &kgf->gf_interface) ||
- __put_user(fmode, &kgf->gf_fmode) ||
- __put_user(numsrc, &kgf->gf_numsrc) ||
- copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group)))
- return -EFAULT;
-
- err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen);
- if (err)
- return err;
-
- if (!access_ok(koptlen, sizeof(*koptlen)) ||
- __get_user(klen, koptlen))
- return -EFAULT;
-
- ulen = klen - (sizeof(*kgf)-sizeof(*gf32));
-
- if (!access_ok(optlen, sizeof(*optlen)) ||
- __put_user(ulen, optlen))
- return -EFAULT;
-
- if (!access_ok(kgf, klen) ||
- !access_ok(gf32, ulen) ||
- __get_user(interface, &kgf->gf_interface) ||
- __get_user(fmode, &kgf->gf_fmode) ||
- __get_user(numsrc, &kgf->gf_numsrc) ||
- __put_user(interface, &gf32->gf_interface) ||
- __put_user(fmode, &gf32->gf_fmode) ||
- __put_user(numsrc, &gf32->gf_numsrc))
- return -EFAULT;
- if (numsrc) {
- int copylen;
-
- klen -= GROUP_FILTER_SIZE(0);
- copylen = numsrc * sizeof(gf32->gf_slist[0]);
- if (copylen > klen)
- copylen = klen;
- if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen))
- return -EFAULT;
- }
- return err;
-}
-EXPORT_SYMBOL(compat_mc_getsockopt);
-
-
/* Argument list sizes for compat_sys_socketcall */
#define AL(x) ((x) * sizeof(u32))
static unsigned char nas[21] = {
diff --git a/net/core/dev.c b/net/core/dev.c
index f36bd3b21997..ae37586f6ee8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5058,11 +5058,12 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
return 0;
}
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
struct packet_type **ppt_prev)
{
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
+ struct sk_buff *skb = *pskb;
struct net_device *orig_dev;
bool deliver_exact = false;
int ret = NET_RX_DROP;
@@ -5093,8 +5094,10 @@ another_round:
ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
preempt_enable();
- if (ret2 != XDP_PASS)
- return NET_RX_DROP;
+ if (ret2 != XDP_PASS) {
+ ret = NET_RX_DROP;
+ goto out;
+ }
skb_reset_mac_len(skb);
}
@@ -5244,6 +5247,13 @@ drop:
}
out:
+ /* The invariant here is that if *ppt_prev is not NULL
+ * then skb should also be non-NULL.
+ *
+ * Apparently *ppt_prev assignment above holds this invariant due to
+ * skb dereferencing near it.
+ */
+ *pskb = skb;
return ret;
}
@@ -5253,7 +5263,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
struct packet_type *pt_prev = NULL;
int ret;
- ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+ ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
if (pt_prev)
ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
skb->dev, pt_prev, orig_dev);
@@ -5331,7 +5341,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct packet_type *pt_prev = NULL;
skb_list_del_init(skb);
- __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+ __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
if (!pt_prev)
continue;
if (pt_curr != pt_prev || od_curr != orig_dev) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 822d662f97ef..bd2853d23b50 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7049,6 +7049,8 @@ static bool sock_addr_is_valid_access(int off, int size,
switch (prog->expected_attach_type) {
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET4_GETPEERNAME:
+ case BPF_CGROUP_INET4_GETSOCKNAME:
case BPF_CGROUP_UDP4_SENDMSG:
case BPF_CGROUP_UDP4_RECVMSG:
break;
@@ -7060,6 +7062,8 @@ static bool sock_addr_is_valid_access(int off, int size,
switch (prog->expected_attach_type) {
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET6_CONNECT:
+ case BPF_CGROUP_INET6_GETPEERNAME:
+ case BPF_CGROUP_INET6_GETSOCKNAME:
case BPF_CGROUP_UDP6_SENDMSG:
case BPF_CGROUP_UDP6_RECVMSG:
break;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3eff84824c8b..0aeb33572feb 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -160,12 +160,10 @@ out:
return ret;
}
-int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+static int flow_dissector_bpf_prog_detach(struct net *net)
{
struct bpf_prog *attached;
- struct net *net;
- net = current->nsproxy->net_ns;
mutex_lock(&flow_dissector_mutex);
attached = rcu_dereference_protected(net->flow_dissector_prog,
lockdep_is_held(&flow_dissector_mutex));
@@ -179,6 +177,24 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
return 0;
}
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+ return flow_dissector_bpf_prog_detach(current->nsproxy->net_ns);
+}
+
+static void __net_exit flow_dissector_pernet_pre_exit(struct net *net)
+{
+ /* We're not racing with attach/detach because there are no
+ * references to netns left when pre_exit gets called.
+ */
+ if (rcu_access_pointer(net->flow_dissector_prog))
+ flow_dissector_bpf_prog_detach(net);
+}
+
+static struct pernet_operations flow_dissector_pernet_ops __net_initdata = {
+ .pre_exit = flow_dissector_pernet_pre_exit,
+};
+
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
* @skb: sk_buff to extract the ports from
@@ -464,47 +480,59 @@ EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
- void *target_container, void *data, int nhoff, int hlen)
+ void *target_container, void *data, int nhoff, int hlen,
+ int lse_index, bool *entropy_label)
{
- struct flow_dissector_key_keyid *key_keyid;
- struct mpls_label *hdr, _hdr[2];
- u32 entry, label;
+ struct mpls_label *hdr, _hdr;
+ u32 entry, label, bos;
if (!dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
return FLOW_DISSECT_RET_OUT_GOOD;
+ if (lse_index >= FLOW_DIS_MPLS_MAX)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
hlen, &_hdr);
if (!hdr)
return FLOW_DISSECT_RET_OUT_BAD;
- entry = ntohl(hdr[0].entry);
+ entry = ntohl(hdr->entry);
label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
struct flow_dissector_key_mpls *key_mpls;
+ struct flow_dissector_mpls_lse *lse;
key_mpls = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_MPLS,
target_container);
- key_mpls->mpls_label = label;
- key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK)
- >> MPLS_LS_TTL_SHIFT;
- key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK)
- >> MPLS_LS_TC_SHIFT;
- key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK)
- >> MPLS_LS_S_SHIFT;
+ lse = &key_mpls->ls[lse_index];
+
+ lse->mpls_ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+ lse->mpls_bos = bos;
+ lse->mpls_tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+ lse->mpls_label = label;
+ dissector_set_mpls_lse(key_mpls, lse_index);
}
- if (label == MPLS_LABEL_ENTROPY) {
+ if (*entropy_label &&
+ dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
+ struct flow_dissector_key_keyid *key_keyid;
+
key_keyid = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
target_container);
- key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
+ key_keyid->keyid = cpu_to_be32(label);
}
- return FLOW_DISSECT_RET_OUT_GOOD;
+
+ *entropy_label = label == MPLS_LABEL_ENTROPY;
+
+ return bos ? FLOW_DISSECT_RET_OUT_GOOD : FLOW_DISSECT_RET_PROTO_AGAIN;
}
static enum flow_dissect_ret
@@ -963,6 +991,8 @@ bool __skb_flow_dissect(const struct net *net,
struct bpf_prog *attached = NULL;
enum flow_dissect_ret fdret;
enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+ bool mpls_el = false;
+ int mpls_lse = 0;
int num_hdrs = 0;
u8 ip_proto = 0;
bool ret;
@@ -1262,7 +1292,10 @@ proto_again:
case htons(ETH_P_MPLS_MC):
fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
target_container, data,
- nhoff, hlen);
+ nhoff, hlen, mpls_lse,
+ &mpls_el);
+ nhoff += sizeof(struct mpls_label);
+ mpls_lse++;
break;
case htons(ETH_P_FCOE):
if ((hlen - nhoff) < FCOE_HEADER_LEN) {
@@ -1836,7 +1869,7 @@ static int __init init_default_flow_dissectors(void)
skb_flow_dissector_init(&flow_keys_basic_dissector,
flow_keys_basic_dissector_keys,
ARRAY_SIZE(flow_keys_basic_dissector_keys));
- return 0;
-}
+ return register_pernet_subsys(&flow_dissector_pernet_ops);
+}
core_initcall(init_default_flow_dissectors);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index e951b743bed3..e64941c526b1 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -8,6 +8,7 @@
struct flow_rule *flow_rule_alloc(unsigned int num_actions)
{
struct flow_rule *rule;
+ int i;
rule = kzalloc(struct_size(rule, action.entries, num_actions),
GFP_KERNEL);
@@ -15,6 +16,11 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions)
return NULL;
rule->action.num_entries = num_actions;
+ /* Pre-fill each action hw_stats with DONT_CARE.
+ * Caller can override this if it wants stats for a given action.
+ */
+ for (i = 0; i < num_actions; i++)
+ rule->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE;
return rule;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b607ea602774..ef6b5a8f629c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1082,8 +1082,8 @@ static void neigh_timer_handler(struct timer_list *t)
}
if (neigh->nud_state & NUD_IN_TIMER) {
- if (time_before(next, jiffies + HZ/2))
- next = jiffies + HZ/2;
+ if (time_before(next, jiffies + HZ/100))
+ next = jiffies + HZ/100;
if (!mod_timer(&neigh->timer, next))
neigh_hold(neigh);
}
@@ -1771,6 +1771,7 @@ static struct neigh_table *neigh_find_table(int family)
}
const struct nla_policy nda_policy[NDA_MAX+1] = {
+ [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID },
[NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
@@ -1781,6 +1782,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_IFINDEX] = { .type = NLA_U32 },
[NDA_MASTER] = { .type = NLA_U32 },
[NDA_PROTOCOL] = { .type = NLA_U8 },
+ [NDA_NH_ID] = { .type = NLA_U32 },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 35a133c6d13b..b8afefe6f6b6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3727,7 +3727,6 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
return 0;
}
-EXPORT_SYMBOL_GPL(skb_gro_receive_list);
/**
* skb_segment - Perform protocol segmentation on skb.
@@ -4191,7 +4190,6 @@ done:
NAPI_GRO_CB(skb)->same_flow = 1;
return 0;
}
-EXPORT_SYMBOL_GPL(skb_gro_receive);
#ifdef CONFIG_SKB_EXTENSIONS
#define SKB_EXT_ALIGN_VALUE 8
diff --git a/net/core/sock.c b/net/core/sock.c
index fd85e651ce28..61ec573221a6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -566,7 +566,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
}
EXPORT_SYMBOL(sk_dst_check);
-static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
@@ -594,6 +594,18 @@ out:
return ret;
}
+int sock_bindtoindex(struct sock *sk, int ifindex)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = sock_bindtoindex_locked(sk, ifindex);
+ release_sock(sk);
+
+ return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
+
static int sock_setbindtodevice(struct sock *sk, char __user *optval,
int optlen)
{
@@ -634,10 +646,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
goto out;
}
- lock_sock(sk);
- ret = sock_setbindtodevice_locked(sk, index);
- release_sock(sk);
-
+ return sock_bindtoindex(sk, index);
out:
#endif
@@ -712,6 +721,111 @@ bool sk_mc_loop(struct sock *sk)
}
EXPORT_SYMBOL(sk_mc_loop);
+void sock_set_reuseaddr(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_reuse = SK_CAN_REUSE;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseaddr);
+
+void sock_set_reuseport(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_reuseport = true;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
+void sock_no_linger(struct sock *sk)
+{
+ lock_sock(sk);
+ sk->sk_lingertime = 0;
+ sock_set_flag(sk, SOCK_LINGER);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_no_linger);
+
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+ lock_sock(sk);
+ sk->sk_priority = priority;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
+void sock_set_sndtimeo(struct sock *sk, s64 secs)
+{
+ lock_sock(sk);
+ if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+ sk->sk_sndtimeo = secs * HZ;
+ else
+ sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+ if (val) {
+ sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+ sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+ sock_set_flag(sk, SOCK_RCVTSTAMP);
+ sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+ } else {
+ sock_reset_flag(sk, SOCK_RCVTSTAMP);
+ sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+ sock_reset_flag(sk, SOCK_TSTAMP_NEW);
+ }
+}
+
+void sock_enable_timestamps(struct sock *sk)
+{
+ lock_sock(sk);
+ __sock_set_timestamps(sk, true, false, true);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_enable_timestamps);
+
+void sock_set_keepalive(struct sock *sk)
+{
+ lock_sock(sk);
+ if (sk->sk_prot->keepalive)
+ sk->sk_prot->keepalive(sk, true);
+ sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+ /* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+ * as a negative value.
+ */
+ val = min_t(int, val, INT_MAX / 2);
+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+ /* We double it on the way in to account for "struct sk_buff" etc.
+ * overhead. Applications assume that the SO_RCVBUF setting they make
+ * will allow that much actual data to be received on that socket.
+ *
+ * Applications are unaware that "struct sk_buff" and other overheads
+ * allocate from the receive buffer during socket buffer allocation.
+ *
+ * And after considering the possible alternatives, returning the value
+ * we actually used in getsockopt is the most desirable behavior.
+ */
+ WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __sock_set_rcvbuf(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -808,30 +922,7 @@ set_sndbuf:
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
- /* Ensure val * 2 fits into an int, to prevent max_t()
- * from treating it as a negative value.
- */
- val = min_t(int, val, INT_MAX / 2);
- sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- /*
- * We double it on the way in to account for
- * "struct sk_buff" etc. overhead. Applications
- * assume that the SO_RCVBUF setting they make will
- * allow that much actual data to be received on that
- * socket.
- *
- * Applications are unaware that "struct sk_buff" and
- * other overheads allocate from the receive buffer
- * during socket buffer allocation.
- *
- * And after considering the possible alternatives,
- * returning the value we actually used in getsockopt
- * is the most desirable behavior.
- */
- WRITE_ONCE(sk->sk_rcvbuf,
- max_t(int, val * 2, SOCK_MIN_RCVBUF));
+ __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
break;
case SO_RCVBUFFORCE:
@@ -843,9 +934,8 @@ set_rcvbuf:
/* No negative values (to prevent underflow, as val will be
* multiplied by 2).
*/
- if (val < 0)
- val = 0;
- goto set_rcvbuf;
+ __sock_set_rcvbuf(sk, max(val, 0));
+ break;
case SO_KEEPALIVE:
if (sk->sk_prot->keepalive)
@@ -903,28 +993,17 @@ set_rcvbuf:
break;
case SO_TIMESTAMP_OLD:
+ __sock_set_timestamps(sk, valbool, false, false);
+ break;
case SO_TIMESTAMP_NEW:
+ __sock_set_timestamps(sk, valbool, true, false);
+ break;
case SO_TIMESTAMPNS_OLD:
+ __sock_set_timestamps(sk, valbool, false, true);
+ break;
case SO_TIMESTAMPNS_NEW:
- if (valbool) {
- if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW)
- sock_set_flag(sk, SOCK_TSTAMP_NEW);
- else
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-
- if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW)
- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
- else
- sock_set_flag(sk, SOCK_RCVTSTAMPNS);
- sock_set_flag(sk, SOCK_RCVTSTAMP);
- sock_enable_timestamp(sk, SOCK_TIMESTAMP);
- } else {
- sock_reset_flag(sk, SOCK_RCVTSTAMP);
- sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
- }
+ __sock_set_timestamps(sk, valbool, true, true);
break;
-
case SO_TIMESTAMPING_NEW:
sock_set_flag(sk, SOCK_TSTAMP_NEW);
/* fall through */
@@ -1180,7 +1259,7 @@ set_rcvbuf:
break;
case SO_BINDTOIFINDEX:
- ret = sock_setbindtodevice_locked(sk, val);
+ ret = sock_bindtoindex_locked(sk, val);
break;
default:
@@ -3633,3 +3712,11 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
}
EXPORT_SYMBOL(sk_busy_loop_end);
#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
+{
+ if (!sk->sk_prot->bind_add)
+ return -EOPNOTSUPP;
+ return sk->sk_prot->bind_add(sk, addr, addr_len);
+}
+EXPORT_SYMBOL(sock_bind_add);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 490b8f5fa8ee..90f44f382115 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -17,6 +17,7 @@
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
+#include <net/xdp_sock_drv.h>
#define REG_STATE_NEW 0x0
#define REG_STATE_REGISTERED 0x1
@@ -109,27 +110,6 @@ static void mem_allocator_disconnect(void *allocator)
mutex_unlock(&mem_id_lock);
}
-static void mem_id_disconnect(int id)
-{
- struct xdp_mem_allocator *xa;
-
- mutex_lock(&mem_id_lock);
-
- xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
- if (!xa) {
- mutex_unlock(&mem_id_lock);
- WARN(1, "Request remove non-existing id(%d), driver bug?", id);
- return;
- }
-
- trace_mem_disconnect(xa);
-
- if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
- call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
-
- mutex_unlock(&mem_id_lock);
-}
-
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
@@ -143,9 +123,6 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
if (id == 0)
return;
- if (xdp_rxq->mem.type == MEM_TYPE_ZERO_COPY)
- return mem_id_disconnect(id);
-
if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
rcu_read_lock();
xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
@@ -301,7 +278,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
xdp_rxq->mem.type = type;
if (!allocator) {
- if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY)
+ if (type == MEM_TYPE_PAGE_POOL)
return -EINVAL; /* Setup time check page_pool req */
return 0;
}
@@ -358,10 +335,11 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
* scenarios (e.g. queue full), it is possible to return the xdp_frame
* while still leveraging this protection. The @napi_direct boolean
* is used for those calls sites. Thus, allowing for faster recycling
- * of xdp_frames/pages in those cases.
+ * of xdp_frames/pages in those cases. This path is never used by the
+ * MEM_TYPE_XSK_BUFF_POOL memory type, so it's explicitly not part of
+ * the switch-statement.
*/
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
- unsigned long handle)
+static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
{
struct xdp_mem_allocator *xa;
struct page *page;
@@ -383,36 +361,29 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
page = virt_to_page(data); /* Assumes order0 page*/
put_page(page);
break;
- case MEM_TYPE_ZERO_COPY:
- /* NB! Only valid from an xdp_buff! */
- rcu_read_lock();
- /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
- xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
- xa->zc_alloc->free(xa->zc_alloc, handle);
- rcu_read_unlock();
default:
/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
+ WARN(1, "Incorrect XDP memory type (%d) usage", mem->type);
break;
}
}
void xdp_return_frame(struct xdp_frame *xdpf)
{
- __xdp_return(xdpf->data, &xdpf->mem, false, 0);
+ __xdp_return(xdpf->data, &xdpf->mem, false);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
- __xdp_return(xdpf->data, &xdpf->mem, true, 0);
+ __xdp_return(xdpf->data, &xdpf->mem, true);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
void xdp_return_buff(struct xdp_buff *xdp)
{
- __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
+ __xdp_return(xdp->data, &xdp->rxq->mem, true);
}
-EXPORT_SYMBOL_GPL(xdp_return_buff);
/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
@@ -493,7 +464,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
xdpf->metasize = metasize;
xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
- xdp_return_buff(xdp);
+ xsk_buff_free(xdp);
return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1e5e08cc0bfc..650187d68851 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1082,6 +1082,7 @@ static const struct proto_ops inet6_dccp_ops = {
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
#ifdef CONFIG_COMPAT
+ .compat_ioctl = inet6_compat_ioctl,
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
diff --git a/net/devres.c b/net/devres.c
new file mode 100644
index 000000000000..57a6a88d11f6
--- /dev/null
+++ b/net/devres.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains all networking devres helpers.
+ */
+
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+
+struct net_device_devres {
+ struct net_device *ndev;
+};
+
+static void devm_free_netdev(struct device *dev, void *this)
+{
+ struct net_device_devres *res = this;
+
+ free_netdev(res->ndev);
+}
+
+struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
+ unsigned int txqs, unsigned int rxqs)
+{
+ struct net_device_devres *dr;
+
+ dr = devres_alloc(devm_free_netdev, sizeof(*dr), GFP_KERNEL);
+ if (!dr)
+ return NULL;
+
+ dr->ndev = alloc_etherdev_mqs(sizeof_priv, txqs, rxqs);
+ if (!dr->ndev) {
+ devres_free(dr);
+ return NULL;
+ }
+
+ devres_add(dev, dr);
+
+ return dr->ndev;
+}
+EXPORT_SYMBOL(devm_alloc_etherdev_mqs);
+
+static void devm_netdev_release(struct device *dev, void *this)
+{
+ struct net_device_devres *res = this;
+
+ unregister_netdev(res->ndev);
+}
+
+static int netdev_devres_match(struct device *dev, void *this, void *match_data)
+{
+ struct net_device_devres *res = this;
+ struct net_device *ndev = match_data;
+
+ return ndev == res->ndev;
+}
+
+/**
+ * devm_register_netdev - resource managed variant of register_netdev()
+ * @dev: managing device for this netdev - usually the parent device
+ * @ndev: device to register
+ *
+ * This is a devres variant of register_netdev() for which the unregister
+ * function will be call automatically when the managing device is
+ * detached. Note: the net_device used must also be resource managed by
+ * the same struct device.
+ */
+int devm_register_netdev(struct device *dev, struct net_device *ndev)
+{
+ struct net_device_devres *dr;
+ int ret;
+
+ /* struct net_device must itself be managed. For now a managed netdev
+ * can only be allocated by devm_alloc_etherdev_mqs() so the check is
+ * straightforward.
+ */
+ if (WARN_ON(!devres_find(dev, devm_free_netdev,
+ netdev_devres_match, ndev)))
+ return -EINVAL;
+
+ dr = devres_alloc(devm_netdev_release, sizeof(*dr), GFP_KERNEL);
+ if (!dr)
+ return -ENOMEM;
+
+ ret = register_netdev(ndev);
+ if (ret) {
+ devres_free(dr);
+ return ret;
+ }
+
+ dr->ndev = ndev;
+ devres_add(ndev->dev.parent, dr);
+
+ return 0;
+}
+EXPORT_SYMBOL(devm_register_netdev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 886490fb203d..4c7f086a047b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1746,6 +1746,7 @@ int dsa_slave_create(struct dsa_port *port)
if (ds->ops->port_vlan_add && ds->ops->port_vlan_del)
slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
slave_dev->hw_features |= NETIF_F_HW_TC;
+ slave_dev->features |= NETIF_F_LLTX;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
if (!IS_ERR_OR_NULL(port->mac))
ether_addr_copy(slave_dev->dev_addr, port->mac);
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 3052da668156..780b2a15ac9b 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -140,34 +140,6 @@ bool vid_is_dsa_8021q(u16 vid)
}
EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
-static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
-{
- struct bridge_vlan_info vinfo;
- struct net_device *slave;
- u16 pvid;
- int err;
-
- if (!dsa_is_user_port(ds, port))
- return 0;
-
- slave = dsa_to_port(ds, port)->slave;
-
- err = br_vlan_get_pvid(slave, &pvid);
- if (!pvid || err < 0)
- /* There is no pvid on the bridge for this port, which is
- * perfectly valid. Nothing to restore, bye-bye!
- */
- return 0;
-
- err = br_vlan_get_info(slave, pvid, &vinfo);
- if (err < 0) {
- dev_err(ds->dev, "Couldn't determine PVID attributes\n");
- return err;
- }
-
- return dsa_port_vid_add(dsa_to_port(ds, port), pvid, vinfo.flags);
-}
-
/* If @enabled is true, installs @vid with @flags into the switch port's HW
* filter.
* If @enabled is false, deletes @vid (ignores @flags) from the port. Had the
@@ -178,39 +150,11 @@ static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid,
u16 flags, bool enabled)
{
struct dsa_port *dp = dsa_to_port(ds, port);
- struct bridge_vlan_info vinfo;
- int err;
if (enabled)
return dsa_port_vid_add(dp, vid, flags);
- err = dsa_port_vid_del(dp, vid);
- if (err < 0)
- return err;
-
- /* Nothing to restore from the bridge for a non-user port.
- * The CPU port VLANs are restored implicitly with the user ports,
- * similar to how the bridge does in dsa_slave_vlan_add and
- * dsa_slave_vlan_del.
- */
- if (!dsa_is_user_port(ds, port))
- return 0;
-
- err = br_vlan_get_info(dp->slave, vid, &vinfo);
- /* Couldn't determine bridge attributes for this vid,
- * it means the bridge had not configured it.
- */
- if (err < 0)
- return 0;
-
- /* Restore the VID from the bridge */
- err = dsa_port_vid_add(dp, vid, vinfo.flags);
- if (err < 0)
- return err;
-
- vinfo.flags &= ~BRIDGE_VLAN_INFO_PVID;
-
- return dsa_port_vid_add(dp->cpu_dp, vid, vinfo.flags);
+ return dsa_port_vid_del(dp, vid);
}
/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
@@ -329,9 +273,6 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
return err;
}
- if (!enabled)
- err = dsa_8021q_restore_pvid(ds, port);
-
return err;
}
EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging);
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index b5705cba8318..d6619edd53e5 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -15,6 +15,7 @@
#define MTK_HDR_XMIT_TAGGED_TPID_8100 1
#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
+#define MTK_HDR_XMIT_SA_DIS BIT(6)
static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
@@ -22,6 +23,9 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *mtk_tag;
bool is_vlan_skb = true;
+ unsigned char *dest = eth_hdr(skb)->h_dest;
+ bool is_multicast_skb = is_multicast_ether_addr(dest) &&
+ !is_broadcast_ether_addr(dest);
/* Build the special tag after the MAC Source Address. If VLAN header
* is present, it's required that VLAN header and special tag is
@@ -47,6 +51,10 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
MTK_HDR_XMIT_UNTAGGED;
mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+ /* Disable SA learning for multicast frames */
+ if (unlikely(is_multicast_skb))
+ mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
+
/* Tag control information is kept for 802.1Q */
if (!is_vlan_skb) {
mtk_tag[2] = 0;
@@ -61,6 +69,9 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
{
int port;
__be16 *phdr, hdr;
+ unsigned char *dest = eth_hdr(skb)->h_dest;
+ bool is_multicast_skb = is_multicast_ether_addr(dest) &&
+ !is_broadcast_ether_addr(dest);
if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
return NULL;
@@ -86,6 +97,10 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb->dev)
return NULL;
+ /* Only unicast or broadcast frames are offloaded */
+ if (likely(!is_multicast_skb))
+ skb->offload_fwd_mark = 1;
+
return skb;
}
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index c8b903302ff2..dac65180c4ef 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -400,34 +400,6 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
}
EXPORT_SYMBOL(alloc_etherdev_mqs);
-static void devm_free_netdev(struct device *dev, void *res)
-{
- free_netdev(*(struct net_device **)res);
-}
-
-struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
- unsigned int txqs, unsigned int rxqs)
-{
- struct net_device **dr;
- struct net_device *netdev;
-
- dr = devres_alloc(devm_free_netdev, sizeof(*dr), GFP_KERNEL);
- if (!dr)
- return NULL;
-
- netdev = alloc_etherdev_mqs(sizeof_priv, txqs, rxqs);
- if (!netdev) {
- devres_free(dr);
- return NULL;
- }
-
- *dr = netdev;
- devres_add(dev, dr);
-
- return netdev;
-}
-EXPORT_SYMBOL(devm_alloc_etherdev_mqs);
-
ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
{
return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 5ba06eabe8c2..7b7a0456c15c 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -5,7 +5,11 @@
#include "netlink.h"
#include "common.h"
-/* CABLE_TEST_ACT */
+/* 802.3 standard allows 100 meters for BaseT cables. However longer
+ * cables might work, depending on the quality of the cables and the
+ * PHY. So allow testing for up to 150 meters.
+ */
+#define MAX_CABLE_LENGTH_CM (150 * 100)
static const struct nla_policy
cable_test_act_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = {
@@ -13,7 +17,7 @@ cable_test_act_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = {
[ETHTOOL_A_CABLE_TEST_HEADER] = { .type = NLA_NESTED },
};
-static int ethnl_cable_test_started(struct phy_device *phydev)
+static int ethnl_cable_test_started(struct phy_device *phydev, u8 cmd)
{
struct sk_buff *skb;
int err = -ENOMEM;
@@ -23,7 +27,7 @@ static int ethnl_cable_test_started(struct phy_device *phydev)
if (!skb)
goto out;
- ehdr = ethnl_bcastmsg_put(skb, ETHTOOL_MSG_CABLE_TEST_NTF);
+ ehdr = ethnl_bcastmsg_put(skb, cmd);
if (!ehdr) {
err = -EMSGSIZE;
goto out;
@@ -86,7 +90,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
ethnl_ops_complete(dev);
if (!ret)
- ethnl_cable_test_started(dev->phydev);
+ ethnl_cable_test_started(dev->phydev,
+ ETHTOOL_MSG_CABLE_TEST_NTF);
out_rtnl:
rtnl_unlock();
@@ -95,16 +100,18 @@ out_dev_put:
return ret;
}
-int ethnl_cable_test_alloc(struct phy_device *phydev)
+int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd)
{
int err = -ENOMEM;
- phydev->skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ /* One TDR sample occupies 20 bytes. For a 150 meter cable,
+ * with four pairs, around 12K is needed.
+ */
+ phydev->skb = genlmsg_new(SZ_16K, GFP_KERNEL);
if (!phydev->skb)
goto out;
- phydev->ehdr = ethnl_bcastmsg_put(phydev->skb,
- ETHTOOL_MSG_CABLE_TEST_NTF);
+ phydev->ehdr = ethnl_bcastmsg_put(phydev->skb, cmd);
if (!phydev->ehdr) {
err = -EMSGSIZE;
goto out;
@@ -199,3 +206,226 @@ err:
return ret;
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length);
+
+struct cable_test_tdr_req_info {
+ struct ethnl_req_info base;
+};
+
+static const struct nla_policy
+cable_test_tdr_act_cfg_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1] = {
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .type = NLA_U32 },
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST] = { .type = NLA_U32 },
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP] = { .type = NLA_U32 },
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR] = { .type = NLA_U8 },
+};
+
+static const struct nla_policy
+cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1] = {
+ [ETHTOOL_A_CABLE_TEST_TDR_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_CABLE_TEST_TDR_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_CABLE_TEST_TDR_CFG] = { .type = NLA_NESTED },
+};
+
+/* CABLE_TEST_TDR_ACT */
+static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
+ struct genl_info *info,
+ struct phy_tdr_config *cfg)
+{
+ struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1];
+ int ret;
+
+ ret = nla_parse_nested(tb, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX, nest,
+ cable_test_tdr_act_cfg_policy, info->extack);
+ if (ret < 0)
+ return ret;
+
+ if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST])
+ cfg->first = nla_get_u32(
+ tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]);
+ else
+ cfg->first = 100;
+ if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST])
+ cfg->last = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]);
+ else
+ cfg->last = MAX_CABLE_LENGTH_CM;
+
+ if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP])
+ cfg->step = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]);
+ else
+ cfg->step = 100;
+
+ if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]) {
+ cfg->pair = nla_get_u8(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]);
+ if (cfg->pair > ETHTOOL_A_CABLE_PAIR_D) {
+ NL_SET_ERR_MSG_ATTR(
+ info->extack,
+ tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR],
+ "invalid pair parameter");
+ return -EINVAL;
+ }
+ } else {
+ cfg->pair = PHY_PAIR_ALL;
+ }
+
+ if (cfg->first > MAX_CABLE_LENGTH_CM) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST],
+ "invalid first parameter");
+ return -EINVAL;
+ }
+
+ if (cfg->last > MAX_CABLE_LENGTH_CM) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST],
+ "invalid last parameter");
+ return -EINVAL;
+ }
+
+ if (cfg->first > cfg->last) {
+ NL_SET_ERR_MSG(info->extack, "invalid first/last parameter");
+ return -EINVAL;
+ }
+
+ if (!cfg->step) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP],
+ "invalid step parameter");
+ return -EINVAL;
+ }
+
+ if (cfg->step > (cfg->last - cfg->first)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP],
+ "step parameter too big");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1];
+ struct ethnl_req_info req_info = {};
+ struct phy_tdr_config cfg;
+ struct net_device *dev;
+ int ret;
+
+ ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb,
+ ETHTOOL_A_CABLE_TEST_TDR_MAX,
+ cable_test_tdr_act_policy, info->extack);
+ if (ret < 0)
+ return ret;
+
+ ret = ethnl_parse_header_dev_get(&req_info,
+ tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+
+ dev = req_info.dev;
+ if (!dev->phydev) {
+ ret = -EOPNOTSUPP;
+ goto out_dev_put;
+ }
+
+ ret = ethnl_act_cable_test_tdr_cfg(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG],
+ info, &cfg);
+ if (ret)
+ goto out_dev_put;
+
+ rtnl_lock();
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = phy_start_cable_test_tdr(dev->phydev, info->extack, &cfg);
+
+ ethnl_ops_complete(dev);
+
+ if (!ret)
+ ethnl_cable_test_started(dev->phydev,
+ ETHTOOL_MSG_CABLE_TEST_TDR_NTF);
+
+out_rtnl:
+ rtnl_unlock();
+out_dev_put:
+ dev_put(dev);
+ return ret;
+}
+
+int ethnl_cable_test_amplitude(struct phy_device *phydev,
+ u8 pair, s16 mV)
+{
+ struct nlattr *nest;
+ int ret = -EMSGSIZE;
+
+ nest = nla_nest_start(phydev->skb,
+ ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_AMPLITUDE_PAIR, pair))
+ goto err;
+ if (nla_put_u16(phydev->skb, ETHTOOL_A_CABLE_AMPLITUDE_mV, mV))
+ goto err;
+
+ nla_nest_end(phydev->skb, nest);
+ return 0;
+
+err:
+ nla_nest_cancel(phydev->skb, nest);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_amplitude);
+
+int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV)
+{
+ struct nlattr *nest;
+ int ret = -EMSGSIZE;
+
+ nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_TDR_NEST_PULSE);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u16(phydev->skb, ETHTOOL_A_CABLE_PULSE_mV, mV))
+ goto err;
+
+ nla_nest_end(phydev->skb, nest);
+ return 0;
+
+err:
+ nla_nest_cancel(phydev->skb, nest);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_pulse);
+
+int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last,
+ u32 step)
+{
+ struct nlattr *nest;
+ int ret = -EMSGSIZE;
+
+ nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_TDR_NEST_STEP);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE,
+ first))
+ goto err;
+
+ if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_LAST_DISTANCE, last))
+ goto err;
+
+ if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_STEP_DISTANCE, step))
+ goto err;
+
+ nla_nest_end(phydev->skb, nest);
+ return 0;
+
+err:
+ nla_nest_cancel(phydev->skb, nest);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_step);
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 3aa4975919d7..9ef54cdcf662 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include "netlink.h"
#include "common.h"
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index eeb1137a3f23..b5df90c981c2 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -24,7 +24,7 @@
#include <linux/sched/signal.h>
#include <linux/net.h>
#include <net/devlink.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include <net/flow_offload.h>
#include <linux/ethtool_netlink.h>
#include <generated/utsrelease.h>
@@ -1510,11 +1510,14 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
void __user *useraddr)
{
struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
+ int ret;
if (!dev->ethtool_ops->get_coalesce)
return -EOPNOTSUPP;
- dev->ethtool_ops->get_coalesce(dev, &coalesce);
+ ret = dev->ethtool_ops->get_coalesce(dev, &coalesce);
+ if (ret)
+ return ret;
if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
return -EFAULT;
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index 2740cde0a182..7f47ba89054e 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -2,6 +2,7 @@
#include "netlink.h"
#include "common.h"
+#include <linux/phy.h>
struct linkstate_req_info {
struct ethnl_req_info base;
@@ -10,6 +11,8 @@ struct linkstate_req_info {
struct linkstate_reply_data {
struct ethnl_reply_data base;
int link;
+ int sqi;
+ int sqi_max;
};
#define LINKSTATE_REPDATA(__reply_base) \
@@ -20,8 +23,46 @@ linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = {
[ETHTOOL_A_LINKSTATE_UNSPEC] = { .type = NLA_REJECT },
[ETHTOOL_A_LINKSTATE_HEADER] = { .type = NLA_NESTED },
[ETHTOOL_A_LINKSTATE_LINK] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKSTATE_SQI] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKSTATE_SQI_MAX] = { .type = NLA_REJECT },
};
+static int linkstate_get_sqi(struct net_device *dev)
+{
+ struct phy_device *phydev = dev->phydev;
+ int ret;
+
+ if (!phydev)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&phydev->lock);
+ if (!phydev->drv || !phydev->drv->get_sqi)
+ ret = -EOPNOTSUPP;
+ else
+ ret = phydev->drv->get_sqi(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return ret;
+}
+
+static int linkstate_get_sqi_max(struct net_device *dev)
+{
+ struct phy_device *phydev = dev->phydev;
+ int ret;
+
+ if (!phydev)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&phydev->lock);
+ if (!phydev->drv || !phydev->drv->get_sqi_max)
+ ret = -EOPNOTSUPP;
+ else
+ ret = phydev->drv->get_sqi_max(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return ret;
+}
+
static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
struct genl_info *info)
@@ -34,6 +75,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
data->link = __ethtool_get_link(dev);
+
+ ret = linkstate_get_sqi(dev);
+ if (ret < 0 && ret != -EOPNOTSUPP)
+ return ret;
+
+ data->sqi = ret;
+
+ ret = linkstate_get_sqi_max(dev);
+ if (ret < 0 && ret != -EOPNOTSUPP)
+ return ret;
+
+ data->sqi_max = ret;
+
ethnl_ops_complete(dev);
return 0;
@@ -42,8 +96,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
static int linkstate_reply_size(const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
{
- return nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+ struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
+ int len;
+
+ len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+ 0;
+
+ if (data->sqi != -EOPNOTSUPP)
+ len += nla_total_size(sizeof(u32));
+
+ if (data->sqi_max != -EOPNOTSUPP)
+ len += nla_total_size(sizeof(u32));
+
+ return len;
}
static int linkstate_fill_reply(struct sk_buff *skb,
@@ -56,6 +121,14 @@ static int linkstate_fill_reply(struct sk_buff *skb,
nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
return -EMSGSIZE;
+ if (data->sqi != -EOPNOTSUPP &&
+ nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
+ return -EMSGSIZE;
+
+ if (data->sqi_max != -EOPNOTSUPP &&
+ nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
+ return -EMSGSIZE;
+
return 0;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 87bc02da74bc..88fd07f47040 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -342,7 +342,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
ret = ops->reply_size(req_info, reply_data);
if (ret < 0)
goto err_cleanup;
- reply_len = ret;
+ reply_len = ret + ethnl_reply_header_size();
ret = -ENOMEM;
rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd,
ops->hdr_attr, info, &reply_payload);
@@ -588,7 +588,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
ret = ops->reply_size(req_info, reply_data);
if (ret < 0)
goto err_cleanup;
- reply_len = ret;
+ reply_len = ret + ethnl_reply_header_size();
ret = -ENOMEM;
skb = genlmsg_new(reply_len, GFP_KERNEL);
if (!skb)
@@ -844,6 +844,11 @@ static const struct genl_ops ethtool_genl_ops[] = {
.flags = GENL_UNS_ADMIN_PERM,
.doit = ethnl_act_cable_test,
},
+ {
+ .cmd = ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_act_cable_test_tdr,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index b0eb5d920099..9a96b6e90dc2 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -360,5 +360,6 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
+int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
#endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index 95eae5c68a52..0eed4e4909ab 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -324,7 +324,6 @@ static int strset_reply_size(const struct ethnl_req_info *req_base,
int len = 0;
int ret;
- len += ethnl_reply_header_size();
for (i = 0; i < ETH_SS_COUNT; i++) {
const struct strset_info *set_info = &data->sets[i];
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 5da4733067fb..23ba5045e3d3 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -384,6 +384,7 @@ config INET_ESPINTCP
depends on XFRM && INET_ESP
select STREAM_PARSER
select NET_SOCK_MSG
+ select XFRM_ESPINTCP
help
Support for RFC 8229 encapsulation of ESP and IKE over
TCP/IPv4 sockets.
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fcf0d12a407a..02aa5cb3a4fd 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -116,6 +116,7 @@
#include <linux/mroute.h>
#endif
#include <net/l3mdev.h>
+#include <net/compat.h>
#include <trace/events/sock.h>
@@ -755,12 +756,11 @@ do_err:
}
EXPORT_SYMBOL(inet_accept);
-
/*
* This does both peername and sockname.
*/
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
- int peer)
+ int peer)
{
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -781,6 +781,11 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
}
+ if (cgroup_bpf_enabled)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+ peer ? BPF_CGROUP_INET4_GETPEERNAME :
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ NULL);
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
return sizeof(*sin);
}
@@ -970,17 +975,42 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
EXPORT_SYMBOL(inet_ioctl);
#ifdef CONFIG_COMPAT
+static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
+ struct compat_rtentry __user *ur)
+{
+ compat_uptr_t rtdev;
+ struct rtentry rt;
+
+ if (copy_from_user(&rt.rt_dst, &ur->rt_dst,
+ 3 * sizeof(struct sockaddr)) ||
+ get_user(rt.rt_flags, &ur->rt_flags) ||
+ get_user(rt.rt_metric, &ur->rt_metric) ||
+ get_user(rt.rt_mtu, &ur->rt_mtu) ||
+ get_user(rt.rt_window, &ur->rt_window) ||
+ get_user(rt.rt_irtt, &ur->rt_irtt) ||
+ get_user(rtdev, &ur->rt_dev))
+ return -EFAULT;
+
+ rt.rt_dev = compat_ptr(rtdev);
+ return ip_rt_ioctl(sock_net(sk), cmd, &rt);
+}
+
static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
+ void __user *argp = compat_ptr(arg);
struct sock *sk = sock->sk;
- int err = -ENOIOCTLCMD;
-
- if (sk->sk_prot->compat_ioctl)
- err = sk->sk_prot->compat_ioctl(sk, cmd, arg);
- return err;
+ switch (cmd) {
+ case SIOCADDRT:
+ case SIOCDELRT:
+ return inet_compat_routing_ioctl(sk, cmd, argp);
+ default:
+ if (!sk->sk_prot->compat_ioctl)
+ return -ENOIOCTLCMD;
+ return sk->sk_prot->compat_ioctl(sk, cmd, arg);
+ }
}
-#endif
+#endif /* CONFIG_COMPAT */
const struct proto_ops inet_stream_ops = {
.family = PF_INET,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f048d0a188b7..123a6d39438f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -276,6 +276,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
err = devinet_sysctl_register(in_dev);
if (err) {
in_dev->dead = 1;
+ neigh_parms_release(&arp_tbl, in_dev->arp_parms);
in_dev_put(in_dev);
in_dev = NULL;
goto out;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 731022cff600..d14133eac476 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -63,10 +63,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
sp->olen++;
xo = xfrm_offload(skb);
- if (!xo) {
- xfrm_state_put(x);
+ if (!xo)
goto out_reset;
- }
}
xo->flags |= XFRM_GRO;
@@ -139,19 +137,27 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x,
struct xfrm_offload *xo = xfrm_offload(skb);
struct sk_buff *segs = ERR_PTR(-EINVAL);
const struct net_offload *ops;
- int proto = xo->proto;
+ u8 proto = xo->proto;
skb->transport_header += x->props.header_len;
- if (proto == IPPROTO_BEETPH) {
- struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data;
+ if (x->sel.family != AF_INET6) {
+ if (proto == IPPROTO_BEETPH) {
+ struct ip_beet_phdr *ph =
+ (struct ip_beet_phdr *)skb->data;
+
+ skb->transport_header += ph->hdrlen * 8;
+ proto = ph->nexthdr;
+ } else {
+ skb->transport_header -= IPV4_BEET_PHMAXLEN;
+ }
+ } else {
+ __be16 frag;
- skb->transport_header += ph->hdrlen * 8;
- proto = ph->nexthdr;
- } else if (x->sel.family != AF_INET6) {
- skb->transport_header -= IPV4_BEET_PHMAXLEN;
- } else if (proto == IPPROTO_TCP) {
- skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
+ skb->transport_header +=
+ ipv6_skip_exthdr(skb, 0, &proto, &frag);
+ if (proto == IPPROTO_TCP)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
}
__skb_pull(skb, skb_transport_offset(skb));
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 213be9c050ad..41079490a118 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -309,17 +309,18 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
{
bool dev_match = false;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- int ret;
+ if (unlikely(fi->nh)) {
+ dev_match = nexthop_uses_dev(fi->nh, dev);
+ } else {
+ int ret;
- for (ret = 0; ret < fib_info_num_path(fi); ret++) {
- const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
+ for (ret = 0; ret < fib_info_num_path(fi); ret++) {
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
- if (nhc->nhc_dev == dev) {
- dev_match = true;
- break;
- } else if (l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) {
- dev_match = true;
- break;
+ if (nhc_l3mdev_matches_dev(nhc, dev)) {
+ dev_match = true;
+ break;
+ }
}
}
#else
@@ -918,7 +919,6 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
else
filter->dump_exceptions = false;
- filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC);
filter->flags = rtm->rtm_flags;
filter->protocol = rtm->rtm_protocol;
filter->rt_type = rtm->rtm_type;
@@ -990,7 +990,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (filter.table_id) {
tb = fib_get_table(net, filter.table_id);
if (!tb) {
- if (filter.dump_all_families)
+ if (rtnl_msg_family(cb->nlh) != PF_INET)
return skb->len;
NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4f334b425538..248f1c1959a6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1371,6 +1371,26 @@ static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
return (key ^ prefix) & (prefix | -prefix);
}
+bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
+ const struct flowi4 *flp)
+{
+ if (nhc->nhc_flags & RTNH_F_DEAD)
+ return false;
+
+ if (ip_ignore_linkdown(nhc->nhc_dev) &&
+ nhc->nhc_flags & RTNH_F_LINKDOWN &&
+ !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
+ return false;
+
+ if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
+ if (flp->flowi4_oif &&
+ flp->flowi4_oif != nhc->nhc_oif)
+ return false;
+ }
+
+ return true;
+}
+
/* should be called with rcu_read_lock */
int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
struct fib_result *res, int fib_flags)
@@ -1503,6 +1523,7 @@ found:
/* Step 3: Process the leaf, if that fails fall back to backtracing */
hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
struct fib_info *fi = fa->fa_info;
+ struct fib_nh_common *nhc;
int nhsel, err;
if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
@@ -1528,26 +1549,25 @@ out_reject:
if (fi->fib_flags & RTNH_F_DEAD)
continue;
- if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) {
- err = fib_props[RTN_BLACKHOLE].error;
- goto out_reject;
+ if (unlikely(fi->nh)) {
+ if (nexthop_is_blackhole(fi->nh)) {
+ err = fib_props[RTN_BLACKHOLE].error;
+ goto out_reject;
+ }
+
+ nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
+ &nhsel);
+ if (nhc)
+ goto set_result;
+ goto miss;
}
for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
- struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
+ nhc = fib_info_nhc(fi, nhsel);
- if (nhc->nhc_flags & RTNH_F_DEAD)
+ if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
continue;
- if (ip_ignore_linkdown(nhc->nhc_dev) &&
- nhc->nhc_flags & RTNH_F_LINKDOWN &&
- !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
- continue;
- if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
- if (flp->flowi4_oif &&
- flp->flowi4_oif != nhc->nhc_oif)
- continue;
- }
-
+set_result:
if (!(fib_flags & FIB_LOOKUP_NOREF))
refcount_inc(&fi->fib_clntref);
@@ -1568,6 +1588,7 @@ out_reject:
return err;
}
}
+miss:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_miss);
#endif
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 47f0502b2101..7b272bbed2b4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2565,9 +2565,9 @@ done:
}
int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
- struct group_filter __user *optval, int __user *optlen)
+ struct sockaddr_storage __user *p)
{
- int err, i, count, copycount;
+ int i, count, copycount;
struct sockaddr_in *psin;
__be32 addr;
struct ip_mc_socklist *pmc;
@@ -2583,37 +2583,29 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
- err = -EADDRNOTAVAIL;
-
for_each_pmc_rtnl(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == addr &&
pmc->multi.imr_ifindex == gsf->gf_interface)
break;
}
if (!pmc) /* must have a prior join */
- goto done;
+ return -EADDRNOTAVAIL;
gsf->gf_fmode = pmc->sfmode;
psl = rtnl_dereference(pmc->sflist);
count = psl ? psl->sl_count : 0;
copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
gsf->gf_numsrc = count;
- if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
- copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
- return -EFAULT;
- }
- for (i = 0; i < copycount; i++) {
+ for (i = 0; i < copycount; i++, p++) {
struct sockaddr_storage ss;
psin = (struct sockaddr_in *)&ss;
memset(&ss, 0, sizeof(ss));
psin->sin_family = AF_INET;
psin->sin_addr.s_addr = psl->sl_addr[i];
- if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+ if (copy_to_user(p, &ss, sizeof(ss)))
return -EFAULT;
}
return 0;
-done:
- return err;
}
/*
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d6faf3702824..f40b1b72f979 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,17 +24,19 @@
#include <net/addrconf.h>
#if IS_ENABLED(CONFIG_IPV6)
-/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
- * only, and any IPv4 addresses if not IPv6 only
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
- * and 0.0.0.0 equals to 0.0.0.0 only
+/* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses
+ * if IPv6 only, and any IPv4 addresses
+ * if not IPv6 only
+ * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
*/
static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
const struct in6_addr *sk2_rcv_saddr6,
__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
bool sk1_ipv6only, bool sk2_ipv6only,
- bool match_wildcard)
+ bool match_sk1_wildcard,
+ bool match_sk2_wildcard)
{
int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -44,8 +46,8 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
return true;
- if (!sk1_rcv_saddr || !sk2_rcv_saddr)
- return match_wildcard;
+ return (match_sk1_wildcard && !sk1_rcv_saddr) ||
+ (match_sk2_wildcard && !sk2_rcv_saddr);
}
return false;
}
@@ -53,11 +55,11 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
return true;
- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
+ if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
return true;
- if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
+ if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard &&
!(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
return true;
@@ -69,18 +71,19 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
}
#endif
-/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * 0.0.0.0 only equals to 0.0.0.0
+/* match_sk*_wildcard == true: 0.0.0.0 equals to any IPv4 addresses
+ * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
+ * 0.0.0.0 only equals to 0.0.0.0
*/
static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
- bool sk2_ipv6only, bool match_wildcard)
+ bool sk2_ipv6only, bool match_sk1_wildcard,
+ bool match_sk2_wildcard)
{
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
return true;
- if (!sk1_rcv_saddr || !sk2_rcv_saddr)
- return match_wildcard;
+ return (match_sk1_wildcard && !sk1_rcv_saddr) ||
+ (match_sk2_wildcard && !sk2_rcv_saddr);
}
return false;
}
@@ -96,10 +99,12 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
sk2->sk_rcv_saddr,
ipv6_only_sock(sk),
ipv6_only_sock(sk2),
+ match_wildcard,
match_wildcard);
#endif
return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
- ipv6_only_sock(sk2), match_wildcard);
+ ipv6_only_sock(sk2), match_wildcard,
+ match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);
@@ -285,10 +290,10 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
tb->fast_rcv_saddr,
sk->sk_rcv_saddr,
tb->fast_ipv6_only,
- ipv6_only_sock(sk), true);
+ ipv6_only_sock(sk), true, false);
#endif
return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
- ipv6_only_sock(sk), true);
+ ipv6_only_sock(sk), true, false);
}
/* Obtain a reference to a local port for the given sock,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0ce9b91ff55c..4e31f23e4117 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -768,45 +768,37 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
}
}
-static int ipgre_tunnel_ioctl(struct net_device *dev,
- struct ifreq *ifr, int cmd)
+static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
+ int cmd)
{
- struct ip_tunnel_parm p;
int err;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- return -EFAULT;
-
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
- p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
- ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
+ if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
+ p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
+ ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
}
- p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
- p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
+ p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
+ p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
- err = ip_tunnel_ioctl(dev, &p, cmd);
+ err = ip_tunnel_ctl(dev, p, cmd);
if (err)
return err;
if (cmd == SIOCCHGTUNNEL) {
struct ip_tunnel *t = netdev_priv(dev);
- t->parms.i_flags = p.i_flags;
- t->parms.o_flags = p.o_flags;
+ t->parms.i_flags = p->i_flags;
+ t->parms.o_flags = p->o_flags;
if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
ipgre_link_update(dev, true);
}
- p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
- p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
-
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- return -EFAULT;
-
+ p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
+ p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
return 0;
}
@@ -924,10 +916,11 @@ static const struct net_device_ops ipgre_netdev_ops = {
.ndo_stop = ipgre_close,
#endif
.ndo_start_xmit = ipgre_xmit,
- .ndo_do_ioctl = ipgre_tunnel_ioctl,
+ .ndo_do_ioctl = ip_tunnel_ioctl,
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_tunnel_ctl = ipgre_tunnel_ctl,
};
#define GRE_FEATURES (NETIF_F_SG | \
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8206047d70b6..84ec3703c909 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -560,6 +560,61 @@ out:
return err;
}
+static void __ip_sock_set_tos(struct sock *sk, int val)
+{
+ if (sk->sk_type == SOCK_STREAM) {
+ val &= ~INET_ECN_MASK;
+ val |= inet_sk(sk)->tos & INET_ECN_MASK;
+ }
+ if (inet_sk(sk)->tos != val) {
+ inet_sk(sk)->tos = val;
+ sk->sk_priority = rt_tos2priority(val);
+ sk_dst_reset(sk);
+ }
+}
+
+void ip_sock_set_tos(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __ip_sock_set_tos(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_tos);
+
+void ip_sock_set_freebind(struct sock *sk)
+{
+ lock_sock(sk);
+ inet_sk(sk)->freebind = true;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_freebind);
+
+void ip_sock_set_recverr(struct sock *sk)
+{
+ lock_sock(sk);
+ inet_sk(sk)->recverr = true;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_recverr);
+
+int ip_sock_set_mtu_discover(struct sock *sk, int val)
+{
+ if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
+ return -EINVAL;
+ lock_sock(sk);
+ inet_sk(sk)->pmtudisc = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(ip_sock_set_mtu_discover);
+
+void ip_sock_set_pktinfo(struct sock *sk)
+{
+ lock_sock(sk);
+ inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_pktinfo);
/*
* Socket option code for IP. This is the end of the line after any
@@ -587,6 +642,86 @@ static bool setsockopt_needs_rtnl(int optname)
return false;
}
+static int set_mcast_msfilter(struct sock *sk, int ifindex,
+ int numsrc, int fmode,
+ struct sockaddr_storage *group,
+ struct sockaddr_storage *list)
+{
+ int msize = IP_MSFILTER_SIZE(numsrc);
+ struct ip_msfilter *msf;
+ struct sockaddr_in *psin;
+ int err, i;
+
+ msf = kmalloc(msize, GFP_KERNEL);
+ if (!msf)
+ return -ENOBUFS;
+
+ psin = (struct sockaddr_in *)group;
+ if (psin->sin_family != AF_INET)
+ goto Eaddrnotavail;
+ msf->imsf_multiaddr = psin->sin_addr.s_addr;
+ msf->imsf_interface = 0;
+ msf->imsf_fmode = fmode;
+ msf->imsf_numsrc = numsrc;
+ for (i = 0; i < numsrc; ++i) {
+ psin = (struct sockaddr_in *)&list[i];
+
+ if (psin->sin_family != AF_INET)
+ goto Eaddrnotavail;
+ msf->imsf_slist[i] = psin->sin_addr.s_addr;
+ }
+ err = ip_mc_msfilter(sk, msf, ifindex);
+ kfree(msf);
+ return err;
+
+Eaddrnotavail:
+ kfree(msf);
+ return -EADDRNOTAVAIL;
+}
+
+static int do_mcast_group_source(struct sock *sk, int optname,
+ struct group_source_req *greqs)
+{
+ struct ip_mreq_source mreqs;
+ struct sockaddr_in *psin;
+ int omode, add, err;
+
+ if (greqs->gsr_group.ss_family != AF_INET ||
+ greqs->gsr_source.ss_family != AF_INET)
+ return -EADDRNOTAVAIL;
+
+ psin = (struct sockaddr_in *)&greqs->gsr_group;
+ mreqs.imr_multiaddr = psin->sin_addr.s_addr;
+ psin = (struct sockaddr_in *)&greqs->gsr_source;
+ mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
+ mreqs.imr_interface = 0; /* use index for mc_source */
+
+ if (optname == MCAST_BLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 1;
+ } else if (optname == MCAST_UNBLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 0;
+ } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+ struct ip_mreqn mreq;
+
+ psin = (struct sockaddr_in *)&greqs->gsr_group;
+ mreq.imr_multiaddr = psin->sin_addr;
+ mreq.imr_address.s_addr = 0;
+ mreq.imr_ifindex = greqs->gsr_interface;
+ err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
+ if (err && err != -EADDRINUSE)
+ return err;
+ greqs->gsr_interface = mreq.imr_ifindex;
+ omode = MCAST_INCLUDE;
+ add = 1;
+ } else /* MCAST_LEAVE_SOURCE_GROUP */ {
+ omode = MCAST_INCLUDE;
+ add = 0;
+ }
+ return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface);
+}
+
static int do_ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
{
@@ -743,15 +878,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
break;
case IP_TOS: /* This sets both TOS and Precedence */
- if (sk->sk_type == SOCK_STREAM) {
- val &= ~INET_ECN_MASK;
- val |= inet->tos & INET_ECN_MASK;
- }
- if (inet->tos != val) {
- inet->tos = val;
- sk->sk_priority = rt_tos2priority(val);
- sk_dst_reset(sk);
- }
+ __ip_sock_set_tos(sk, val);
break;
case IP_TTL:
if (optlen < 1)
@@ -1029,9 +1156,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case MCAST_UNBLOCK_SOURCE:
{
struct group_source_req greqs;
- struct ip_mreq_source mreqs;
- struct sockaddr_in *psin;
- int omode, add;
if (optlen != sizeof(struct group_source_req))
goto e_inval;
@@ -1039,50 +1163,12 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EFAULT;
break;
}
- if (greqs.gsr_group.ss_family != AF_INET ||
- greqs.gsr_source.ss_family != AF_INET) {
- err = -EADDRNOTAVAIL;
- break;
- }
- psin = (struct sockaddr_in *)&greqs.gsr_group;
- mreqs.imr_multiaddr = psin->sin_addr.s_addr;
- psin = (struct sockaddr_in *)&greqs.gsr_source;
- mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
- mreqs.imr_interface = 0; /* use index for mc_source */
-
- if (optname == MCAST_BLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 1;
- } else if (optname == MCAST_UNBLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 0;
- } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
- struct ip_mreqn mreq;
-
- psin = (struct sockaddr_in *)&greqs.gsr_group;
- mreq.imr_multiaddr = psin->sin_addr;
- mreq.imr_address.s_addr = 0;
- mreq.imr_ifindex = greqs.gsr_interface;
- err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
- if (err && err != -EADDRINUSE)
- break;
- greqs.gsr_interface = mreq.imr_ifindex;
- omode = MCAST_INCLUDE;
- add = 1;
- } else /* MCAST_LEAVE_SOURCE_GROUP */ {
- omode = MCAST_INCLUDE;
- add = 0;
- }
- err = ip_mc_source(add, omode, sk, &mreqs,
- greqs.gsr_interface);
+ err = do_mcast_group_source(sk, optname, &greqs);
break;
}
case MCAST_MSFILTER:
{
- struct sockaddr_in *psin;
- struct ip_msfilter *msf = NULL;
struct group_filter *gsf = NULL;
- int msize, i, ifindex;
if (optlen < GROUP_FILTER_SIZE(0))
goto e_inval;
@@ -1095,7 +1181,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = PTR_ERR(gsf);
break;
}
-
/* numsrc >= (4G-140)/128 overflow in 32 bits */
if (gsf->gf_numsrc >= 0x1ffffff ||
gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
@@ -1106,36 +1191,10 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EINVAL;
goto mc_msf_out;
}
- msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
- msf = kmalloc(msize, GFP_KERNEL);
- if (!msf) {
- err = -ENOBUFS;
- goto mc_msf_out;
- }
- ifindex = gsf->gf_interface;
- psin = (struct sockaddr_in *)&gsf->gf_group;
- if (psin->sin_family != AF_INET) {
- err = -EADDRNOTAVAIL;
- goto mc_msf_out;
- }
- msf->imsf_multiaddr = psin->sin_addr.s_addr;
- msf->imsf_interface = 0;
- msf->imsf_fmode = gsf->gf_fmode;
- msf->imsf_numsrc = gsf->gf_numsrc;
- err = -EADDRNOTAVAIL;
- for (i = 0; i < gsf->gf_numsrc; ++i) {
- psin = (struct sockaddr_in *)&gsf->gf_slist[i];
-
- if (psin->sin_family != AF_INET)
- goto mc_msf_out;
- msf->imsf_slist[i] = psin->sin_addr.s_addr;
- }
- kfree(gsf);
- gsf = NULL;
-
- err = ip_mc_msfilter(sk, msf, ifindex);
+ err = set_mcast_msfilter(sk, gsf->gf_interface,
+ gsf->gf_numsrc, gsf->gf_fmode,
+ &gsf->gf_group, gsf->gf_slist);
mc_msf_out:
- kfree(msf);
kfree(gsf);
break;
}
@@ -1272,9 +1331,113 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
if (level != SOL_IP)
return -ENOPROTOOPT;
- if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
- return compat_mc_setsockopt(sk, level, optname, optval, optlen,
- ip_setsockopt);
+ switch (optname) {
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ {
+ struct compat_group_req __user *gr32 = (void __user *)optval;
+ struct group_req greq;
+ struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group;
+ struct ip_mreqn mreq;
+
+ if (optlen < sizeof(struct compat_group_req))
+ return -EINVAL;
+
+ if (get_user(greq.gr_interface, &gr32->gr_interface) ||
+ copy_from_user(&greq.gr_group, &gr32->gr_group,
+ sizeof(greq.gr_group)))
+ return -EFAULT;
+
+ if (psin->sin_family != AF_INET)
+ return -EINVAL;
+
+ memset(&mreq, 0, sizeof(mreq));
+ mreq.imr_multiaddr = psin->sin_addr;
+ mreq.imr_ifindex = greq.gr_interface;
+
+ rtnl_lock();
+ lock_sock(sk);
+ if (optname == MCAST_JOIN_GROUP)
+ err = ip_mc_join_group(sk, &mreq);
+ else
+ err = ip_mc_leave_group(sk, &mreq);
+ release_sock(sk);
+ rtnl_unlock();
+ return err;
+ }
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ {
+ struct compat_group_source_req __user *gsr32 = (void __user *)optval;
+ struct group_source_req greqs;
+
+ if (optlen != sizeof(struct compat_group_source_req))
+ return -EINVAL;
+
+ if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
+ copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
+ sizeof(greqs.gsr_group)) ||
+ copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
+ sizeof(greqs.gsr_source)))
+ return -EFAULT;
+
+ rtnl_lock();
+ lock_sock(sk);
+ err = do_mcast_group_source(sk, optname, &greqs);
+ release_sock(sk);
+ rtnl_unlock();
+ return err;
+ }
+ case MCAST_MSFILTER:
+ {
+ const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ struct compat_group_filter *gf32;
+ unsigned int n;
+ void *p;
+
+ if (optlen < size0)
+ return -EINVAL;
+ if (optlen > sysctl_optmem_max - 4)
+ return -ENOBUFS;
+
+ p = kmalloc(optlen + 4, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ if (copy_from_user(gf32, optval, optlen)) {
+ err = -EFAULT;
+ goto mc_msf_out;
+ }
+
+ n = gf32->gf_numsrc;
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ if (n >= 0x1ffffff) {
+ err = -ENOBUFS;
+ goto mc_msf_out;
+ }
+ if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
+ err = -EINVAL;
+ goto mc_msf_out;
+ }
+
+ rtnl_lock();
+ lock_sock(sk);
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ err = -ENOBUFS;
+ else
+ err = set_mcast_msfilter(sk, gf32->gf_interface,
+ n, gf32->gf_fmode,
+ &gf32->gf_group, gf32->gf_slist);
+ release_sock(sk);
+ rtnl_unlock();
+mc_msf_out:
+ kfree(p);
+ return err;
+ }
+ }
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
@@ -1465,19 +1628,28 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
}
case MCAST_MSFILTER:
{
+ struct group_filter __user *p = (void __user *)optval;
struct group_filter gsf;
+ const int size0 = offsetof(struct group_filter, gf_slist);
+ int num;
- if (len < GROUP_FILTER_SIZE(0)) {
+ if (len < size0) {
err = -EINVAL;
goto out;
}
- if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
+ if (copy_from_user(&gsf, p, size0)) {
err = -EFAULT;
goto out;
}
- err = ip_mc_gsfget(sk, &gsf,
- (struct group_filter __user *)optval,
- optlen);
+ num = gsf.gf_numsrc;
+ err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+ if (err)
+ goto out;
+ if (gsf.gf_numsrc < num)
+ num = gsf.gf_numsrc;
+ if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
+ copy_to_user(p, &gsf, size0))
+ err = -EFAULT;
goto out;
}
case IP_MULTICAST_ALL:
@@ -1590,9 +1762,47 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
{
int err;
- if (optname == MCAST_MSFILTER)
- return compat_mc_getsockopt(sk, level, optname, optval, optlen,
- ip_getsockopt);
+ if (optname == MCAST_MSFILTER) {
+ const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ struct compat_group_filter __user *p = (void __user *)optval;
+ struct compat_group_filter gf32;
+ struct group_filter gf;
+ int ulen, err;
+ int num;
+
+ if (level != SOL_IP)
+ return -EOPNOTSUPP;
+
+ if (get_user(ulen, optlen))
+ return -EFAULT;
+
+ if (ulen < size0)
+ return -EINVAL;
+
+ if (copy_from_user(&gf32, p, size0))
+ return -EFAULT;
+
+ gf.gf_interface = gf32.gf_interface;
+ gf.gf_fmode = gf32.gf_fmode;
+ num = gf.gf_numsrc = gf32.gf_numsrc;
+ gf.gf_group = gf32.gf_group;
+
+ rtnl_lock();
+ lock_sock(sk);
+ err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+ release_sock(sk);
+ rtnl_unlock();
+ if (err)
+ return err;
+ if (gf.gf_numsrc < num)
+ num = gf.gf_numsrc;
+ ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
+ if (put_user(ulen, optlen) ||
+ put_user(gf.gf_fmode, &p->gf_fmode) ||
+ put_user(gf.gf_numsrc, &p->gf_numsrc))
+ return -EFAULT;
+ return 0;
+ }
err = do_ip_getsockopt(sk, level, optname, optval, optlen,
MSG_CMSG_COMPAT);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index cd4b84310d92..f4f1d11eab50 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -860,7 +860,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
netdev_state_change(dev);
}
-int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
int err = 0;
struct ip_tunnel *t = netdev_priv(dev);
@@ -960,6 +960,20 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
done:
return err;
}
+EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
+
+int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct ip_tunnel_parm p;
+ int err;
+
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ return -EFAULT;
+ err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
+ if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ return -EFAULT;
+ return err;
+}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1b4e6f298648..1d9c8cff5ac3 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -93,7 +93,28 @@ static int vti_rcv_proto(struct sk_buff *skb)
static int vti_rcv_tunnel(struct sk_buff *skb)
{
- return vti_rcv(skb, ip_hdr(skb)->saddr, true);
+ struct ip_tunnel_net *itn = net_generic(dev_net(skb->dev), vti_net_id);
+ const struct iphdr *iph = ip_hdr(skb);
+ struct ip_tunnel *tunnel;
+
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->saddr, iph->daddr, 0);
+ if (tunnel) {
+ struct tnl_ptk_info tpi = {
+ .proto = htons(ETH_P_IP),
+ };
+
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+ if (iptunnel_pull_header(skb, 0, tpi.proto, false))
+ goto drop;
+ return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, false);
+ }
+
+ return -EINVAL;
+drop:
+ kfree_skb(skb);
+ return 0;
}
static int vti_rcv_cb(struct sk_buff *skb, int err)
@@ -378,38 +399,31 @@ static int vti4_err(struct sk_buff *skb, u32 info)
}
static int
-vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
int err = 0;
- struct ip_tunnel_parm p;
-
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- return -EFAULT;
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
- p.iph.ihl != 5)
+ if (p->iph.version != 4 || p->iph.protocol != IPPROTO_IPIP ||
+ p->iph.ihl != 5)
return -EINVAL;
}
- if (!(p.i_flags & GRE_KEY))
- p.i_key = 0;
- if (!(p.o_flags & GRE_KEY))
- p.o_key = 0;
+ if (!(p->i_flags & GRE_KEY))
+ p->i_key = 0;
+ if (!(p->o_flags & GRE_KEY))
+ p->o_key = 0;
- p.i_flags = VTI_ISVTI;
+ p->i_flags = VTI_ISVTI;
- err = ip_tunnel_ioctl(dev, &p, cmd);
+ err = ip_tunnel_ctl(dev, p, cmd);
if (err)
return err;
if (cmd != SIOCDELTUNNEL) {
- p.i_flags |= GRE_KEY;
- p.o_flags |= GRE_KEY;
+ p->i_flags |= GRE_KEY;
+ p->o_flags |= GRE_KEY;
}
-
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- return -EFAULT;
return 0;
}
@@ -417,10 +431,11 @@ static const struct net_device_ops vti_netdev_ops = {
.ndo_init = vti_tunnel_init,
.ndo_uninit = ip_tunnel_uninit,
.ndo_start_xmit = vti_tunnel_xmit,
- .ndo_do_ioctl = vti_tunnel_ioctl,
+ .ndo_do_ioctl = ip_tunnel_ioctl,
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_tunnel_ctl = vti_tunnel_ctl,
};
static void vti_tunnel_setup(struct net_device *dev)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 2f01cf6fa0de..40fea52c8277 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -327,41 +327,29 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
}
static int
-ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
- int err = 0;
- struct ip_tunnel_parm p;
-
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- return -EFAULT;
-
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
- if (p.iph.version != 4 ||
- !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
+ if (p->iph.version != 4 ||
+ !ipip_tunnel_ioctl_verify_protocol(p->iph.protocol) ||
+ p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)))
return -EINVAL;
}
- p.i_key = p.o_key = 0;
- p.i_flags = p.o_flags = 0;
- err = ip_tunnel_ioctl(dev, &p, cmd);
- if (err)
- return err;
-
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- return -EFAULT;
-
- return 0;
+ p->i_key = p->o_key = 0;
+ p->i_flags = p->o_flags = 0;
+ return ip_tunnel_ctl(dev, p, cmd);
}
static const struct net_device_ops ipip_netdev_ops = {
.ndo_init = ipip_tunnel_init,
.ndo_uninit = ip_tunnel_uninit,
.ndo_start_xmit = ipip_tunnel_xmit,
- .ndo_do_ioctl = ipip_tunnel_ioctl,
+ .ndo_do_ioctl = ip_tunnel_ioctl,
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_tunnel_ctl = ipip_tunnel_ctl,
};
#define IPIP_FEATURES (NETIF_F_SG | \
@@ -698,7 +686,7 @@ out:
rtnl_link_failed:
#if IS_ENABLED(CONFIG_MPLS)
- xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
+ xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
xfrm_tunnel_mplsip_failed:
#endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5c218db2dede..f5c7a58844a4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -421,37 +421,6 @@ static void ipmr_free_table(struct mr_table *mrt)
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
-static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
-{
- struct net *net = dev_net(dev);
-
- dev_close(dev);
-
- dev = __dev_get_by_name(net, "tunl0");
- if (dev) {
- const struct net_device_ops *ops = dev->netdev_ops;
- struct ifreq ifr;
- struct ip_tunnel_parm p;
-
- memset(&p, 0, sizeof(p));
- p.iph.daddr = v->vifc_rmt_addr.s_addr;
- p.iph.saddr = v->vifc_lcl_addr.s_addr;
- p.iph.version = 4;
- p.iph.ihl = 5;
- p.iph.protocol = IPPROTO_IPIP;
- sprintf(p.name, "dvmrp%d", v->vifc_vifi);
- ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
-
- if (ops->ndo_do_ioctl) {
- mm_segment_t oldfs = get_fs();
-
- set_fs(KERNEL_DS);
- ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
- set_fs(oldfs);
- }
- }
-}
-
/* Initialize ipmr pimreg/tunnel in_device */
static bool ipmr_init_vif_indev(const struct net_device *dev)
{
@@ -471,51 +440,52 @@ static bool ipmr_init_vif_indev(const struct net_device *dev)
static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
- struct net_device *dev;
-
- dev = __dev_get_by_name(net, "tunl0");
+ struct net_device *tunnel_dev, *new_dev;
+ struct ip_tunnel_parm p = { };
+ int err;
- if (dev) {
- const struct net_device_ops *ops = dev->netdev_ops;
- int err;
- struct ifreq ifr;
- struct ip_tunnel_parm p;
+ tunnel_dev = __dev_get_by_name(net, "tunl0");
+ if (!tunnel_dev)
+ goto out;
- memset(&p, 0, sizeof(p));
- p.iph.daddr = v->vifc_rmt_addr.s_addr;
- p.iph.saddr = v->vifc_lcl_addr.s_addr;
- p.iph.version = 4;
- p.iph.ihl = 5;
- p.iph.protocol = IPPROTO_IPIP;
- sprintf(p.name, "dvmrp%d", v->vifc_vifi);
- ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
+ p.iph.daddr = v->vifc_rmt_addr.s_addr;
+ p.iph.saddr = v->vifc_lcl_addr.s_addr;
+ p.iph.version = 4;
+ p.iph.ihl = 5;
+ p.iph.protocol = IPPROTO_IPIP;
+ sprintf(p.name, "dvmrp%d", v->vifc_vifi);
- if (ops->ndo_do_ioctl) {
- mm_segment_t oldfs = get_fs();
+ if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl)
+ goto out;
+ err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
+ SIOCADDTUNNEL);
+ if (err)
+ goto out;
- set_fs(KERNEL_DS);
- err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
- set_fs(oldfs);
- } else {
- err = -EOPNOTSUPP;
- }
- dev = NULL;
-
- if (err == 0 &&
- (dev = __dev_get_by_name(net, p.name)) != NULL) {
- dev->flags |= IFF_MULTICAST;
- if (!ipmr_init_vif_indev(dev))
- goto failure;
- if (dev_open(dev, NULL))
- goto failure;
- dev_hold(dev);
- }
- }
- return dev;
+ new_dev = __dev_get_by_name(net, p.name);
+ if (!new_dev)
+ goto out;
-failure:
- unregister_netdevice(dev);
- return NULL;
+ new_dev->flags |= IFF_MULTICAST;
+ if (!ipmr_init_vif_indev(new_dev))
+ goto out_unregister;
+ if (dev_open(new_dev, NULL))
+ goto out_unregister;
+ dev_hold(new_dev);
+ err = dev_set_allmulti(new_dev, 1);
+ if (err) {
+ dev_close(new_dev);
+ tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
+ SIOCDELTUNNEL);
+ dev_put(new_dev);
+ new_dev = ERR_PTR(err);
+ }
+ return new_dev;
+
+out_unregister:
+ unregister_netdevice(new_dev);
+out:
+ return ERR_PTR(-ENOBUFS);
}
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
@@ -867,14 +837,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
break;
case VIFF_TUNNEL:
dev = ipmr_new_tunnel(net, vifc);
- if (!dev)
- return -ENOBUFS;
- err = dev_set_allmulti(dev, 1);
- if (err) {
- ipmr_del_tunnel(dev, vifc);
- dev_put(dev);
- return err;
- }
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
break;
case VIFF_USE_IFINDEX:
case 0:
@@ -2613,7 +2577,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
- if (filter.dump_all_families)
+ if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR)
return skb->len;
NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 3c25a467b3ef..7afde8828b4c 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -166,8 +166,7 @@ pptp_outbound_pkt(struct sk_buff *skb,
break;
default:
pr_debug("unknown outbound packet 0x%04x:%s\n", msg,
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
- pptp_msg_name[0]);
+ pptp_msg_name(msg));
fallthrough;
case PPTP_SET_LINK_INFO:
/* only need to NAT in case PAC is behind NAT box */
@@ -268,9 +267,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
break;
default:
- pr_debug("unknown inbound packet %s\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
- pptp_msg_name[0]);
+ pr_debug("unknown inbound packet %s\n", pptp_msg_name(msg));
fallthrough;
case PPTP_START_SESSION_REQUEST:
case PPTP_START_SESSION_REPLY:
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 3957364d556c..ebafa5ed91ac 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -33,8 +33,20 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
[NHA_ENCAP] = { .type = NLA_NESTED },
[NHA_GROUPS] = { .type = NLA_FLAG },
[NHA_MASTER] = { .type = NLA_U32 },
+ [NHA_FDB] = { .type = NLA_FLAG },
};
+static int call_nexthop_notifiers(struct net *net,
+ enum nexthop_event_type event_type,
+ struct nexthop *nh)
+{
+ int err;
+
+ err = atomic_notifier_call_chain(&net->nexthop.notifier_chain,
+ event_type, nh);
+ return notifier_to_errno(err);
+}
+
static unsigned int nh_dev_hashfn(unsigned int val)
{
unsigned int mask = NH_DEV_HASHSIZE - 1;
@@ -63,9 +75,16 @@ static void nexthop_free_mpath(struct nexthop *nh)
int i;
nhg = rcu_dereference_raw(nh->nh_grp);
- for (i = 0; i < nhg->num_nh; ++i)
- WARN_ON(nhg->nh_entries[i].nh);
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ WARN_ON(!list_empty(&nhge->nh_list));
+ nexthop_put(nhge->nh);
+ }
+
+ WARN_ON(nhg->spare == nhg);
+ kfree(nhg->spare);
kfree(nhg);
}
@@ -107,6 +126,7 @@ static struct nexthop *nexthop_alloc(void)
INIT_LIST_HEAD(&nh->fi_list);
INIT_LIST_HEAD(&nh->f6i_list);
INIT_LIST_HEAD(&nh->grp_list);
+ INIT_LIST_HEAD(&nh->fdb_list);
}
return nh;
}
@@ -227,6 +247,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_u32(skb, NHA_ID, nh->id))
goto nla_put_failure;
+ if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB))
+ goto nla_put_failure;
+
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
@@ -241,7 +264,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_flag(skb, NHA_BLACKHOLE))
goto nla_put_failure;
goto out;
- } else {
+ } else if (!nh->is_fdb_nh) {
const struct net_device *dev;
dev = nhi->fib_nhc.nhc_dev;
@@ -276,6 +299,7 @@ out:
return 0;
nla_put_failure:
+ nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
}
@@ -387,12 +411,35 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
return true;
}
+static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *nhi;
+
+ if (!nh->is_fdb_nh) {
+ NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
+ return -EINVAL;
+ }
+
+ nhi = rtnl_dereference(nh->nh_info);
+ if (*nh_family == AF_UNSPEC) {
+ *nh_family = nhi->family;
+ } else if (*nh_family != nhi->family) {
+ NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
unsigned int len = nla_len(tb[NHA_GROUP]);
+ u8 nh_family = AF_UNSPEC;
struct nexthop_grp *nhg;
unsigned int i, j;
+ u8 nhg_fdb = 0;
if (len & (sizeof(struct nexthop_grp) - 1)) {
NL_SET_ERR_MSG(extack,
@@ -421,6 +468,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
}
}
+ if (tb[NHA_FDB])
+ nhg_fdb = 1;
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
struct nexthop *nh;
@@ -432,11 +481,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
}
if (!valid_group_nh(nh, len, extack))
return -EINVAL;
+
+ if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
+ return -EINVAL;
+
+ if (!nhg_fdb && nh->is_fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
+ return -EINVAL;
+ }
}
- for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) {
+ for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) {
if (!tb[i])
continue;
-
+ if (tb[NHA_FDB])
+ continue;
NL_SET_ERR_MSG(extack,
"No other attributes can be set in nexthop groups");
return -EINVAL;
@@ -495,6 +553,9 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
if (hash > atomic_read(&nhge->upper_bound))
continue;
+ if (nhge->nh->is_fdb_nh)
+ return nhge->nh;
+
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
@@ -564,6 +625,11 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
{
struct nh_info *nhi;
+ if (nh->is_fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ return -EINVAL;
+ }
+
/* fib6_src is unique to a fib6_info and limits the ability to cache
* routes in fib6_nh within a nexthop that is potentially shared
* across multiple fib entries. If the config wants to use source
@@ -640,6 +706,12 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope,
{
int err = 0;
+ if (nh->is_fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ err = -EINVAL;
+ goto out;
+ }
+
if (nh->is_group) {
struct nh_group *nhg;
@@ -693,41 +765,56 @@ static void nh_group_rebalance(struct nh_group *nhg)
}
}
-static void remove_nh_grp_entry(struct nh_grp_entry *nhge,
- struct nh_group *nhg,
+static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
struct nl_info *nlinfo)
{
+ struct nh_grp_entry *nhges, *new_nhges;
+ struct nexthop *nhp = nhge->nh_parent;
struct nexthop *nh = nhge->nh;
- struct nh_grp_entry *nhges;
- bool found = false;
- int i;
+ struct nh_group *nhg, *newg;
+ int i, j;
WARN_ON(!nh);
- nhges = nhg->nh_entries;
- for (i = 0; i < nhg->num_nh; ++i) {
- if (found) {
- nhges[i-1].nh = nhges[i].nh;
- nhges[i-1].weight = nhges[i].weight;
- list_del(&nhges[i].nh_list);
- list_add(&nhges[i-1].nh_list, &nhges[i-1].nh->grp_list);
- } else if (nhg->nh_entries[i].nh == nh) {
- found = true;
- }
- }
+ nhg = rtnl_dereference(nhp->nh_grp);
+ newg = nhg->spare;
- if (WARN_ON(!found))
+ /* last entry, keep it visible and remove the parent */
+ if (nhg->num_nh == 1) {
+ remove_nexthop(net, nhp, nlinfo);
return;
+ }
+
+ newg->has_v4 = nhg->has_v4;
+ newg->mpath = nhg->mpath;
+ newg->num_nh = nhg->num_nh;
+
+ /* copy old entries to new except the one getting removed */
+ nhges = nhg->nh_entries;
+ new_nhges = newg->nh_entries;
+ for (i = 0, j = 0; i < nhg->num_nh; ++i) {
+ /* current nexthop getting removed */
+ if (nhg->nh_entries[i].nh == nh) {
+ newg->num_nh--;
+ continue;
+ }
- nhg->num_nh--;
- nhg->nh_entries[nhg->num_nh].nh = NULL;
+ list_del(&nhges[i].nh_list);
+ new_nhges[j].nh_parent = nhges[i].nh_parent;
+ new_nhges[j].nh = nhges[i].nh;
+ new_nhges[j].weight = nhges[i].weight;
+ list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list);
+ j++;
+ }
- nh_group_rebalance(nhg);
+ nh_group_rebalance(newg);
+ rcu_assign_pointer(nhp->nh_grp, newg);
- nexthop_put(nh);
+ list_del(&nhge->nh_list);
+ nexthop_put(nhge->nh);
if (nlinfo)
- nexthop_notify(RTM_NEWNEXTHOP, nhge->nh_parent, nlinfo);
+ nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
}
static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
@@ -735,17 +822,11 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
{
struct nh_grp_entry *nhge, *tmp;
- list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) {
- struct nh_group *nhg;
-
- list_del(&nhge->nh_list);
- nhg = rtnl_dereference(nhge->nh_parent->nh_grp);
- remove_nh_grp_entry(nhge, nhg, nlinfo);
+ list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
+ remove_nh_grp_entry(net, nhge, nlinfo);
- /* if this group has no more entries then remove it */
- if (!nhg->num_nh)
- remove_nexthop(net, nhge->nh_parent, nlinfo);
- }
+ /* make sure all see the newly published array before releasing rtnl */
+ synchronize_rcu();
}
static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
@@ -759,10 +840,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
if (WARN_ON(!nhge->nh))
continue;
- list_del(&nhge->nh_list);
- nexthop_put(nhge->nh);
- nhge->nh = NULL;
- nhg->num_nh--;
+ list_del_init(&nhge->nh_list);
}
}
@@ -773,6 +851,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
bool do_flush = false;
struct fib_info *fi;
+ call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh);
+
list_for_each_entry(fi, &nh->fi_list, nh_list) {
fi->fib_flags |= RTNH_F_DEAD;
do_flush = true;
@@ -1086,6 +1166,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
{
struct nlattr *grps_attr = cfg->nh_grp;
struct nexthop_grp *entry = nla_data(grps_attr);
+ u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
struct nh_group *nhg;
struct nexthop *nh;
int i;
@@ -1096,12 +1177,21 @@ static struct nexthop *nexthop_create_group(struct net *net,
nh->is_group = 1;
- nhg = nexthop_grp_alloc(nla_len(grps_attr) / sizeof(*entry));
+ nhg = nexthop_grp_alloc(num_nh);
if (!nhg) {
kfree(nh);
return ERR_PTR(-ENOMEM);
}
+ /* spare group used for removals */
+ nhg->spare = nexthop_grp_alloc(num_nh);
+ if (!nhg) {
+ kfree(nhg);
+ kfree(nh);
+ return NULL;
+ }
+ nhg->spare->spare = nhg;
+
for (i = 0; i < nhg->num_nh; ++i) {
struct nexthop *nhe;
struct nh_info *nhi;
@@ -1125,6 +1215,9 @@ static struct nexthop *nexthop_create_group(struct net *net,
nh_group_rebalance(nhg);
}
+ if (cfg->nh_fdb)
+ nh->is_fdb_nh = 1;
+
rcu_assign_pointer(nh->nh_grp, nhg);
return nh;
@@ -1133,6 +1226,7 @@ out_no_nh:
for (; i >= 0; --i)
nexthop_put(nhg->nh_entries[i].nh);
+ kfree(nhg->spare);
kfree(nhg);
kfree(nh);
@@ -1152,7 +1246,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
.fc_encap = cfg->nh_encap,
.fc_encap_type = cfg->nh_encap_type,
};
- u32 tb_id = l3mdev_fib_table(cfg->dev);
+ u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
int err;
err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
@@ -1161,6 +1255,9 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
goto out;
}
+ if (nh->is_fdb_nh)
+ goto out;
+
/* sets nh_dev if successful */
err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
if (!err) {
@@ -1186,6 +1283,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh,
.fc_flags = cfg->nh_flags,
.fc_encap = cfg->nh_encap,
.fc_encap_type = cfg->nh_encap_type,
+ .fc_is_fdb = cfg->nh_fdb,
};
int err;
@@ -1227,6 +1325,9 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
nhi->family = cfg->nh_family;
nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
+ if (cfg->nh_fdb)
+ nh->is_fdb_nh = 1;
+
if (cfg->nh_blackhole) {
nhi->reject_nh = 1;
cfg->nh_ifindex = net->loopback_dev->ifindex;
@@ -1248,7 +1349,8 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
}
/* add the entry to the device based hash */
- nexthop_devhash_add(net, nhi);
+ if (!nh->is_fdb_nh)
+ nexthop_devhash_add(net, nhi);
rcu_assign_pointer(nh->nh_info, nhi);
@@ -1352,6 +1454,19 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
if (tb[NHA_ID])
cfg->nh_id = nla_get_u32(tb[NHA_ID]);
+ if (tb[NHA_FDB]) {
+ if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
+ tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
+ NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
+ goto out;
+ }
+ if (nhm->nh_flags) {
+ NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
+ goto out;
+ }
+ cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
+ }
+
if (tb[NHA_GROUP]) {
if (nhm->nh_family != AF_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid family for group");
@@ -1375,8 +1490,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
if (tb[NHA_BLACKHOLE]) {
if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
- tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
- NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif");
+ tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
+ NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
goto out;
}
@@ -1385,26 +1500,28 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
goto out;
}
- if (!tb[NHA_OIF]) {
- NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops");
+ if (!cfg->nh_fdb && !tb[NHA_OIF]) {
+ NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
goto out;
}
- cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
- if (cfg->nh_ifindex)
- cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
+ if (!cfg->nh_fdb && tb[NHA_OIF]) {
+ cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
+ if (cfg->nh_ifindex)
+ cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
- if (!cfg->dev) {
- NL_SET_ERR_MSG(extack, "Invalid device index");
- goto out;
- } else if (!(cfg->dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack, "Nexthop device is not up");
- err = -ENETDOWN;
- goto out;
- } else if (!netif_carrier_ok(cfg->dev)) {
- NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
- err = -ENETDOWN;
- goto out;
+ if (!cfg->dev) {
+ NL_SET_ERR_MSG(extack, "Invalid device index");
+ goto out;
+ } else if (!(cfg->dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ } else if (!netif_carrier_ok(cfg->dev)) {
+ NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
+ err = -ENETDOWN;
+ goto out;
+ }
}
err = -EINVAL;
@@ -1633,7 +1750,7 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
int *master_idx, bool *group_filter,
- struct netlink_callback *cb)
+ bool *fdb_filter, struct netlink_callback *cb)
{
struct netlink_ext_ack *extack = cb->extack;
struct nlattr *tb[NHA_MAX + 1];
@@ -1670,6 +1787,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
case NHA_GROUPS:
*group_filter = true;
break;
+ case NHA_FDB:
+ *fdb_filter = true;
+ break;
default:
NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
return -EINVAL;
@@ -1688,17 +1808,17 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
/* rtnl */
static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
{
+ bool group_filter = false, fdb_filter = false;
struct nhmsg *nhm = nlmsg_data(cb->nlh);
int dev_filter_idx = 0, master_idx = 0;
struct net *net = sock_net(skb->sk);
struct rb_root *root = &net->nexthop.rb_root;
- bool group_filter = false;
struct rb_node *node;
int idx = 0, s_idx;
int err;
err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
- &group_filter, cb);
+ &group_filter, &fdb_filter, cb);
if (err < 0)
return err;
@@ -1783,6 +1903,19 @@ static struct notifier_block nh_netdev_notifier = {
.notifier_call = nh_netdev_event,
};
+int register_nexthop_notifier(struct net *net, struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&net->nexthop.notifier_chain, nb);
+}
+EXPORT_SYMBOL(register_nexthop_notifier);
+
+int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&net->nexthop.notifier_chain,
+ nb);
+}
+EXPORT_SYMBOL(unregister_nexthop_notifier);
+
static void __net_exit nexthop_net_exit(struct net *net)
{
rtnl_lock();
@@ -1799,6 +1932,7 @@ static int __net_init nexthop_net_init(struct net *net)
net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
if (!net->nexthop.devhash)
return -ENOMEM;
+ ATOMIC_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);
return 0;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2058b94bfa00..1d7076b78e63 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -491,18 +491,16 @@ u32 ip_idents_reserve(u32 hash, int segs)
atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
u32 old = READ_ONCE(*p_tstamp);
u32 now = (u32)jiffies;
- u32 new, delta = 0;
+ u32 delta = 0;
if (old != now && cmpxchg(p_tstamp, old, now) == old)
delta = prandom_u32_max(now - old);
- /* Do not use atomic_add_return() as it makes UBSAN unhappy */
- do {
- old = (u32)atomic_read(p_id);
- new = old + delta + segs;
- } while (atomic_cmpxchg(p_id, old, new) != old);
-
- return new - segs;
+ /* If UBSAN reports an error there, please make sure your compiler
+ * supports -fno-strict-overflow before reporting it that was a bug
+ * in UBSAN, and it has been fixed in GCC-8.
+ */
+ return atomic_add_return(segs + delta, p_id) - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 970064996377..15d47d5e7951 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2801,6 +2801,163 @@ static void tcp_enable_tx_delay(void)
}
}
+/* When set indicates to always queue non-full frames. Later the user clears
+ * this option and we transmit any pending partial frames in the queue. This is
+ * meant to be used alongside sendfile() to get properly filled frames when the
+ * user (for example) must write out headers with a write() call first and then
+ * use sendfile to send out the data parts.
+ *
+ * TCP_CORK can be set together with TCP_NODELAY and it is stronger than
+ * TCP_NODELAY.
+ */
+static void __tcp_sock_set_cork(struct sock *sk, bool on)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (on) {
+ tp->nonagle |= TCP_NAGLE_CORK;
+ } else {
+ tp->nonagle &= ~TCP_NAGLE_CORK;
+ if (tp->nonagle & TCP_NAGLE_OFF)
+ tp->nonagle |= TCP_NAGLE_PUSH;
+ tcp_push_pending_frames(sk);
+ }
+}
+
+void tcp_sock_set_cork(struct sock *sk, bool on)
+{
+ lock_sock(sk);
+ __tcp_sock_set_cork(sk, on);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_cork);
+
+/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is
+ * remembered, but it is not activated until cork is cleared.
+ *
+ * However, when TCP_NODELAY is set we make an explicit push, which overrides
+ * even TCP_CORK for currently queued segments.
+ */
+static void __tcp_sock_set_nodelay(struct sock *sk, bool on)
+{
+ if (on) {
+ tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
+ tcp_push_pending_frames(sk);
+ } else {
+ tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF;
+ }
+}
+
+void tcp_sock_set_nodelay(struct sock *sk)
+{
+ lock_sock(sk);
+ __tcp_sock_set_nodelay(sk, true);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_nodelay);
+
+static void __tcp_sock_set_quickack(struct sock *sk, int val)
+{
+ if (!val) {
+ inet_csk_enter_pingpong_mode(sk);
+ return;
+ }
+
+ inet_csk_exit_pingpong_mode(sk);
+ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
+ inet_csk_ack_scheduled(sk)) {
+ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED;
+ tcp_cleanup_rbuf(sk, 1);
+ if (!(val & 1))
+ inet_csk_enter_pingpong_mode(sk);
+ }
+}
+
+void tcp_sock_set_quickack(struct sock *sk, int val)
+{
+ lock_sock(sk);
+ __tcp_sock_set_quickack(sk, val);
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_quickack);
+
+int tcp_sock_set_syncnt(struct sock *sk, int val)
+{
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ return -EINVAL;
+
+ lock_sock(sk);
+ inet_csk(sk)->icsk_syn_retries = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_syncnt);
+
+void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+{
+ lock_sock(sk);
+ inet_csk(sk)->icsk_user_timeout = val;
+ release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_user_timeout);
+
+static int __tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (val < 1 || val > MAX_TCP_KEEPIDLE)
+ return -EINVAL;
+
+ tp->keepalive_time = val * HZ;
+ if (sock_flag(sk, SOCK_KEEPOPEN) &&
+ !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+ u32 elapsed = keepalive_time_elapsed(tp);
+
+ if (tp->keepalive_time > elapsed)
+ elapsed = tp->keepalive_time - elapsed;
+ else
+ elapsed = 0;
+ inet_csk_reset_keepalive_timer(sk, elapsed);
+ }
+
+ return 0;
+}
+
+int tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+ int err;
+
+ lock_sock(sk);
+ err = __tcp_sock_set_keepidle(sk, val);
+ release_sock(sk);
+ return err;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepidle);
+
+int tcp_sock_set_keepintvl(struct sock *sk, int val)
+{
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ return -EINVAL;
+
+ lock_sock(sk);
+ tcp_sk(sk)->keepalive_intvl = val * HZ;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepintvl);
+
+int tcp_sock_set_keepcnt(struct sock *sk, int val)
+{
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ return -EINVAL;
+
+ lock_sock(sk);
+ tcp_sk(sk)->keepalive_probes = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepcnt);
+
/*
* Socket option code for TCP.
*/
@@ -2898,20 +3055,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_NODELAY:
- if (val) {
- /* TCP_NODELAY is weaker than TCP_CORK, so that
- * this option on corked socket is remembered, but
- * it is not activated until cork is cleared.
- *
- * However, when TCP_NODELAY is set we make
- * an explicit push, which overrides even TCP_CORK
- * for currently queued segments.
- */
- tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk);
- } else {
- tp->nonagle &= ~TCP_NAGLE_OFF;
- }
+ __tcp_sock_set_nodelay(sk, val);
break;
case TCP_THIN_LINEAR_TIMEOUTS:
@@ -2979,43 +3123,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_CORK:
- /* When set indicates to always queue non-full frames.
- * Later the user clears this option and we transmit
- * any pending partial frames in the queue. This is
- * meant to be used alongside sendfile() to get properly
- * filled frames when the user (for example) must write
- * out headers with a write() call first and then use
- * sendfile to send out the data parts.
- *
- * TCP_CORK can be set together with TCP_NODELAY and it is
- * stronger than TCP_NODELAY.
- */
- if (val) {
- tp->nonagle |= TCP_NAGLE_CORK;
- } else {
- tp->nonagle &= ~TCP_NAGLE_CORK;
- if (tp->nonagle&TCP_NAGLE_OFF)
- tp->nonagle |= TCP_NAGLE_PUSH;
- tcp_push_pending_frames(sk);
- }
+ __tcp_sock_set_cork(sk, val);
break;
case TCP_KEEPIDLE:
- if (val < 1 || val > MAX_TCP_KEEPIDLE)
- err = -EINVAL;
- else {
- tp->keepalive_time = val * HZ;
- if (sock_flag(sk, SOCK_KEEPOPEN) &&
- !((1 << sk->sk_state) &
- (TCPF_CLOSE | TCPF_LISTEN))) {
- u32 elapsed = keepalive_time_elapsed(tp);
- if (tp->keepalive_time > elapsed)
- elapsed = tp->keepalive_time - elapsed;
- else
- elapsed = 0;
- inet_csk_reset_keepalive_timer(sk, elapsed);
- }
- }
+ err = __tcp_sock_set_keepidle(sk, val);
break;
case TCP_KEEPINTVL:
if (val < 1 || val > MAX_TCP_KEEPINTVL)
@@ -3072,19 +3184,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_QUICKACK:
- if (!val) {
- inet_csk_enter_pingpong_mode(sk);
- } else {
- inet_csk_exit_pingpong_mode(sk);
- if ((1 << sk->sk_state) &
- (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
- inet_csk_ack_scheduled(sk)) {
- icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
- tcp_cleanup_rbuf(sk, 1);
- if (!(val & 1))
- inet_csk_enter_pingpong_mode(sk);
- }
- }
+ __tcp_sock_set_quickack(sk, val);
break;
#ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ad90102f5dfb..83330a6cb242 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -437,7 +437,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* 3. Try to fixup all. It is made immediately after connection enters
* established state.
*/
-void tcp_init_buffer_space(struct sock *sk)
+static void tcp_init_buffer_space(struct sock *sk)
{
int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6c05f1ceb538..ad6435ba6d72 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -403,6 +403,46 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
}
EXPORT_SYMBOL(tcp_req_err);
+/* TCP-LD (RFC 6069) logic */
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct sk_buff *skb;
+ s32 remaining;
+ u32 delta_us;
+
+ if (sock_owned_by_user(sk))
+ return;
+
+ if (seq != tp->snd_una || !icsk->icsk_retransmits ||
+ !icsk->icsk_backoff)
+ return;
+
+ skb = tcp_rtx_queue_head(sk);
+ if (WARN_ON_ONCE(!skb))
+ return;
+
+ icsk->icsk_backoff--;
+ icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT;
+ icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
+
+ tcp_mstamp_refresh(tp);
+ delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
+ remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us);
+
+ if (remaining > 0) {
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ remaining, TCP_RTO_MAX);
+ } else {
+ /* RTO revert clocked out retransmission.
+ * Will retransmit now.
+ */
+ tcp_retransmit_timer(sk);
+ }
+}
+EXPORT_SYMBOL(tcp_ld_RTO_revert);
+
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -419,27 +459,23 @@ EXPORT_SYMBOL(tcp_req_err);
*
*/
-int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
+int tcp_v4_err(struct sk_buff *skb, u32 info)
{
- const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
- struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
- struct inet_connection_sock *icsk;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
+ struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
struct tcp_sock *tp;
struct inet_sock *inet;
- const int type = icmp_hdr(icmp_skb)->type;
- const int code = icmp_hdr(icmp_skb)->code;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
struct sock *sk;
- struct sk_buff *skb;
struct request_sock *fastopen;
u32 seq, snd_una;
- s32 remaining;
- u32 delta_us;
int err;
- struct net *net = dev_net(icmp_skb->dev);
+ struct net *net = dev_net(skb->dev);
sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
th->dest, iph->saddr, ntohs(th->source),
- inet_iif(icmp_skb), 0);
+ inet_iif(skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return -ENOENT;
@@ -476,7 +512,6 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
goto out;
}
- icsk = inet_csk(sk);
tp = tcp_sk(sk);
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
fastopen = rcu_dereference(tp->fastopen_rsk);
@@ -490,7 +525,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
switch (type) {
case ICMP_REDIRECT:
if (!sock_owned_by_user(sk))
- do_redirect(icmp_skb, sk);
+ do_redirect(skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
@@ -521,41 +556,12 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
err = icmp_err_convert[code].errno;
- /* check if icmp_skb allows revert of backoff
- * (see draft-zimmermann-tcp-lcd) */
- if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
- break;
- if (seq != tp->snd_una || !icsk->icsk_retransmits ||
- !icsk->icsk_backoff || fastopen)
- break;
-
- if (sock_owned_by_user(sk))
- break;
-
- skb = tcp_rtx_queue_head(sk);
- if (WARN_ON_ONCE(!skb))
- break;
-
- icsk->icsk_backoff--;
- icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
- TCP_TIMEOUT_INIT;
- icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
-
-
- tcp_mstamp_refresh(tp);
- delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
- remaining = icsk->icsk_rto -
- usecs_to_jiffies(delta_us);
-
- if (remaining > 0) {
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- remaining, TCP_RTO_MAX);
- } else {
- /* RTO revert clocked out retransmission.
- * Will retransmit now */
- tcp_retransmit_timer(sk);
- }
-
+ /* check if this ICMP message allows revert of backoff.
+ * (see RFC 6069)
+ */
+ if (!fastopen &&
+ (code == ICMP_NET_UNREACH || code == ICMP_HOST_UNREACH))
+ tcp_ld_RTO_revert(sk, seq);
break;
case ICMP_TIME_EXCEEDED:
err = EHOSTUNREACH;
@@ -573,6 +579,8 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (fastopen && !fastopen->sk)
break;
+ ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
+
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 32564b350823..1b7ebbcae497 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -112,6 +112,9 @@
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
#include <net/udp_tunnel.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6_stubs.h>
+#endif
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
@@ -2563,7 +2566,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_XFRM
case UDP_ENCAP_ESPINUDP:
case UDP_ENCAP_ESPINUDP_NON_IKE:
- up->encap_rcv = xfrm4_udp_encap_rcv;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
+ else
+#endif
+ up->encap_rcv = xfrm4_udp_encap_rcv;
#endif
fallthrough;
case UDP_ENCAP_L2TPINUDP:
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 150e6f0fdbf5..2158e8bddf41 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -22,9 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->bind_ifindex) {
- err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
- (void *)&cfg->bind_ifindex,
- sizeof(cfg->bind_ifindex));
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
if (err < 0)
goto error;
}
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index f8de2482a529..ad2afeef4f10 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -18,11 +18,6 @@
#include <net/ip.h>
#include <net/xfrm.h>
-int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- return xfrm4_extract_header(skb);
-}
-
static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 30ddb9dc9398..3cff51ba72bb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -14,77 +14,18 @@
#include <net/xfrm.h>
#include <net/icmp.h>
-static int xfrm4_tunnel_check_size(struct sk_buff *skb)
-{
- int mtu, ret = 0;
-
- if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
- goto out;
-
- if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
- goto out;
-
- mtu = dst_mtu(skb_dst(skb));
- if ((!skb_is_gso(skb) && skb->len > mtu) ||
- (skb_is_gso(skb) &&
- !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
- skb->protocol = htons(ETH_P_IP);
-
- if (skb->sk)
- xfrm_local_error(skb, mtu);
- else
- icmp_send(skb, ICMP_DEST_UNREACH,
- ICMP_FRAG_NEEDED, htonl(mtu));
- ret = -EMSGSIZE;
- }
-out:
- return ret;
-}
-
-int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- int err;
-
- err = xfrm4_tunnel_check_size(skb);
- if (err)
- return err;
-
- XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;
-
- return xfrm4_extract_header(skb);
-}
-
-int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
-{
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-
- IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
-
- return xfrm_output(sk, skb);
-}
-
static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+#ifdef CONFIG_NETFILTER
struct xfrm_state *x = skb_dst(skb)->xfrm;
- const struct xfrm_state_afinfo *afinfo;
- int ret = -EAFNOSUPPORT;
-#ifdef CONFIG_NETFILTER
if (!x) {
IPCB(skb)->flags |= IPSKB_REROUTED;
return dst_output(net, sk, skb);
}
#endif
- rcu_read_lock();
- afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
- if (likely(afinfo))
- ret = afinfo->output_finish(sk, skb);
- else
- kfree_skb(skb);
- rcu_read_unlock();
-
- return ret;
+ return xfrm_output(sk, skb);
}
int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index f8ed3c3bb928..87d4db591488 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -8,36 +8,12 @@
*
*/
-#include <net/ip.h>
#include <net/xfrm.h>
-#include <linux/pfkeyv2.h>
-#include <linux/ipsec.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/export.h>
-
-int xfrm4_extract_header(struct sk_buff *skb)
-{
- const struct iphdr *iph = ip_hdr(skb);
-
- XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
- XFRM_MODE_SKB_CB(skb)->id = iph->id;
- XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
- XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
- XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
- XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph);
- memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
- sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
-
- return 0;
-}
static struct xfrm_state_afinfo xfrm4_state_afinfo = {
.family = AF_INET,
.proto = IPPROTO_IPIP,
.output = xfrm4_output,
- .output_finish = xfrm4_output_finish,
- .extract_input = xfrm4_extract_input,
- .extract_output = xfrm4_extract_output,
.transport_finish = xfrm4_transport_finish,
.local_error = xfrm4_local_error,
};
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 5a6111da26c4..4f03aece2980 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -88,6 +88,18 @@ config INET6_ESP_OFFLOAD
If unsure, say N.
+config INET6_ESPINTCP
+ bool "IPv6: ESP in TCP encapsulation (RFC 8229)"
+ depends on XFRM && INET6_ESP
+ select STREAM_PARSER
+ select NET_SOCK_MSG
+ select XFRM_ESPINTCP
+ help
+ Support for RFC 8229 encapsulation of ESP and IKE over
+ TCP/IPv6 sockets.
+
+ If unsure, say N.
+
config INET6_IPCOMP
tristate "IPv6: IPComp transformation"
select INET6_XFRM_TUNNEL
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ab7e839753ae..09cfbf5dd7ce 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2783,6 +2783,33 @@ put:
in6_dev_put(in6_dev);
}
+static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev,
+ struct in6_ifreq *ireq)
+{
+ struct ip_tunnel_parm p = { };
+ int err;
+
+ if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4))
+ return -EADDRNOTAVAIL;
+
+ p.iph.daddr = ireq->ifr6_addr.s6_addr32[3];
+ p.iph.version = 4;
+ p.iph.ihl = 5;
+ p.iph.protocol = IPPROTO_IPV6;
+ p.iph.ttl = 64;
+
+ if (!dev->netdev_ops->ndo_tunnel_ctl)
+ return -EOPNOTSUPP;
+ err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, SIOCADDTUNNEL);
+ if (err)
+ return err;
+
+ dev = __dev_get_by_name(net, p.name);
+ if (!dev)
+ return -ENOBUFS;
+ return dev_open(dev, NULL);
+}
+
/*
* Set destination address.
* Special case for SIT interfaces where we create a new "virtual"
@@ -2790,61 +2817,19 @@ put:
*/
int addrconf_set_dstaddr(struct net *net, void __user *arg)
{
- struct in6_ifreq ireq;
struct net_device *dev;
- int err = -EINVAL;
-
- rtnl_lock();
+ struct in6_ifreq ireq;
+ int err = -ENODEV;
- err = -EFAULT;
+ if (!IS_ENABLED(CONFIG_IPV6_SIT))
+ return -ENODEV;
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
- goto err_exit;
+ return -EFAULT;
+ rtnl_lock();
dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
-
- err = -ENODEV;
- if (!dev)
- goto err_exit;
-
-#if IS_ENABLED(CONFIG_IPV6_SIT)
- if (dev->type == ARPHRD_SIT) {
- const struct net_device_ops *ops = dev->netdev_ops;
- struct ifreq ifr;
- struct ip_tunnel_parm p;
-
- err = -EADDRNOTAVAIL;
- if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
- goto err_exit;
-
- memset(&p, 0, sizeof(p));
- p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
- p.iph.saddr = 0;
- p.iph.version = 4;
- p.iph.ihl = 5;
- p.iph.protocol = IPPROTO_IPV6;
- p.iph.ttl = 64;
- ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
-
- if (ops->ndo_do_ioctl) {
- mm_segment_t oldfs = get_fs();
-
- set_fs(KERNEL_DS);
- err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
- set_fs(oldfs);
- } else
- err = -EOPNOTSUPP;
-
- if (err == 0) {
- err = -ENOBUFS;
- dev = __dev_get_by_name(net, p.name);
- if (!dev)
- goto err_exit;
- err = dev_open(dev, NULL);
- }
- }
-#endif
-
-err_exit:
+ if (dev && dev->type == ARPHRD_SIT)
+ err = addrconf_set_sit_dstaddr(net, dev, &ireq);
rtnl_unlock();
return err;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 771a462a8322..b304b882e031 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -60,6 +60,8 @@
#include <net/calipso.h>
#include <net/seg6.h>
#include <net/rpl.h>
+#include <net/compat.h>
+#include <net/xfrm.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -504,9 +506,8 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
/*
* This does both peername and sockname.
*/
-
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
- int peer)
+ int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
@@ -531,9 +532,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_addr = np->saddr;
else
sin->sin6_addr = sk->sk_v6_rcv_saddr;
-
sin->sin6_port = inet->inet_sport;
}
+ if (cgroup_bpf_enabled)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+ peer ? BPF_CGROUP_INET6_GETPEERNAME :
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ NULL);
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
return sizeof(*sin);
@@ -542,21 +547,25 @@ EXPORT_SYMBOL(inet6_getname);
int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
+ void __user *argp = (void __user *)arg;
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
switch (cmd) {
case SIOCADDRT:
- case SIOCDELRT:
-
- return ipv6_route_ioctl(net, cmd, (void __user *)arg);
+ case SIOCDELRT: {
+ struct in6_rtmsg rtmsg;
+ if (copy_from_user(&rtmsg, argp, sizeof(rtmsg)))
+ return -EFAULT;
+ return ipv6_route_ioctl(net, cmd, &rtmsg);
+ }
case SIOCSIFADDR:
- return addrconf_add_ifaddr(net, (void __user *) arg);
+ return addrconf_add_ifaddr(net, argp);
case SIOCDIFADDR:
- return addrconf_del_ifaddr(net, (void __user *) arg);
+ return addrconf_del_ifaddr(net, argp);
case SIOCSIFDSTADDR:
- return addrconf_set_dstaddr(net, (void __user *) arg);
+ return addrconf_set_dstaddr(net, argp);
default:
if (!sk->sk_prot->ioctl)
return -ENOIOCTLCMD;
@@ -567,6 +576,56 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
EXPORT_SYMBOL(inet6_ioctl);
+#ifdef CONFIG_COMPAT
+struct compat_in6_rtmsg {
+ struct in6_addr rtmsg_dst;
+ struct in6_addr rtmsg_src;
+ struct in6_addr rtmsg_gateway;
+ u32 rtmsg_type;
+ u16 rtmsg_dst_len;
+ u16 rtmsg_src_len;
+ u32 rtmsg_metric;
+ u32 rtmsg_info;
+ u32 rtmsg_flags;
+ s32 rtmsg_ifindex;
+};
+
+static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
+ struct compat_in6_rtmsg __user *ur)
+{
+ struct in6_rtmsg rt;
+
+ if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst,
+ 3 * sizeof(struct in6_addr)) ||
+ get_user(rt.rtmsg_type, &ur->rtmsg_type) ||
+ get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) ||
+ get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) ||
+ get_user(rt.rtmsg_metric, &ur->rtmsg_metric) ||
+ get_user(rt.rtmsg_info, &ur->rtmsg_info) ||
+ get_user(rt.rtmsg_flags, &ur->rtmsg_flags) ||
+ get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex))
+ return -EFAULT;
+
+
+ return ipv6_route_ioctl(sock_net(sk), cmd, &rt);
+}
+
+int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ void __user *argp = compat_ptr(arg);
+ struct sock *sk = sock->sk;
+
+ switch (cmd) {
+ case SIOCADDRT:
+ case SIOCDELRT:
+ return inet6_compat_routing_ioctl(sk, cmd, argp);
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+EXPORT_SYMBOL_GPL(inet6_compat_ioctl);
+#endif /* CONFIG_COMPAT */
+
INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *,
size_t));
int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -628,6 +687,7 @@ const struct proto_ops inet6_stream_ops = {
.read_sock = tcp_read_sock,
.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
+ .compat_ioctl = inet6_compat_ioctl,
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
@@ -656,6 +716,7 @@ const struct proto_ops inet6_dgram_ops = {
.sendpage = sock_no_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
+ .compat_ioctl = inet6_compat_ioctl,
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
@@ -963,6 +1024,11 @@ static const struct ipv6_stub ipv6_stub_impl = {
.ip6_del_rt = ip6_del_rt,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
+#if IS_ENABLED(CONFIG_XFRM)
+ .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
+ .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+ .xfrm6_rcv_encap = xfrm6_rcv_encap,
+#endif
.nd_tbl = &nd_tbl,
};
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 45e2adc56610..d88d97617f7e 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -767,6 +767,7 @@ static const struct xfrm_type ah6_type = {
static struct xfrm6_protocol ah6_protocol = {
.handler = xfrm6_rcv,
+ .input_handler = xfrm_input,
.cb_handler = ah6_rcv_cb,
.err_handler = ah6_err,
.priority = 0,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 11143d039f16..c43592771126 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -26,11 +26,16 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <net/ip6_checksum.h>
#include <net/ip6_route.h>
#include <net/icmp.h>
#include <net/ipv6.h>
#include <net/protocol.h>
+#include <net/udp.h>
#include <linux/icmpv6.h>
+#include <net/tcp.h>
+#include <net/espintcp.h>
+#include <net/inet6_hashtables.h>
#include <linux/highmem.h>
@@ -39,6 +44,11 @@ struct esp_skb_cb {
void *tmp;
};
+struct esp_output_extra {
+ __be32 seqhi;
+ u32 esphoff;
+};
+
#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
/*
@@ -72,9 +82,9 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
return kmalloc(len, GFP_ATOMIC);
}
-static inline __be32 *esp_tmp_seqhi(void *tmp)
+static inline void *esp_tmp_extra(void *tmp)
{
- return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+ return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra));
}
static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
@@ -104,16 +114,17 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
{
+ struct esp_output_extra *extra = esp_tmp_extra(tmp);
struct crypto_aead *aead = x->data;
- int seqhilen = 0;
+ int extralen = 0;
u8 *iv;
struct aead_request *req;
struct scatterlist *sg;
if (x->props.flags & XFRM_STATE_ESN)
- seqhilen += sizeof(__be32);
+ extralen += sizeof(*extra);
- iv = esp_tmp_iv(aead, tmp, seqhilen);
+ iv = esp_tmp_iv(aead, tmp, extralen);
req = esp_tmp_req(aead, iv);
/* Unref skb_frag_pages in the src scatterlist if necessary.
@@ -124,6 +135,149 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
put_page(sg_page(sg));
}
+#ifdef CONFIG_INET6_ESPINTCP
+struct esp_tcp_sk {
+ struct sock *sk;
+ struct rcu_head rcu;
+};
+
+static void esp_free_tcp_sk(struct rcu_head *head)
+{
+ struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
+
+ sock_put(esk->sk);
+ kfree(esk);
+}
+
+static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
+{
+ struct xfrm_encap_tmpl *encap = x->encap;
+ struct esp_tcp_sk *esk;
+ __be16 sport, dport;
+ struct sock *nsk;
+ struct sock *sk;
+
+ sk = rcu_dereference(x->encap_sk);
+ if (sk && sk->sk_state == TCP_ESTABLISHED)
+ return sk;
+
+ spin_lock_bh(&x->lock);
+ sport = encap->encap_sport;
+ dport = encap->encap_dport;
+ nsk = rcu_dereference_protected(x->encap_sk,
+ lockdep_is_held(&x->lock));
+ if (sk && sk == nsk) {
+ esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
+ if (!esk) {
+ spin_unlock_bh(&x->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+ RCU_INIT_POINTER(x->encap_sk, NULL);
+ esk->sk = sk;
+ call_rcu(&esk->rcu, esp_free_tcp_sk);
+ }
+ spin_unlock_bh(&x->lock);
+
+ sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6,
+ dport, &x->props.saddr.in6, ntohs(sport), 0, 0);
+ if (!sk)
+ return ERR_PTR(-ENOENT);
+
+ if (!tcp_is_ulp_esp(sk)) {
+ sock_put(sk);
+ return ERR_PTR(-EINVAL);
+ }
+
+ spin_lock_bh(&x->lock);
+ nsk = rcu_dereference_protected(x->encap_sk,
+ lockdep_is_held(&x->lock));
+ if (encap->encap_sport != sport ||
+ encap->encap_dport != dport) {
+ sock_put(sk);
+ sk = nsk ?: ERR_PTR(-EREMCHG);
+ } else if (sk == nsk) {
+ sock_put(sk);
+ } else {
+ rcu_assign_pointer(x->encap_sk, sk);
+ }
+ spin_unlock_bh(&x->lock);
+
+ return sk;
+}
+
+static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct sock *sk;
+ int err;
+
+ rcu_read_lock();
+
+ sk = esp6_find_tcp_sk(x);
+ err = PTR_ERR_OR_ZERO(sk);
+ if (err)
+ goto out;
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk))
+ err = espintcp_queue_out(sk, skb);
+ else
+ err = espintcp_push_skb(sk, skb);
+ bh_unlock_sock(sk);
+
+out:
+ rcu_read_unlock();
+ return err;
+}
+
+static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
+
+ return esp_output_tcp_finish(x, skb);
+}
+
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+
+ local_bh_disable();
+ err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+ local_bh_enable();
+
+ /* EINPROGRESS just happens to do the right thing. It
+ * actually means that the skb has been consumed and
+ * isn't coming back.
+ */
+ return err ?: -EINPROGRESS;
+}
+#else
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+ kfree_skb(skb);
+
+ return -EOPNOTSUPP;
+}
+#endif
+
+static void esp_output_encap_csum(struct sk_buff *skb)
+{
+ /* UDP encap with IPv6 requires a valid checksum */
+ if (*skb_mac_header(skb) == IPPROTO_UDP) {
+ struct udphdr *uh = udp_hdr(skb);
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int len = ntohs(uh->len);
+ unsigned int offset = skb_transport_offset(skb);
+ __wsum csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+ uh->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ len, IPPROTO_UDP, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ }
+}
+
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -143,6 +297,8 @@ static void esp_output_done(struct crypto_async_request *base, int err)
esp_ssg_unref(x, tmp);
kfree(tmp);
+ esp_output_encap_csum(skb);
+
if (xo && (xo->flags & XFRM_DEV_RESUME)) {
if (err) {
XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
@@ -154,7 +310,11 @@ static void esp_output_done(struct crypto_async_request *base, int err)
secpath_reset(skb);
xfrm_dev_resume(skb);
} else {
- xfrm_output_resume(skb, err);
+ if (!err &&
+ x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+ esp_output_tail_tcp(x, skb);
+ else
+ xfrm_output_resume(skb, err);
}
}
@@ -163,7 +323,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
{
struct ip_esp_hdr *esph = (void *)(skb->data + offset);
void *tmp = ESP_SKB_CB(skb)->tmp;
- __be32 *seqhi = esp_tmp_seqhi(tmp);
+ __be32 *seqhi = esp_tmp_extra(tmp);
esph->seq_no = esph->spi;
esph->spi = *seqhi;
@@ -171,27 +331,36 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
static void esp_output_restore_header(struct sk_buff *skb)
{
- esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
+ void *tmp = ESP_SKB_CB(skb)->tmp;
+ struct esp_output_extra *extra = esp_tmp_extra(tmp);
+
+ esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff -
+ sizeof(__be32));
}
static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
struct xfrm_state *x,
struct ip_esp_hdr *esph,
- __be32 *seqhi)
+ struct esp_output_extra *extra)
{
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
* encryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
+ __u32 seqhi;
struct xfrm_offload *xo = xfrm_offload(skb);
- esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
- *seqhi = esph->spi;
if (xo)
- esph->seq_no = htonl(xo->seq.hi);
+ seqhi = xo->seq.hi;
else
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ seqhi = XFRM_SKB_CB(skb)->seq.output.hi;
+
+ extra->esphoff = (unsigned char *)esph -
+ skb_transport_header(skb);
+ esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
+ extra->seqhi = esph->spi;
+ esph->seq_no = htonl(seqhi);
}
esph->spi = x->id.spi;
@@ -207,15 +376,122 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
esp_output_done(base, err);
}
+static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
+ int encap_type,
+ struct esp_info *esp,
+ __be16 sport,
+ __be16 dport)
+{
+ struct udphdr *uh;
+ __be32 *udpdata32;
+ unsigned int len;
+
+ len = skb->len + esp->tailen - skb_transport_offset(skb);
+ if (len > U16_MAX)
+ return ERR_PTR(-EMSGSIZE);
+
+ uh = (struct udphdr *)esp->esph;
+ uh->source = sport;
+ uh->dest = dport;
+ uh->len = htons(len);
+ uh->check = 0;
+
+ *skb_mac_header(skb) = IPPROTO_UDP;
+
+ if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
+ udpdata32 = (__be32 *)(uh + 1);
+ udpdata32[0] = udpdata32[1] = 0;
+ return (struct ip_esp_hdr *)(udpdata32 + 2);
+ }
+
+ return (struct ip_esp_hdr *)(uh + 1);
+}
+
+#ifdef CONFIG_INET6_ESPINTCP
+static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
+ struct sk_buff *skb,
+ struct esp_info *esp)
+{
+ __be16 *lenp = (void *)esp->esph;
+ struct ip_esp_hdr *esph;
+ unsigned int len;
+ struct sock *sk;
+
+ len = skb->len + esp->tailen - skb_transport_offset(skb);
+ if (len > IP_MAX_MTU)
+ return ERR_PTR(-EMSGSIZE);
+
+ rcu_read_lock();
+ sk = esp6_find_tcp_sk(x);
+ rcu_read_unlock();
+
+ if (IS_ERR(sk))
+ return ERR_CAST(sk);
+
+ *lenp = htons(len);
+ esph = (struct ip_esp_hdr *)(lenp + 1);
+
+ return esph;
+}
+#else
+static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
+ struct sk_buff *skb,
+ struct esp_info *esp)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+#endif
+
+static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
+ struct esp_info *esp)
+{
+ struct xfrm_encap_tmpl *encap = x->encap;
+ struct ip_esp_hdr *esph;
+ __be16 sport, dport;
+ int encap_type;
+
+ spin_lock_bh(&x->lock);
+ sport = encap->encap_sport;
+ dport = encap->encap_dport;
+ encap_type = encap->encap_type;
+ spin_unlock_bh(&x->lock);
+
+ switch (encap_type) {
+ default:
+ case UDP_ENCAP_ESPINUDP:
+ case UDP_ENCAP_ESPINUDP_NON_IKE:
+ esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport);
+ break;
+ case TCP_ENCAP_ESPINTCP:
+ esph = esp6_output_tcp_encap(x, skb, esp);
+ break;
+ }
+
+ if (IS_ERR(esph))
+ return PTR_ERR(esph);
+
+ esp->esph = esph;
+
+ return 0;
+}
+
int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
u8 *tail;
u8 *vaddr;
int nfrags;
+ int esph_offset;
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
+ if (x->encap) {
+ int err = esp6_output_encap(x, skb, esp);
+
+ if (err < 0)
+ return err;
+ }
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -274,10 +550,13 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
}
cow:
+ esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb);
+
nfrags = skb_cow_data(skb, tailen, &trailer);
if (nfrags < 0)
goto out;
tail = skb_tail_pointer(trailer);
+ esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset);
skip_cow:
esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
@@ -295,20 +574,20 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
void *tmp;
int ivlen;
int assoclen;
- int seqhilen;
- __be32 *seqhi;
+ int extralen;
struct page *page;
struct ip_esp_hdr *esph;
struct aead_request *req;
struct crypto_aead *aead;
struct scatterlist *sg, *dsg;
+ struct esp_output_extra *extra;
int err = -ENOMEM;
assoclen = sizeof(struct ip_esp_hdr);
- seqhilen = 0;
+ extralen = 0;
if (x->props.flags & XFRM_STATE_ESN) {
- seqhilen += sizeof(__be32);
+ extralen += sizeof(*extra);
assoclen += sizeof(__be32);
}
@@ -316,12 +595,12 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
alen = crypto_aead_authsize(aead);
ivlen = crypto_aead_ivsize(aead);
- tmp = esp_alloc_tmp(aead, esp->nfrags + 2, seqhilen);
+ tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen);
if (!tmp)
goto error;
- seqhi = esp_tmp_seqhi(tmp);
- iv = esp_tmp_iv(aead, tmp, seqhilen);
+ extra = esp_tmp_extra(tmp);
+ iv = esp_tmp_iv(aead, tmp, extralen);
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
@@ -330,7 +609,8 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
else
dsg = &sg[esp->nfrags];
- esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi);
+ esph = esp_output_set_esn(skb, x, esp->esph, extra);
+ esp->esph = esph;
sg_init_table(sg, esp->nfrags);
err = skb_to_sgvec(skb, sg,
@@ -394,11 +674,15 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
case 0:
if ((x->props.flags & XFRM_STATE_ESN))
esp_output_restore_header(skb);
+ esp_output_encap_csum(skb);
}
if (sg != dsg)
esp_ssg_unref(x, tmp);
+ if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+ err = esp_output_tail_tcp(x, skb);
+
error_free:
kfree(tmp);
error:
@@ -438,11 +722,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
esp.plen = esp.clen - skb->len - esp.tfclen;
esp.tailen = esp.tfclen + esp.plen + alen;
+ esp.esph = ip_esp_hdr(skb);
+
esp.nfrags = esp6_output_head(x, skb, &esp);
if (esp.nfrags < 0)
return esp.nfrags;
- esph = ip_esp_hdr(skb);
+ esph = esp.esph;
esph->spi = x->id.spi;
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
@@ -517,6 +803,60 @@ int esp6_input_done2(struct sk_buff *skb, int err)
if (unlikely(err < 0))
goto out;
+ if (x->encap) {
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct xfrm_encap_tmpl *encap = x->encap;
+ struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len);
+ struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len);
+ __be16 source;
+
+ switch (x->encap->encap_type) {
+ case TCP_ENCAP_ESPINTCP:
+ source = th->source;
+ break;
+ case UDP_ENCAP_ESPINUDP:
+ case UDP_ENCAP_ESPINUDP_NON_IKE:
+ source = uh->source;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ err = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * 1) if the NAT-T peer's IP or port changed then
+ * advertize the change to the keying daemon.
+ * This is an inbound SA, so just compare
+ * SRC ports.
+ */
+ if (!ipv6_addr_equal(&ip6h->saddr, &x->props.saddr.in6) ||
+ source != encap->encap_sport) {
+ xfrm_address_t ipaddr;
+
+ memcpy(&ipaddr.a6, &ip6h->saddr.s6_addr, sizeof(ipaddr.a6));
+ km_new_mapping(x, &ipaddr, source);
+
+ /* XXX: perhaps add an extra
+ * policy check here, to see
+ * if we should allow or
+ * reject a packet from a
+ * different source
+ * address/port.
+ */
+ }
+
+ /*
+ * 2) ignore UDP/TCP checksums in case
+ * of NAT-T in Transport Mode, or
+ * perform other post-processing fixes
+ * as per draft-ietf-ipsec-udp-encaps-06,
+ * section 3.1.2
+ */
+ if (x->props.mode == XFRM_MODE_TRANSPORT)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
skb_pull_rcsum(skb, hlen);
@@ -632,7 +972,7 @@ skip_cow:
goto out;
ESP_SKB_CB(skb)->tmp = tmp;
- seqhi = esp_tmp_seqhi(tmp);
+ seqhi = esp_tmp_extra(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
@@ -836,9 +1176,6 @@ static int esp6_init_state(struct xfrm_state *x)
u32 align;
int err;
- if (x->encap)
- return -EINVAL;
-
x->data = NULL;
if (x->aead)
@@ -867,6 +1204,30 @@ static int esp6_init_state(struct xfrm_state *x)
break;
}
+ if (x->encap) {
+ struct xfrm_encap_tmpl *encap = x->encap;
+
+ switch (encap->encap_type) {
+ default:
+ err = -EINVAL;
+ goto error;
+ case UDP_ENCAP_ESPINUDP:
+ x->props.header_len += sizeof(struct udphdr);
+ break;
+ case UDP_ENCAP_ESPINUDP_NON_IKE:
+ x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
+ break;
+#ifdef CONFIG_INET6_ESPINTCP
+ case TCP_ENCAP_ESPINTCP:
+ /* only the length field, TCP encap is done by
+ * the socket
+ */
+ x->props.header_len += 2;
+ break;
+#endif
+ }
+ }
+
align = ALIGN(crypto_aead_blocksize(aead), 4);
x->props.trailer_len = align + 1 + crypto_aead_authsize(aead);
@@ -893,6 +1254,7 @@ static const struct xfrm_type esp6_type = {
static struct xfrm6_protocol esp6_protocol = {
.handler = xfrm6_rcv,
+ .input_handler = xfrm_input,
.cb_handler = esp6_rcv_cb,
.err_handler = esp6_err,
.priority = 0,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 8eab2c869d61..55addea1948f 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -85,10 +85,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
sp->olen++;
xo = xfrm_offload(skb);
- if (!xo) {
- xfrm_state_put(x);
+ if (!xo)
goto out_reset;
- }
}
xo->flags |= XFRM_GRO;
@@ -123,9 +121,16 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
struct ip_esp_hdr *esph;
struct ipv6hdr *iph = ipv6_hdr(skb);
struct xfrm_offload *xo = xfrm_offload(skb);
- int proto = iph->nexthdr;
+ u8 proto = iph->nexthdr;
skb_push(skb, -skb_network_offset(skb));
+
+ if (x->outer_mode.encap == XFRM_MODE_TRANSPORT) {
+ __be16 frag;
+
+ ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &proto, &frag);
+ }
+
esph = ip_esp_hdr(skb);
*skb_mac_header(skb) = IPPROTO_ESP;
@@ -166,23 +171,31 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x,
struct xfrm_offload *xo = xfrm_offload(skb);
struct sk_buff *segs = ERR_PTR(-EINVAL);
const struct net_offload *ops;
- int proto = xo->proto;
+ u8 proto = xo->proto;
skb->transport_header += x->props.header_len;
- if (proto == IPPROTO_BEETPH) {
- struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data;
-
- skb->transport_header += ph->hdrlen * 8;
- proto = ph->nexthdr;
- }
-
if (x->sel.family != AF_INET6) {
skb->transport_header -=
(sizeof(struct ipv6hdr) - sizeof(struct iphdr));
+ if (proto == IPPROTO_BEETPH) {
+ struct ip_beet_phdr *ph =
+ (struct ip_beet_phdr *)skb->data;
+
+ skb->transport_header += ph->hdrlen * 8;
+ proto = ph->nexthdr;
+ } else {
+ skb->transport_header -= IPV4_BEET_PHMAXLEN;
+ }
+
if (proto == IPPROTO_TCP)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
+ } else {
+ __be16 frag;
+
+ skb->transport_header +=
+ ipv6_skip_exthdr(skb, 0, &proto, &frag);
}
__skb_pull(skb, skb_transport_offset(skb));
@@ -271,7 +284,6 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
int alen;
int blksize;
struct xfrm_offload *xo;
- struct ip_esp_hdr *esph;
struct crypto_aead *aead;
struct esp_info esp;
bool hw_offload = true;
@@ -312,13 +324,13 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
seq = xo->seq.low;
- esph = ip_esp_hdr(skb);
- esph->spi = x->id.spi;
+ esp.esph = ip_esp_hdr(skb);
+ esp.esph->spi = x->id.spi;
skb_push(skb, -skb_network_offset(skb));
if (xo->flags & XFRM_GSO_SEGMENT) {
- esph->seq_no = htonl(seq);
+ esp.esph->seq_no = htonl(seq);
if (!skb_is_gso(skb))
xo->seq.low++;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 250ff52c674e..49ee89bbcba0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -664,7 +664,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (arg.filter.table_id) {
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
- if (arg.filter.dump_all_families)
+ if (rtnl_msg_family(cb->nlh) != PF_INET6)
goto out;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4703b09808d0..821d96c720b9 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -89,6 +89,11 @@ struct ip6_tnl_net {
struct ip6_tnl __rcu *collect_md_tun;
};
+static inline int ip6_tnl_mpls_supported(void)
+{
+ return IS_ENABLED(CONFIG_MPLS);
+}
+
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
struct pcpu_sw_netstats tmp, sum = { 0 };
@@ -718,6 +723,20 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
+static int
+mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ __u32 rel_info = ntohl(info);
+ int err, rel_msg = 0;
+ u8 rel_type = type;
+ u8 rel_code = code;
+
+ err = ip6_tnl_err(skb, IPPROTO_MPLS, opt, &rel_type, &rel_code,
+ &rel_msg, &rel_info, offset);
+ return err;
+}
+
static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
const struct ipv6hdr *ipv6h,
struct sk_buff *skb)
@@ -740,6 +759,14 @@ static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
return IP6_ECN_decapsulate(ipv6h, skb);
}
+static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+ const struct ipv6hdr *ipv6h,
+ struct sk_buff *skb)
+{
+ /* ECN is not supported in AF_MPLS */
+ return 0;
+}
+
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
const struct in6_addr *laddr,
const struct in6_addr *raddr)
@@ -901,6 +928,11 @@ static const struct tnl_ptk_info tpi_v4 = {
.proto = htons(ETH_P_IP),
};
+static const struct tnl_ptk_info tpi_mpls = {
+ /* no tunnel info required for mplsip6. */
+ .proto = htons(ETH_P_MPLS_UC),
+};
+
static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
const struct tnl_ptk_info *tpi,
int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
@@ -958,6 +990,12 @@ static int ip6ip6_rcv(struct sk_buff *skb)
ip6ip6_dscp_ecn_decapsulate);
}
+static int mplsip6_rcv(struct sk_buff *skb)
+{
+ return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls,
+ mplsip6_dscp_ecn_decapsulate);
+}
+
struct ipv6_tel_txoption {
struct ipv6_txoptions ops;
__u8 dst_opt[8];
@@ -1232,6 +1270,8 @@ route_lookup:
ipv6_push_frag_opts(skb, &opt.ops, &proto);
}
+ skb_set_inner_ipproto(skb, proto);
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
@@ -1253,22 +1293,22 @@ tx_err_dst_release:
EXPORT_SYMBOL(ip6_tnl_xmit);
static inline int
-ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
+ u8 protocol)
{
struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h;
const struct iphdr *iph;
int encap_limit = -1;
+ __u16 offset;
struct flowi6 fl6;
- __u8 dsfield;
+ __u8 dsfield, orig_dsfield;
__u32 mtu;
u8 tproto;
int err;
- iph = ip_hdr(skb);
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-
tproto = READ_ONCE(t->parms.proto);
- if (tproto != IPPROTO_IPIP && tproto != 0)
+ if (tproto != protocol && tproto != 0)
return -1;
if (t->parms.collect_md) {
@@ -1281,129 +1321,100 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
return -1;
key = &tun_info->key;
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPIP;
+ fl6.flowi6_proto = protocol;
fl6.saddr = key->u.ipv6.src;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
dsfield = key->tos;
+ switch (protocol) {
+ case IPPROTO_IPIP:
+ iph = ip_hdr(skb);
+ orig_dsfield = ipv4_get_dsfield(iph);
+ break;
+ case IPPROTO_IPV6:
+ ipv6h = ipv6_hdr(skb);
+ orig_dsfield = ipv6_get_dsfield(ipv6h);
+ break;
+ default:
+ orig_dsfield = dsfield;
+ break;
+ }
} else {
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
+ if (protocol == IPPROTO_IPV6) {
+ offset = ip6_tnl_parse_tlv_enc_lim(skb,
+ skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have
+ * reallocated skb->head
+ */
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPIP;
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- dsfield = ipv4_get_dsfield(iph);
- else
- dsfield = ip6_tclass(t->parms.flowinfo);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
- else
- fl6.flowi6_mark = t->parms.fwmark;
- }
-
- fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
- dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
-
- if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
- return -1;
-
- skb_set_inner_ipproto(skb, IPPROTO_IPIP);
-
- err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
- IPPROTO_IPIP);
- if (err != 0) {
- /* XXX: send ICMP error even if DF is not set. */
- if (err == -EMSGSIZE)
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
- return -1;
- }
-
- return 0;
-}
-
-static inline int
-ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h;
- int encap_limit = -1;
- __u16 offset;
- struct flowi6 fl6;
- __u8 dsfield;
- __u32 mtu;
- u8 tproto;
- int err;
-
- ipv6h = ipv6_hdr(skb);
- tproto = READ_ONCE(t->parms.proto);
- if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
- ip6_tnl_addr_conflict(t, ipv6h))
- return -1;
-
- if (t->parms.collect_md) {
- struct ip_tunnel_info *tun_info;
- const struct ip_tunnel_key *key;
-
- tun_info = skb_tunnel_info(skb);
- if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
- ip_tunnel_info_af(tun_info) != AF_INET6))
- return -1;
- key = &tun_info->key;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPV6;
- fl6.saddr = key->u.ipv6.src;
- fl6.daddr = key->u.ipv6.dst;
- fl6.flowlabel = key->label;
- dsfield = key->tos;
- } else {
- offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
- /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
- ipv6h = ipv6_hdr(skb);
- if (offset > 0) {
- struct ipv6_tlv_tnl_enc_lim *tel;
-
- tel = (void *)&skb_network_header(skb)[offset];
- if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2);
- return -1;
+ tel = (void *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ encap_limit = tel->encap_limit - 1;
}
- encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
- encap_limit = t->parms.encap_limit;
}
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPV6;
+ fl6.flowi6_proto = protocol;
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- dsfield = ipv6_get_dsfield(ipv6h);
- else
- dsfield = ip6_tclass(t->parms.flowinfo);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
else
fl6.flowi6_mark = t->parms.fwmark;
+ switch (protocol) {
+ case IPPROTO_IPIP:
+ iph = ip_hdr(skb);
+ orig_dsfield = ipv4_get_dsfield(iph);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ dsfield = orig_dsfield;
+ else
+ dsfield = ip6_tclass(t->parms.flowinfo);
+ break;
+ case IPPROTO_IPV6:
+ ipv6h = ipv6_hdr(skb);
+ orig_dsfield = ipv6_get_dsfield(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ dsfield = orig_dsfield;
+ else
+ dsfield = ip6_tclass(t->parms.flowinfo);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6.flowlabel |= ip6_flowlabel(ipv6h);
+ break;
+ default:
+ orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo);
+ break;
+ }
}
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
- dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
+ dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield);
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
- skb_set_inner_ipproto(skb, IPPROTO_IPV6);
-
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
- IPPROTO_IPV6);
+ protocol);
if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ switch (protocol) {
+ case IPPROTO_IPIP:
+ icmp_send(skb, ICMP_DEST_UNREACH,
+ ICMP_FRAG_NEEDED, htonl(mtu));
+ break;
+ case IPPROTO_IPV6:
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ break;
+ default:
+ break;
+ }
return -1;
}
@@ -1415,6 +1426,7 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
+ u8 ipproto;
int ret;
if (!pskb_inet_may_pull(skb))
@@ -1422,15 +1434,21 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IP):
- ret = ip4ip6_tnl_xmit(skb, dev);
+ ipproto = IPPROTO_IPIP;
break;
case htons(ETH_P_IPV6):
- ret = ip6ip6_tnl_xmit(skb, dev);
+ if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb)))
+ goto tx_err;
+ ipproto = IPPROTO_IPV6;
+ break;
+ case htons(ETH_P_MPLS_UC):
+ ipproto = IPPROTO_MPLS;
break;
default:
goto tx_err;
}
+ ret = ipxip6_tnl_xmit(skb, dev, ipproto);
if (ret < 0)
goto tx_err;
@@ -2218,6 +2236,12 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
.priority = 1,
};
+static struct xfrm6_tunnel mplsip6_handler __read_mostly = {
+ .handler = mplsip6_rcv,
+ .err_handler = mplsip6_err,
+ .priority = 1,
+};
+
static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -2332,6 +2356,15 @@ static int __init ip6_tunnel_init(void)
pr_err("%s: can't register ip6ip6\n", __func__);
goto out_ip6ip6;
}
+
+ if (ip6_tnl_mpls_supported()) {
+ err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS);
+ if (err < 0) {
+ pr_err("%s: can't register mplsip6\n", __func__);
+ goto out_mplsip6;
+ }
+ }
+
err = rtnl_link_register(&ip6_link_ops);
if (err < 0)
goto rtnl_link_failed;
@@ -2339,6 +2372,9 @@ static int __init ip6_tunnel_init(void)
return 0;
rtnl_link_failed:
+ if (ip6_tnl_mpls_supported())
+ xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS);
+out_mplsip6:
xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
@@ -2361,6 +2397,9 @@ static void __exit ip6_tunnel_cleanup(void)
if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
pr_info("%s: can't deregister ip6ip6\n", __func__);
+ if (ip6_tnl_mpls_supported() &&
+ xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS))
+ pr_info("%s: can't deregister mplsip6\n", __func__);
unregister_pernet_device(&ip6_tnl_net_ops);
}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 58956a6b66a2..2e0ad1bc84a8 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -25,17 +25,12 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->ipv6_v6only) {
- int val = 1;
-
- err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
- (char *) &val, sizeof(val));
+ err = ip6_sock_set_v6only(sock->sk);
if (err < 0)
goto error;
}
if (cfg->bind_ifindex) {
- err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
- (void *)&cfg->bind_ifindex,
- sizeof(cfg->bind_ifindex));
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
if (err < 0)
goto error;
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index cc6180e08a4f..1147f647b9a0 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -296,7 +296,8 @@ static void vti6_dev_uninit(struct net_device *dev)
dev_put(dev);
}
-static int vti6_rcv(struct sk_buff *skb)
+static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type)
{
struct ip6_tnl *t;
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -323,7 +324,10 @@ static int vti6_rcv(struct sk_buff *skb)
rcu_read_unlock();
- return xfrm6_rcv_tnl(skb, t);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+ return xfrm_input(skb, nexthdr, spi, encap_type);
}
rcu_read_unlock();
return -EINVAL;
@@ -332,6 +336,13 @@ discard:
return 0;
}
+static int vti6_rcv(struct sk_buff *skb)
+{
+ int nexthdr = skb_network_header(skb)[IP6CB(skb)->nhoff];
+
+ return vti6_input_proto(skb, nexthdr, 0, 0);
+}
+
static int vti6_rcv_cb(struct sk_buff *skb, int err)
{
unsigned short family;
@@ -1185,6 +1196,7 @@ static struct pernet_operations vti6_net_ops = {
static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
.handler = vti6_rcv,
+ .input_handler = vti6_input_proto,
.cb_handler = vti6_rcv_cb,
.err_handler = vti6_err,
.priority = 100,
@@ -1192,6 +1204,7 @@ static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
.handler = vti6_rcv,
+ .input_handler = vti6_input_proto,
.cb_handler = vti6_rcv_cb,
.err_handler = vti6_err,
.priority = 100,
@@ -1199,6 +1212,7 @@ static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
.handler = vti6_rcv,
+ .input_handler = vti6_input_proto,
.cb_handler = vti6_rcv_cb,
.err_handler = vti6_err,
.priority = 100,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 65a54d74acc1..1f4d20e97c07 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -98,7 +98,8 @@ static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
- lockdep_rtnl_is_held())
+ lockdep_rtnl_is_held() || \
+ list_empty(&net->ipv6.mr6_tables))
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
@@ -2502,7 +2503,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
- if (filter.dump_all_families)
+ if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
return skb->len;
NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 3752bd3e92ce..99668bfebd85 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -183,6 +183,7 @@ static const struct xfrm_type ipcomp6_type = {
static struct xfrm6_protocol ipcomp6_protocol = {
.handler = xfrm6_rcv,
+ .input_handler = xfrm_input,
.cb_handler = ipcomp6_rcv_cb,
.err_handler = ipcomp6_err,
.priority = 0,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a0e50cc57e54..adbfed6adf11 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -136,6 +136,41 @@ static bool setsockopt_needs_rtnl(int optname)
return false;
}
+static int do_ipv6_mcast_group_source(struct sock *sk, int optname,
+ struct group_source_req *greqs)
+{
+ int omode, add;
+
+ if (greqs->gsr_group.ss_family != AF_INET6 ||
+ greqs->gsr_source.ss_family != AF_INET6)
+ return -EADDRNOTAVAIL;
+
+ if (optname == MCAST_BLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 1;
+ } else if (optname == MCAST_UNBLOCK_SOURCE) {
+ omode = MCAST_EXCLUDE;
+ add = 0;
+ } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+ struct sockaddr_in6 *psin6;
+ int retv;
+
+ psin6 = (struct sockaddr_in6 *)&greqs->gsr_group;
+ retv = ipv6_sock_mc_join_ssm(sk, greqs->gsr_interface,
+ &psin6->sin6_addr,
+ MCAST_INCLUDE);
+ /* prior join w/ different source is ok */
+ if (retv && retv != -EADDRINUSE)
+ return retv;
+ omode = MCAST_INCLUDE;
+ add = 1;
+ } else /* MCAST_LEAVE_SOURCE_GROUP */ {
+ omode = MCAST_INCLUDE;
+ add = 0;
+ }
+ return ip6_mc_source(add, omode, sk, greqs);
+}
+
static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
@@ -715,7 +750,6 @@ done:
case MCAST_UNBLOCK_SOURCE:
{
struct group_source_req greqs;
- int omode, add;
if (optlen < sizeof(struct group_source_req))
goto e_inval;
@@ -723,34 +757,7 @@ done:
retv = -EFAULT;
break;
}
- if (greqs.gsr_group.ss_family != AF_INET6 ||
- greqs.gsr_source.ss_family != AF_INET6) {
- retv = -EADDRNOTAVAIL;
- break;
- }
- if (optname == MCAST_BLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 1;
- } else if (optname == MCAST_UNBLOCK_SOURCE) {
- omode = MCAST_EXCLUDE;
- add = 0;
- } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
- struct sockaddr_in6 *psin6;
-
- psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
- retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface,
- &psin6->sin6_addr,
- MCAST_INCLUDE);
- /* prior join w/ different source is ok */
- if (retv && retv != -EADDRINUSE)
- break;
- omode = MCAST_INCLUDE;
- add = 1;
- } else /* MCAST_LEAVE_SOURCE_GROUP */ {
- omode = MCAST_INCLUDE;
- add = 0;
- }
- retv = ip6_mc_source(add, omode, sk, &greqs);
+ retv = do_ipv6_mcast_group_source(sk, optname, &greqs);
break;
}
case MCAST_MSFILTER:
@@ -780,7 +787,7 @@ done:
retv = -EINVAL;
break;
}
- retv = ip6_mc_msfilter(sk, gsf);
+ retv = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
kfree(gsf);
break;
@@ -838,67 +845,10 @@ done:
break;
case IPV6_ADDR_PREFERENCES:
- {
- unsigned int pref = 0;
- unsigned int prefmask = ~0;
-
if (optlen < sizeof(int))
goto e_inval;
-
- retv = -EINVAL;
-
- /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
- switch (val & (IPV6_PREFER_SRC_PUBLIC|
- IPV6_PREFER_SRC_TMP|
- IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
- case IPV6_PREFER_SRC_PUBLIC:
- pref |= IPV6_PREFER_SRC_PUBLIC;
- break;
- case IPV6_PREFER_SRC_TMP:
- pref |= IPV6_PREFER_SRC_TMP;
- break;
- case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
- break;
- case 0:
- goto pref_skip_pubtmp;
- default:
- goto e_inval;
- }
-
- prefmask &= ~(IPV6_PREFER_SRC_PUBLIC|
- IPV6_PREFER_SRC_TMP);
-pref_skip_pubtmp:
-
- /* check HOME/COA conflicts */
- switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) {
- case IPV6_PREFER_SRC_HOME:
- break;
- case IPV6_PREFER_SRC_COA:
- pref |= IPV6_PREFER_SRC_COA;
- case 0:
- goto pref_skip_coa;
- default:
- goto e_inval;
- }
-
- prefmask &= ~IPV6_PREFER_SRC_COA;
-pref_skip_coa:
-
- /* check CGA/NONCGA conflicts */
- switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
- case IPV6_PREFER_SRC_CGA:
- case IPV6_PREFER_SRC_NONCGA:
- case 0:
- break;
- default:
- goto e_inval;
- }
-
- np->srcprefs = (np->srcprefs & prefmask) | pref;
- retv = 0;
-
+ retv = __ip6_sock_set_addr_preferences(sk, val);
break;
- }
case IPV6_MINHOPCOUNT:
if (optlen < sizeof(int))
goto e_inval;
@@ -973,9 +923,110 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (level != SOL_IPV6)
return -ENOPROTOOPT;
- if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
- return compat_mc_setsockopt(sk, level, optname, optval, optlen,
- ipv6_setsockopt);
+ switch (optname) {
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ {
+ struct compat_group_req __user *gr32 = (void __user *)optval;
+ struct group_req greq;
+ struct sockaddr_in6 *psin6 = (struct sockaddr_in6 *)&greq.gr_group;
+
+ if (optlen < sizeof(struct compat_group_req))
+ return -EINVAL;
+
+ if (get_user(greq.gr_interface, &gr32->gr_interface) ||
+ copy_from_user(&greq.gr_group, &gr32->gr_group,
+ sizeof(greq.gr_group)))
+ return -EFAULT;
+
+ if (greq.gr_group.ss_family != AF_INET6)
+ return -EADDRNOTAVAIL;
+
+ rtnl_lock();
+ lock_sock(sk);
+ if (optname == MCAST_JOIN_GROUP)
+ err = ipv6_sock_mc_join(sk, greq.gr_interface,
+ &psin6->sin6_addr);
+ else
+ err = ipv6_sock_mc_drop(sk, greq.gr_interface,
+ &psin6->sin6_addr);
+ release_sock(sk);
+ rtnl_unlock();
+ return err;
+ }
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ {
+ struct compat_group_source_req __user *gsr32 = (void __user *)optval;
+ struct group_source_req greqs;
+
+ if (optlen < sizeof(struct compat_group_source_req))
+ return -EINVAL;
+
+ if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
+ copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
+ sizeof(greqs.gsr_group)) ||
+ copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
+ sizeof(greqs.gsr_source)))
+ return -EFAULT;
+
+ rtnl_lock();
+ lock_sock(sk);
+ err = do_ipv6_mcast_group_source(sk, optname, &greqs);
+ release_sock(sk);
+ rtnl_unlock();
+ return err;
+ }
+ case MCAST_MSFILTER:
+ {
+ const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ struct compat_group_filter *gf32;
+ void *p;
+ int n;
+
+ if (optlen < size0)
+ return -EINVAL;
+ if (optlen > sysctl_optmem_max - 4)
+ return -ENOBUFS;
+
+ p = kmalloc(optlen + 4, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ if (copy_from_user(gf32, optval, optlen)) {
+ err = -EFAULT;
+ goto mc_msf_out;
+ }
+
+ n = gf32->gf_numsrc;
+ /* numsrc >= (4G-140)/128 overflow in 32 bits */
+ if (n >= 0x1ffffffU ||
+ n > sysctl_mld_max_msf) {
+ err = -ENOBUFS;
+ goto mc_msf_out;
+ }
+ if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
+ err = -EINVAL;
+ goto mc_msf_out;
+ }
+
+ rtnl_lock();
+ lock_sock(sk);
+ err = ip6_mc_msfilter(sk, &(struct group_filter){
+ .gf_interface = gf32->gf_interface,
+ .gf_group = gf32->gf_group,
+ .gf_fmode = gf32->gf_fmode,
+ .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+ release_sock(sk);
+ rtnl_unlock();
+mc_msf_out:
+ kfree(p);
+ return err;
+ }
+ }
err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
@@ -1048,18 +1099,28 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case MCAST_MSFILTER:
{
+ struct group_filter __user *p = (void __user *)optval;
struct group_filter gsf;
+ const int size0 = offsetof(struct group_filter, gf_slist);
+ int num;
int err;
- if (len < GROUP_FILTER_SIZE(0))
+ if (len < size0)
return -EINVAL;
- if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
+ if (copy_from_user(&gsf, p, size0))
return -EFAULT;
if (gsf.gf_group.ss_family != AF_INET6)
return -EADDRNOTAVAIL;
+ num = gsf.gf_numsrc;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gsf,
- (struct group_filter __user *)optval, optlen);
+ err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+ if (!err) {
+ if (num > gsf.gf_numsrc)
+ num = gsf.gf_numsrc;
+ if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
+ copy_to_user(p, &gsf, size0))
+ err = -EFAULT;
+ }
release_sock(sk);
return err;
}
@@ -1428,9 +1489,44 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
if (level != SOL_IPV6)
return -ENOPROTOOPT;
- if (optname == MCAST_MSFILTER)
- return compat_mc_getsockopt(sk, level, optname, optval, optlen,
- ipv6_getsockopt);
+ if (optname == MCAST_MSFILTER) {
+ const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ struct compat_group_filter __user *p = (void __user *)optval;
+ struct compat_group_filter gf32;
+ struct group_filter gf;
+ int ulen, err;
+ int num;
+
+ if (get_user(ulen, optlen))
+ return -EFAULT;
+
+ if (ulen < size0)
+ return -EINVAL;
+
+ if (copy_from_user(&gf32, p, size0))
+ return -EFAULT;
+
+ gf.gf_interface = gf32.gf_interface;
+ gf.gf_fmode = gf32.gf_fmode;
+ num = gf.gf_numsrc = gf32.gf_numsrc;
+ gf.gf_group = gf32.gf_group;
+
+ if (gf.gf_group.ss_family != AF_INET6)
+ return -EADDRNOTAVAIL;
+ lock_sock(sk);
+ err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+ release_sock(sk);
+ if (err)
+ return err;
+ if (num > gf.gf_numsrc)
+ num = gf.gf_numsrc;
+ ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32));
+ if (put_user(ulen, optlen) ||
+ put_user(gf.gf_fmode, &p->gf_fmode) ||
+ put_user(gf.gf_numsrc, &p->gf_numsrc))
+ return -EFAULT;
+ return 0;
+ }
err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
MSG_CMSG_COMPAT);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index eaa4c2cc2fbb..7e12d2114158 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -457,7 +457,8 @@ done:
return err;
}
-int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
+int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
+ struct sockaddr_storage *list)
{
const struct in6_addr *group;
struct ipv6_mc_socklist *pmc;
@@ -509,10 +510,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
goto done;
}
newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc;
- for (i = 0; i < newpsl->sl_count; ++i) {
+ for (i = 0; i < newpsl->sl_count; ++i, ++list) {
struct sockaddr_in6 *psin6;
- psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i];
+ psin6 = (struct sockaddr_in6 *)list;
newpsl->sl_addr[i] = psin6->sin6_addr;
}
err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
@@ -547,7 +548,7 @@ done:
}
int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
- struct group_filter __user *optval, int __user *optlen)
+ struct sockaddr_storage *p)
{
int err, i, count, copycount;
const struct in6_addr *group;
@@ -592,14 +593,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
gsf->gf_numsrc = count;
- if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
- copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
- return -EFAULT;
- }
/* changes to psl require the socket lock, and a write lock
* on pmc->sflock. We have the socket lock so reading here is safe.
*/
- for (i = 0; i < copycount; i++) {
+ for (i = 0; i < copycount; i++, p++) {
struct sockaddr_in6 *psin6;
struct sockaddr_storage ss;
@@ -607,7 +604,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
memset(&ss, 0, sizeof(ss));
psin6->sin6_family = AF_INET6;
psin6->sin6_addr = psl->sl_addr[i];
- if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+ if (copy_to_user(p, &ss, sizeof(ss)))
return -EFAULT;
}
return 0;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0028aa1d7869..8ef5a7b30524 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1377,6 +1377,7 @@ const struct proto_ops inet6_sockraw_ops = {
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
#ifdef CONFIG_COMPAT
+ .compat_ioctl = inet6_compat_ioctl,
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a8b4add0b545..82cbb46a2a4f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3421,6 +3421,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
#ifdef CONFIG_IPV6_ROUTER_PREF
fib6_nh->last_probe = jiffies;
#endif
+ if (cfg->fc_is_fdb) {
+ fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
+ fib6_nh->fib_nh_gw_family = AF_INET6;
+ return 0;
+ }
err = -ENODEV;
if (cfg->fc_ifindex) {
@@ -4336,41 +4341,29 @@ static void rtmsg_to_fib6_config(struct net *net,
};
}
-int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
{
struct fib6_config cfg;
- struct in6_rtmsg rtmsg;
int err;
- switch (cmd) {
- case SIOCADDRT: /* Add a route */
- case SIOCDELRT: /* Delete a route */
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
- err = copy_from_user(&rtmsg, arg,
- sizeof(struct in6_rtmsg));
- if (err)
- return -EFAULT;
+ if (cmd != SIOCADDRT && cmd != SIOCDELRT)
+ return -EINVAL;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
- rtmsg_to_fib6_config(net, &rtmsg, &cfg);
+ rtmsg_to_fib6_config(net, rtmsg, &cfg);
- rtnl_lock();
- switch (cmd) {
- case SIOCADDRT:
- err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
- break;
- case SIOCDELRT:
- err = ip6_route_del(&cfg, NULL);
- break;
- default:
- err = -EINVAL;
- }
- rtnl_unlock();
-
- return err;
+ rtnl_lock();
+ switch (cmd) {
+ case SIOCADDRT:
+ err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
+ break;
+ case SIOCDELRT:
+ err = ip6_route_del(&cfg, NULL);
+ break;
}
-
- return -EINVAL;
+ rtnl_unlock();
+ return err;
}
/*
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 98954830c40b..1fbb4dfbb191 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -83,6 +83,13 @@ struct sit_net {
struct net_device *fb_tunnel_dev;
};
+static inline struct sit_net *dev_to_sit_net(struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ return net_generic(t->net, sit_net_id);
+}
+
/*
* Must be invoked with rcu_read_lock
*/
@@ -291,14 +298,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
}
-static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
- struct ip_tunnel_prl __user *a)
+static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
{
+ struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data;
+ struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_prl kprl, *kp;
struct ip_tunnel_prl_entry *prl;
unsigned int cmax, c = 0, ca, len;
int ret = 0;
+ if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
+ return -EINVAL;
+
if (copy_from_user(&kprl, a, sizeof(kprl)))
return -EFAULT;
cmax = kprl.datalen / sizeof(kprl);
@@ -441,6 +452,35 @@ out:
return err;
}
+static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
+ int cmd)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_prl prl;
+ int err;
+
+ if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
+ return -EINVAL;
+
+ if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
+ return -EFAULT;
+
+ switch (cmd) {
+ case SIOCDELPRL:
+ err = ipip6_tunnel_del_prl(t, &prl);
+ break;
+ case SIOCADDPRL:
+ case SIOCCHGPRL:
+ err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
+ break;
+ }
+ dst_cache_reset(&t->dst_cache);
+ netdev_state_change(dev);
+ return err;
+}
+
static int
isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
{
@@ -1151,7 +1191,53 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
netdev_state_change(t->dev);
return 0;
}
-#endif
+
+static int
+ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_6rd ip6rd;
+ struct ip_tunnel_parm p;
+
+ if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ return -EFAULT;
+ t = ipip6_tunnel_locate(t->net, &p, 0);
+ }
+ if (!t)
+ t = netdev_priv(dev);
+
+ ip6rd.prefix = t->ip6rd.prefix;
+ ip6rd.relay_prefix = t->ip6rd.relay_prefix;
+ ip6rd.prefixlen = t->ip6rd.prefixlen;
+ ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd)))
+ return -EFAULT;
+ return 0;
+}
+
+static int
+ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_6rd ip6rd;
+ int err;
+
+ if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd)))
+ return -EFAULT;
+
+ if (cmd != SIOCDEL6RD) {
+ err = ipip6_tunnel_update_6rd(t, &ip6rd);
+ if (err < 0)
+ return err;
+ } else
+ ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev));
+ return 0;
+}
+
+#endif /* CONFIG_IPV6_SIT_6RD */
static bool ipip6_valid_ip_proto(u8 ipproto)
{
@@ -1164,185 +1250,145 @@ static bool ipip6_valid_ip_proto(u8 ipproto)
}
static int
-ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
+{
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!ipip6_valid_ip_proto(p->iph.protocol))
+ return -EINVAL;
+ if (p->iph.version != 4 ||
+ p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)))
+ return -EINVAL;
+
+ if (p->iph.ttl)
+ p->iph.frag_off |= htons(IP_DF);
+ return 0;
+}
+
+static int
+ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
{
- int err = 0;
- struct ip_tunnel_parm p;
- struct ip_tunnel_prl prl;
struct ip_tunnel *t = netdev_priv(dev);
- struct net *net = t->net;
- struct sit_net *sitn = net_generic(net, sit_net_id);
-#ifdef CONFIG_IPV6_SIT_6RD
- struct ip_tunnel_6rd ip6rd;
-#endif
- switch (cmd) {
- case SIOCGETTUNNEL:
-#ifdef CONFIG_IPV6_SIT_6RD
- case SIOCGET6RD:
-#endif
- if (dev == sitn->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
- err = -EFAULT;
- break;
- }
- t = ipip6_tunnel_locate(net, &p, 0);
- if (!t)
- t = netdev_priv(dev);
- }
+ if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
+ t = ipip6_tunnel_locate(t->net, p, 0);
+ if (!t)
+ t = netdev_priv(dev);
+ memcpy(p, &t->parms, sizeof(*p));
+ return 0;
+}
- err = -EFAULT;
- if (cmd == SIOCGETTUNNEL) {
- memcpy(&p, &t->parms, sizeof(p));
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p,
- sizeof(p)))
- goto done;
-#ifdef CONFIG_IPV6_SIT_6RD
+static int
+ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err;
+
+ err = __ipip6_tunnel_ioctl_validate(t->net, p);
+ if (err)
+ return err;
+
+ t = ipip6_tunnel_locate(t->net, p, 1);
+ if (!t)
+ return -ENOBUFS;
+ return 0;
+}
+
+static int
+ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err;
+
+ err = __ipip6_tunnel_ioctl_validate(t->net, p);
+ if (err)
+ return err;
+
+ t = ipip6_tunnel_locate(t->net, p, 0);
+ if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
+ if (!t)
+ return -ENOENT;
+ } else {
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
} else {
- ip6rd.prefix = t->ip6rd.prefix;
- ip6rd.relay_prefix = t->ip6rd.relay_prefix;
- ip6rd.prefixlen = t->ip6rd.prefixlen;
- ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd,
- sizeof(ip6rd)))
- goto done;
-#endif
+ if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) ||
+ (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr))
+ return -EINVAL;
+ t = netdev_priv(dev);
}
- err = 0;
- break;
- case SIOCADDTUNNEL:
- case SIOCCHGTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
+ ipip6_tunnel_update(t, p, t->fwmark);
+ }
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- goto done;
-
- err = -EINVAL;
- if (!ipip6_valid_ip_proto(p.iph.protocol))
- goto done;
- if (p.iph.version != 4 ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
- goto done;
- if (p.iph.ttl)
- p.iph.frag_off |= htons(IP_DF);
-
- t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
-
- if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
- if (t) {
- if (t->dev != dev) {
- err = -EEXIST;
- break;
- }
- } else {
- if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
- (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
- err = -EINVAL;
- break;
- }
- t = netdev_priv(dev);
- }
+ return 0;
+}
- ipip6_tunnel_update(t, &p, t->fwmark);
- }
+static int
+ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
- if (t) {
- err = 0;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
- err = -EFAULT;
- } else
- err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
- break;
+ if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
+ t = ipip6_tunnel_locate(t->net, p, 0);
+ if (!t)
+ return -ENOENT;
+ if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev))
+ return -EPERM;
+ dev = t->dev;
+ }
+ unregister_netdevice(dev);
+ return 0;
+}
+static int
+ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+{
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ return ipip6_tunnel_get(dev, p);
+ case SIOCADDTUNNEL:
+ return ipip6_tunnel_add(dev, p);
+ case SIOCCHGTUNNEL:
+ return ipip6_tunnel_change(dev, p);
case SIOCDELTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
-
- if (dev == sitn->fb_tunnel_dev) {
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- goto done;
- err = -ENOENT;
- t = ipip6_tunnel_locate(net, &p, 0);
- if (!t)
- goto done;
- err = -EPERM;
- if (t == netdev_priv(sitn->fb_tunnel_dev))
- goto done;
- dev = t->dev;
- }
- unregister_netdevice(dev);
- err = 0;
- break;
+ return ipip6_tunnel_del(dev, p);
+ default:
+ return -EINVAL;
+ }
+}
+static int
+ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ case SIOCDELTUNNEL:
+ return ip_tunnel_ioctl(dev, ifr, cmd);
case SIOCGETPRL:
- err = -EINVAL;
- if (dev == sitn->fb_tunnel_dev)
- goto done;
- err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
- break;
-
+ return ipip6_tunnel_get_prl(dev, ifr);
case SIOCADDPRL:
case SIOCDELPRL:
case SIOCCHGPRL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
- err = -EINVAL;
- if (dev == sitn->fb_tunnel_dev)
- goto done;
- err = -EFAULT;
- if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
- goto done;
-
- switch (cmd) {
- case SIOCDELPRL:
- err = ipip6_tunnel_del_prl(t, &prl);
- break;
- case SIOCADDPRL:
- case SIOCCHGPRL:
- err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
- break;
- }
- dst_cache_reset(&t->dst_cache);
- netdev_state_change(dev);
- break;
-
+ return ipip6_tunnel_prl_ctl(dev, ifr, cmd);
#ifdef CONFIG_IPV6_SIT_6RD
+ case SIOCGET6RD:
+ return ipip6_tunnel_get6rd(dev, ifr);
case SIOCADD6RD:
case SIOCCHG6RD:
case SIOCDEL6RD:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
-
- err = -EFAULT;
- if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data,
- sizeof(ip6rd)))
- goto done;
-
- if (cmd != SIOCDEL6RD) {
- err = ipip6_tunnel_update_6rd(t, &ip6rd);
- if (err < 0)
- goto done;
- } else
- ipip6_tunnel_clone_6rd(dev, sitn);
-
- err = 0;
- break;
+ return ipip6_tunnel_6rdctl(dev, ifr, cmd);
#endif
-
default:
- err = -EINVAL;
+ return -EINVAL;
}
-
-done:
- return err;
}
static const struct net_device_ops ipip6_netdev_ops = {
@@ -1352,6 +1398,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_do_ioctl = ipip6_tunnel_ioctl,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_tunnel_ctl = ipip6_tunnel_ctl,
};
static void ipip6_dev_free(struct net_device *dev)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 413b3425ac66..b7415ca75c2d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -463,6 +463,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (fastopen && !fastopen->sk)
break;
+ ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
+
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
@@ -471,6 +473,15 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
} else
sk->sk_err_soft = err;
goto out;
+ case TCP_LISTEN:
+ break;
+ default:
+ /* check if this ICMP message allows revert of backoff.
+ * (see RFC 6069)
+ */
+ if (!fastopen && type == ICMPV6_DEST_UNREACH &&
+ code == ICMPV6_NOROUTE)
+ tcp_ld_RTO_revert(sk, seq);
}
if (!sock_owned_by_user(sk) && np->recverr) {
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 21e7b95ddbfa..06c02ebe6b9b 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -21,8 +21,14 @@
static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
+static struct xfrm6_tunnel __rcu *tunnelmpls6_handlers __read_mostly;
static DEFINE_MUTEX(tunnel6_mutex);
+static inline int xfrm6_tunnel_mpls_supported(void)
+{
+ return IS_ENABLED(CONFIG_MPLS);
+}
+
int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
{
struct xfrm6_tunnel __rcu **pprev;
@@ -32,8 +38,21 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
mutex_lock(&tunnel6_mutex);
- for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
- (t = rcu_dereference_protected(*pprev,
+ switch (family) {
+ case AF_INET6:
+ pprev = &tunnel6_handlers;
+ break;
+ case AF_INET:
+ pprev = &tunnel46_handlers;
+ break;
+ case AF_MPLS:
+ pprev = &tunnelmpls6_handlers;
+ break;
+ default:
+ goto err;
+ }
+
+ for (; (t = rcu_dereference_protected(*pprev,
lockdep_is_held(&tunnel6_mutex))) != NULL;
pprev = &t->next) {
if (t->priority > priority)
@@ -62,8 +81,21 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
mutex_lock(&tunnel6_mutex);
- for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
- (t = rcu_dereference_protected(*pprev,
+ switch (family) {
+ case AF_INET6:
+ pprev = &tunnel6_handlers;
+ break;
+ case AF_INET:
+ pprev = &tunnel46_handlers;
+ break;
+ case AF_MPLS:
+ pprev = &tunnelmpls6_handlers;
+ break;
+ default:
+ goto err;
+ }
+
+ for (; (t = rcu_dereference_protected(*pprev,
lockdep_is_held(&tunnel6_mutex))) != NULL;
pprev = &t->next) {
if (t == handler) {
@@ -73,6 +105,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
}
}
+err:
mutex_unlock(&tunnel6_mutex);
synchronize_net();
@@ -86,6 +119,24 @@ EXPORT_SYMBOL(xfrm6_tunnel_deregister);
handler != NULL; \
handler = rcu_dereference(handler->next)) \
+static int tunnelmpls6_rcv(struct sk_buff *skb)
+{
+ struct xfrm6_tunnel *handler;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto drop;
+
+ for_each_tunnel_rcu(tunnelmpls6_handlers, handler)
+ if (!handler->handler(skb))
+ return 0;
+
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
static int tunnel6_rcv(struct sk_buff *skb)
{
struct xfrm6_tunnel *handler;
@@ -146,6 +197,18 @@ static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return -ENOENT;
}
+static int tunnelmpls6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct xfrm6_tunnel *handler;
+
+ for_each_tunnel_rcu(tunnelmpls6_handlers, handler)
+ if (!handler->err_handler(skb, opt, type, code, offset, info))
+ return 0;
+
+ return -ENOENT;
+}
+
static const struct inet6_protocol tunnel6_protocol = {
.handler = tunnel6_rcv,
.err_handler = tunnel6_err,
@@ -158,6 +221,12 @@ static const struct inet6_protocol tunnel46_protocol = {
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
+static const struct inet6_protocol tunnelmpls6_protocol = {
+ .handler = tunnelmpls6_rcv,
+ .err_handler = tunnelmpls6_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
static int __init tunnel6_init(void)
{
if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) {
@@ -169,6 +238,13 @@ static int __init tunnel6_init(void)
inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
return -EAGAIN;
}
+ if (xfrm6_tunnel_mpls_supported() &&
+ inet6_add_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) {
+ pr_err("%s: can't add protocol\n", __func__);
+ inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
+ inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP);
+ return -EAGAIN;
+ }
return 0;
}
@@ -178,6 +254,9 @@ static void __exit tunnel6_fini(void)
pr_err("%s: can't remove protocol\n", __func__);
if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6))
pr_err("%s: can't remove protocol\n", __func__);
+ if (xfrm6_tunnel_mpls_supported() &&
+ inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS))
+ pr_err("%s: can't remove protocol\n", __func__);
}
module_init(tunnel6_init);
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index a52cb3fc6df5..04cbeefd8982 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -17,11 +17,6 @@
#include <net/ipv6.h>
#include <net/xfrm.h>
-int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
-{
- return xfrm6_extract_header(skb);
-}
-
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t)
{
@@ -35,9 +30,12 @@ EXPORT_SYMBOL(xfrm6_rcv_spi);
static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- if (xfrm_trans_queue(skb, ip6_rcv_finish))
- __kfree_skb(skb);
- return -1;
+ if (xfrm_trans_queue(skb, ip6_rcv_finish)) {
+ kfree_skb(skb);
+ return NET_RX_DROP;
+ }
+
+ return 0;
}
int xfrm6_transport_finish(struct sk_buff *skb, int async)
@@ -60,13 +58,106 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
if (xo && (xo->flags & XFRM_GRO)) {
skb_mac_header_rebuild(skb);
skb_reset_transport_header(skb);
- return -1;
+ return 0;
}
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
xfrm6_transport_finish2);
- return -1;
+ return 0;
+}
+
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ struct udp_sock *up = udp_sk(sk);
+ struct udphdr *uh;
+ struct ipv6hdr *ip6h;
+ int len;
+ int ip6hlen = sizeof(struct ipv6hdr);
+
+ __u8 *udpdata;
+ __be32 *udpdata32;
+ __u16 encap_type = up->encap_type;
+
+ /* if this is not encapsulated socket, then just return now */
+ if (!encap_type)
+ return 1;
+
+ /* If this is a paged skb, make sure we pull up
+ * whatever data we need to look at. */
+ len = skb->len - sizeof(struct udphdr);
+ if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
+ return 1;
+
+ /* Now we can get the pointers */
+ uh = udp_hdr(skb);
+ udpdata = (__u8 *)uh + sizeof(struct udphdr);
+ udpdata32 = (__be32 *)udpdata;
+
+ switch (encap_type) {
+ default:
+ case UDP_ENCAP_ESPINUDP:
+ /* Check if this is a keepalive packet. If so, eat it. */
+ if (len == 1 && udpdata[0] == 0xff) {
+ goto drop;
+ } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
+ /* ESP Packet without Non-ESP header */
+ len = sizeof(struct udphdr);
+ } else
+ /* Must be an IKE packet.. pass it through */
+ return 1;
+ break;
+ case UDP_ENCAP_ESPINUDP_NON_IKE:
+ /* Check if this is a keepalive packet. If so, eat it. */
+ if (len == 1 && udpdata[0] == 0xff) {
+ goto drop;
+ } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
+ udpdata32[0] == 0 && udpdata32[1] == 0) {
+
+ /* ESP Packet with Non-IKE marker */
+ len = sizeof(struct udphdr) + 2 * sizeof(u32);
+ } else
+ /* Must be an IKE packet.. pass it through */
+ return 1;
+ break;
+ }
+
+ /* At this point we are sure that this is an ESPinUDP packet,
+ * so we need to remove 'len' bytes from the packet (the UDP
+ * header and optional ESP marker bytes) and then modify the
+ * protocol to ESP, and then call into the transform receiver.
+ */
+ if (skb_unclone(skb, GFP_ATOMIC))
+ goto drop;
+
+ /* Now we can update and verify the packet length... */
+ ip6h = ipv6_hdr(skb);
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
+ if (skb->len < ip6hlen + len) {
+ /* packet is too small!?! */
+ goto drop;
+ }
+
+ /* pull the data buffer up to the ESP header and set the
+ * transport header to point to ESP. Keep UDP on the stack
+ * for later.
+ */
+ __skb_pull(skb, len);
+ skb_reset_transport_header(skb);
+
+ /* process ESP */
+ return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
+
+drop:
+ kfree_skb(skb);
+ return 0;
}
int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index e34167f790e6..8b84d534b19d 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -23,24 +23,7 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
}
EXPORT_SYMBOL(xfrm6_find_1stfragopt);
-static int xfrm6_local_dontfrag(struct sk_buff *skb)
-{
- int proto;
- struct sock *sk = skb->sk;
-
- if (sk) {
- if (sk->sk_family != AF_INET6)
- return 0;
-
- proto = sk->sk_protocol;
- if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
- return inet6_sk(sk)->dontfrag;
- }
-
- return 0;
-}
-
-static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
+void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
{
struct flowi6 fl6;
struct sock *sk = skb->sk;
@@ -64,80 +47,9 @@ void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
}
-static int xfrm6_tunnel_check_size(struct sk_buff *skb)
-{
- int mtu, ret = 0;
- struct dst_entry *dst = skb_dst(skb);
-
- if (skb->ignore_df)
- goto out;
-
- mtu = dst_mtu(dst);
- if (mtu < IPV6_MIN_MTU)
- mtu = IPV6_MIN_MTU;
-
- if ((!skb_is_gso(skb) && skb->len > mtu) ||
- (skb_is_gso(skb) &&
- !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
- skb->dev = dst->dev;
- skb->protocol = htons(ETH_P_IPV6);
-
- if (xfrm6_local_dontfrag(skb))
- xfrm6_local_rxpmtu(skb, mtu);
- else if (skb->sk)
- xfrm_local_error(skb, mtu);
- else
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- ret = -EMSGSIZE;
- }
-out:
- return ret;
-}
-
-int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
-{
- int err;
-
- err = xfrm6_tunnel_check_size(skb);
- if (err)
- return err;
-
- XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
-
- return xfrm6_extract_header(skb);
-}
-
-int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
-{
- memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-
- IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-
- return xfrm_output(sk, skb);
-}
-
-static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk,
- struct sk_buff *skb)
-{
- const struct xfrm_state_afinfo *afinfo;
- int ret = -EAFNOSUPPORT;
-
- rcu_read_lock();
- afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
- if (likely(afinfo))
- ret = afinfo->output_finish(sk, skb);
- else
- kfree_skb(skb);
- rcu_read_unlock();
-
- return ret;
-}
-
static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct xfrm_state *x = skb_dst(skb)->xfrm;
-
- return __xfrm6_output_state_finish(x, sk, skb);
+ return xfrm_output(sk, skb);
}
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -164,7 +76,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
toobig = skb->len > mtu && !skb_is_gso(skb);
- if (toobig && xfrm6_local_dontfrag(skb)) {
+ if (toobig && xfrm6_local_dontfrag(skb->sk)) {
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
@@ -179,7 +91,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
__xfrm6_output_finish);
skip_frag:
- return __xfrm6_output_state_finish(x, sk, skb);
+ return xfrm_output(sk, skb);
}
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
index 34cb65c7d5a7..ea2f805d3b01 100644
--- a/net/ipv6/xfrm6_protocol.c
+++ b/net/ipv6/xfrm6_protocol.c
@@ -14,6 +14,7 @@
#include <linux/mutex.h>
#include <linux/skbuff.h>
#include <linux/icmpv6.h>
+#include <net/ip6_route.h>
#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/xfrm.h>
@@ -58,6 +59,53 @@ static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
return 0;
}
+int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type)
+{
+ int ret;
+ struct xfrm6_protocol *handler;
+ struct xfrm6_protocol __rcu **head = proto_handlers(nexthdr);
+
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+
+ if (!head)
+ goto out;
+
+ if (!skb_dst(skb)) {
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct dst_entry *dst;
+ struct flowi6 fl6 = {
+ .flowi6_iif = skb->dev->ifindex,
+ .daddr = ip6h->daddr,
+ .saddr = ip6h->saddr,
+ .flowlabel = ip6_flowinfo(ip6h),
+ .flowi6_mark = skb->mark,
+ .flowi6_proto = ip6h->nexthdr,
+ };
+
+ dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6,
+ skb, flags);
+ if (dst->error)
+ goto drop;
+ skb_dst_set(skb, dst);
+ }
+
+ for_each_protocol_rcu(*head, handler)
+ if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL)
+ return ret;
+
+out:
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_encap);
+
static int xfrm6_esp_rcv(struct sk_buff *skb)
{
int ret;
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 78daadecbdef..6610b2198fa9 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -13,37 +13,11 @@
*/
#include <net/xfrm.h>
-#include <linux/pfkeyv2.h>
-#include <linux/ipsec.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/export.h>
-#include <net/dsfield.h>
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-
-int xfrm6_extract_header(struct sk_buff *skb)
-{
- struct ipv6hdr *iph = ipv6_hdr(skb);
-
- XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
- XFRM_MODE_SKB_CB(skb)->id = 0;
- XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
- XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
- XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
- XFRM_MODE_SKB_CB(skb)->optlen = 0;
- memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
- sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
-
- return 0;
-}
static struct xfrm_state_afinfo xfrm6_state_afinfo = {
.family = AF_INET6,
.proto = IPPROTO_IPV6,
.output = xfrm6_output,
- .output_finish = xfrm6_output_finish,
- .extract_input = xfrm6_extract_input,
- .extract_output = xfrm6_extract_output,
.transport_finish = xfrm6_transport_finish,
.local_error = xfrm6_local_error,
};
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c4bdcbc84b07..ee0add15497d 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/types.h>
+#include <linux/limits.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -36,8 +37,6 @@
static char iucv_userid[80];
-static const struct proto_ops iucv_sock_ops;
-
static struct proto iucv_proto = {
.name = "AF_IUCV",
.owner = THIS_MODULE,
@@ -85,14 +84,11 @@ do { \
__ret; \
})
+static struct sock *iucv_accept_dequeue(struct sock *parent,
+ struct socket *newsock);
static void iucv_sock_kill(struct sock *sk);
static void iucv_sock_close(struct sock *sk);
-static void iucv_sever_path(struct sock *, int);
-static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev);
-static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
- struct sk_buff *skb, u8 flags);
static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify);
/* Call Back functions */
@@ -127,110 +123,6 @@ static inline void low_nmcpy(unsigned char *dst, char *src)
memcpy(&dst[8], src, 8);
}
-static int afiucv_pm_prepare(struct device *dev)
-{
-#ifdef CONFIG_PM_DEBUG
- printk(KERN_WARNING "afiucv_pm_prepare\n");
-#endif
- return 0;
-}
-
-static void afiucv_pm_complete(struct device *dev)
-{
-#ifdef CONFIG_PM_DEBUG
- printk(KERN_WARNING "afiucv_pm_complete\n");
-#endif
-}
-
-/**
- * afiucv_pm_freeze() - Freeze PM callback
- * @dev: AFIUCV dummy device
- *
- * Sever all established IUCV communication pathes
- */
-static int afiucv_pm_freeze(struct device *dev)
-{
- struct iucv_sock *iucv;
- struct sock *sk;
-
-#ifdef CONFIG_PM_DEBUG
- printk(KERN_WARNING "afiucv_pm_freeze\n");
-#endif
- read_lock(&iucv_sk_list.lock);
- sk_for_each(sk, &iucv_sk_list.head) {
- iucv = iucv_sk(sk);
- switch (sk->sk_state) {
- case IUCV_DISCONN:
- case IUCV_CLOSING:
- case IUCV_CONNECTED:
- iucv_sever_path(sk, 0);
- break;
- case IUCV_OPEN:
- case IUCV_BOUND:
- case IUCV_LISTEN:
- case IUCV_CLOSED:
- default:
- break;
- }
- skb_queue_purge(&iucv->send_skb_q);
- skb_queue_purge(&iucv->backlog_skb_q);
- }
- read_unlock(&iucv_sk_list.lock);
- return 0;
-}
-
-/**
- * afiucv_pm_restore_thaw() - Thaw and restore PM callback
- * @dev: AFIUCV dummy device
- *
- * socket clean up after freeze
- */
-static int afiucv_pm_restore_thaw(struct device *dev)
-{
- struct sock *sk;
-
-#ifdef CONFIG_PM_DEBUG
- printk(KERN_WARNING "afiucv_pm_restore_thaw\n");
-#endif
- read_lock(&iucv_sk_list.lock);
- sk_for_each(sk, &iucv_sk_list.head) {
- switch (sk->sk_state) {
- case IUCV_CONNECTED:
- sk->sk_err = EPIPE;
- sk->sk_state = IUCV_DISCONN;
- sk->sk_state_change(sk);
- break;
- case IUCV_DISCONN:
- case IUCV_CLOSING:
- case IUCV_LISTEN:
- case IUCV_BOUND:
- case IUCV_OPEN:
- default:
- break;
- }
- }
- read_unlock(&iucv_sk_list.lock);
- return 0;
-}
-
-static const struct dev_pm_ops afiucv_pm_ops = {
- .prepare = afiucv_pm_prepare,
- .complete = afiucv_pm_complete,
- .freeze = afiucv_pm_freeze,
- .thaw = afiucv_pm_restore_thaw,
- .restore = afiucv_pm_restore_thaw,
-};
-
-static struct device_driver af_iucv_driver = {
- .owner = THIS_MODULE,
- .name = "afiucv",
- .bus = NULL,
- .pm = &afiucv_pm_ops,
-};
-
-/* dummy device used as trigger for PM functions */
-static struct device *af_iucv_dev;
-
/**
* iucv_msg_length() - Returns the length of an iucv message.
* @msg: Pointer to struct iucv_message, MUST NOT be NULL
@@ -435,6 +327,20 @@ static void iucv_sock_cleanup_listen(struct sock *parent)
parent->sk_state = IUCV_CLOSED;
}
+static void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk)
+{
+ write_lock_bh(&l->lock);
+ sk_add_node(sk, &l->head);
+ write_unlock_bh(&l->lock);
+}
+
+static void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk)
+{
+ write_lock_bh(&l->lock);
+ sk_del_node_init(sk);
+ write_unlock_bh(&l->lock);
+}
+
/* Kill socket (only if zapped and orphaned) */
static void iucv_sock_kill(struct sock *sk)
{
@@ -607,53 +513,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio,
return sk;
}
-/* Create an IUCV socket */
-static int iucv_sock_create(struct net *net, struct socket *sock, int protocol,
- int kern)
-{
- struct sock *sk;
-
- if (protocol && protocol != PF_IUCV)
- return -EPROTONOSUPPORT;
-
- sock->state = SS_UNCONNECTED;
-
- switch (sock->type) {
- case SOCK_STREAM:
- sock->ops = &iucv_sock_ops;
- break;
- case SOCK_SEQPACKET:
- /* currently, proto ops can handle both sk types */
- sock->ops = &iucv_sock_ops;
- break;
- default:
- return -ESOCKTNOSUPPORT;
- }
-
- sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern);
- if (!sk)
- return -ENOMEM;
-
- iucv_sock_init(sk, NULL);
-
- return 0;
-}
-
-void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk)
-{
- write_lock_bh(&l->lock);
- sk_add_node(sk, &l->head);
- write_unlock_bh(&l->lock);
-}
-
-void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk)
-{
- write_lock_bh(&l->lock);
- sk_del_node_init(sk);
- write_unlock_bh(&l->lock);
-}
-
-void iucv_accept_enqueue(struct sock *parent, struct sock *sk)
+static void iucv_accept_enqueue(struct sock *parent, struct sock *sk)
{
unsigned long flags;
struct iucv_sock *par = iucv_sk(parent);
@@ -666,7 +526,7 @@ void iucv_accept_enqueue(struct sock *parent, struct sock *sk)
sk_acceptq_added(parent);
}
-void iucv_accept_unlink(struct sock *sk)
+static void iucv_accept_unlink(struct sock *sk)
{
unsigned long flags;
struct iucv_sock *par = iucv_sk(iucv_sk(sk)->parent);
@@ -679,7 +539,8 @@ void iucv_accept_unlink(struct sock *sk)
sock_put(sk);
}
-struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
+static struct sock *iucv_accept_dequeue(struct sock *parent,
+ struct socket *newsock)
{
struct iucv_sock *isk, *n;
struct sock *sk;
@@ -1100,7 +961,6 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
/* initialize defaults */
cmsg_done = 0; /* check for duplicate headers */
- txmsg.class = 0;
/* iterate over control messages */
for_each_cmsghdr(cmsg, msg) {
@@ -1511,8 +1371,8 @@ static inline __poll_t iucv_accept_poll(struct sock *parent)
return 0;
}
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t iucv_sock_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
@@ -1664,7 +1524,7 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname,
switch (sk->sk_state) {
case IUCV_OPEN:
case IUCV_BOUND:
- if (val < 1 || val > (u16)(~0))
+ if (val < 1 || val > U16_MAX)
rc = -EINVAL;
else
iucv->msglimit = val;
@@ -2396,6 +2256,35 @@ static const struct proto_ops iucv_sock_ops = {
.getsockopt = iucv_sock_getsockopt,
};
+static int iucv_sock_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
+{
+ struct sock *sk;
+
+ if (protocol && protocol != PF_IUCV)
+ return -EPROTONOSUPPORT;
+
+ sock->state = SS_UNCONNECTED;
+
+ switch (sock->type) {
+ case SOCK_STREAM:
+ case SOCK_SEQPACKET:
+ /* currently, proto ops can handle both sk types */
+ sock->ops = &iucv_sock_ops;
+ break;
+ default:
+ return -ESOCKTNOSUPPORT;
+ }
+
+ sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern);
+ if (!sk)
+ return -ENOMEM;
+
+ iucv_sock_init(sk, NULL);
+
+ return 0;
+}
+
static const struct net_proto_family iucv_sock_family_ops = {
.family = AF_IUCV,
.owner = THIS_MODULE,
@@ -2409,45 +2298,11 @@ static struct packet_type iucv_packet_type = {
static int afiucv_iucv_init(void)
{
- int err;
-
- err = pr_iucv->iucv_register(&af_iucv_handler, 0);
- if (err)
- goto out;
- /* establish dummy device */
- af_iucv_driver.bus = pr_iucv->bus;
- err = driver_register(&af_iucv_driver);
- if (err)
- goto out_iucv;
- af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
- if (!af_iucv_dev) {
- err = -ENOMEM;
- goto out_driver;
- }
- dev_set_name(af_iucv_dev, "af_iucv");
- af_iucv_dev->bus = pr_iucv->bus;
- af_iucv_dev->parent = pr_iucv->root;
- af_iucv_dev->release = (void (*)(struct device *))kfree;
- af_iucv_dev->driver = &af_iucv_driver;
- err = device_register(af_iucv_dev);
- if (err)
- goto out_iucv_dev;
- return 0;
-
-out_iucv_dev:
- put_device(af_iucv_dev);
-out_driver:
- driver_unregister(&af_iucv_driver);
-out_iucv:
- pr_iucv->iucv_unregister(&af_iucv_handler, 0);
-out:
- return err;
+ return pr_iucv->iucv_register(&af_iucv_handler, 0);
}
static void afiucv_iucv_exit(void)
{
- device_unregister(af_iucv_dev);
- driver_unregister(&af_iucv_driver);
pr_iucv->iucv_unregister(&af_iucv_handler, 0);
}
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 9a2d023842fe..19250a0c85d3 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -67,32 +67,9 @@ static int iucv_bus_match(struct device *dev, struct device_driver *drv)
return 0;
}
-enum iucv_pm_states {
- IUCV_PM_INITIAL = 0,
- IUCV_PM_FREEZING = 1,
- IUCV_PM_THAWING = 2,
- IUCV_PM_RESTORING = 3,
-};
-static enum iucv_pm_states iucv_pm_state;
-
-static int iucv_pm_prepare(struct device *);
-static void iucv_pm_complete(struct device *);
-static int iucv_pm_freeze(struct device *);
-static int iucv_pm_thaw(struct device *);
-static int iucv_pm_restore(struct device *);
-
-static const struct dev_pm_ops iucv_pm_ops = {
- .prepare = iucv_pm_prepare,
- .complete = iucv_pm_complete,
- .freeze = iucv_pm_freeze,
- .thaw = iucv_pm_thaw,
- .restore = iucv_pm_restore,
-};
-
struct bus_type iucv_bus = {
.name = "iucv",
.match = iucv_bus_match,
- .pm = &iucv_pm_ops,
};
EXPORT_SYMBOL(iucv_bus);
@@ -435,31 +412,6 @@ static void iucv_block_cpu(void *data)
}
/**
- * iucv_block_cpu_almost
- * @data: unused
- *
- * Allow connection-severed interrupts only on this cpu.
- */
-static void iucv_block_cpu_almost(void *data)
-{
- int cpu = smp_processor_id();
- union iucv_param *parm;
-
- /* Allow iucv control interrupts only */
- parm = iucv_param_irq[cpu];
- memset(parm, 0, sizeof(union iucv_param));
- parm->set_mask.ipmask = 0x08;
- iucv_call_b2f0(IUCV_SETMASK, parm);
- /* Allow iucv-severed interrupt only */
- memset(parm, 0, sizeof(union iucv_param));
- parm->set_mask.ipmask = 0x20;
- iucv_call_b2f0(IUCV_SETCONTROLMASK, parm);
-
- /* Clear indication that iucv interrupts are allowed for this cpu. */
- cpumask_clear_cpu(cpu, &iucv_irq_cpumask);
-}
-
-/**
* iucv_declare_cpu
* @data: unused
*
@@ -1834,146 +1786,6 @@ static void iucv_external_interrupt(struct ext_code ext_code,
spin_unlock(&iucv_queue_lock);
}
-static int iucv_pm_prepare(struct device *dev)
-{
- int rc = 0;
-
-#ifdef CONFIG_PM_DEBUG
- printk(KERN_INFO "iucv_pm_prepare\n");
-#endif
- if (dev->driver && dev->driver->pm && dev->driver->pm->prepare)
- rc = dev->driver->pm->prepare(dev);
- return rc;
-}
-
-static void iucv_pm_complete(struct device *dev)
-{
-#ifdef CONFIG_PM_DEBUG
- printk(KERN_INFO "iucv_pm_complete\n");
-#endif
- if (dev->driver && dev->driver->pm && dev->driver->pm->complete)
- dev->driver->pm->complete(dev);
-}
-
-/**
- * iucv_path_table_empty() - determine if iucv path table is empty
- *
- * Returns 0 if there are still iucv pathes defined
- * 1 if there are no iucv pathes defined
- */
-static int iucv_path_table_empty(void)
-{
- int i;
-
- for (i = 0; i < iucv_max_pathid; i++) {
- if (iucv_path_table[i])
- return 0;
- }
- return 1;
-}
-
-/**
- * iucv_pm_freeze() - Freeze PM callback
- * @dev: iucv-based device
- *
- * disable iucv interrupts
- * invoke callback function of the iucv-based driver
- * shut down iucv, if no iucv-pathes are established anymore
- */
-static int iucv_pm_freeze(struct device *dev)
-{
- int cpu;
- struct iucv_irq_list *p, *n;
- int rc = 0;
-
-#ifdef CONFIG_PM_DEBUG
- printk(KERN_WARNING "iucv_pm_freeze\n");
-#endif
- if (iucv_pm_state != IUCV_PM_FREEZING) {
- for_each_cpu(cpu, &iucv_irq_cpumask)
- smp_call_function_single(cpu, iucv_block_cpu_almost,
- NULL, 1);
- cancel_work_sync(&iucv_work);
- list_for_each_entry_safe(p, n, &iucv_work_queue, list) {
- list_del_init(&p->list);
- iucv_sever_pathid(p->data.ippathid,
- iucv_error_no_listener);
- kfree(p);
- }
- }
- iucv_pm_state = IUCV_PM_FREEZING;
- if (dev->driver && dev->driver->pm && dev->driver->pm->freeze)
- rc = dev->driver->pm->freeze(dev);
- if (iucv_path_table_empty())
- iucv_disable();
- return rc;
-}
-
-/**
- * iucv_pm_thaw() - Thaw PM callback
- * @dev: iucv-based device
- *
- * make iucv ready for use again: allocate path table, declare interrupt buffers
- * and enable iucv interrupts
- * invoke callback function of the iucv-based driver
- */
-static int iucv_pm_thaw(struct device *dev)
-{
- int rc = 0;
-
-#ifdef CONFIG_PM_DEBUG
- printk(KERN_WARNING "iucv_pm_thaw\n");
-#endif
- iucv_pm_state = IUCV_PM_THAWING;
- if (!iucv_path_table) {
- rc = iucv_enable();
- if (rc)
- goto out;
- }
- if (cpumask_empty(&iucv_irq_cpumask)) {
- if (iucv_nonsmp_handler)
- /* enable interrupts on one cpu */
- iucv_allow_cpu(NULL);
- else
- /* enable interrupts on all cpus */
- iucv_setmask_mp();
- }
- if (dev->driver && dev->driver->pm && dev->driver->pm->thaw)
- rc = dev->driver->pm->thaw(dev);
-out:
- return rc;
-}
-
-/**
- * iucv_pm_restore() - Restore PM callback
- * @dev: iucv-based device
- *
- * make iucv ready for use again: allocate path table, declare interrupt buffers
- * and enable iucv interrupts
- * invoke callback function of the iucv-based driver
- */
-static int iucv_pm_restore(struct device *dev)
-{
- int rc = 0;
-
-#ifdef CONFIG_PM_DEBUG
- printk(KERN_WARNING "iucv_pm_restore %p\n", iucv_path_table);
-#endif
- if ((iucv_pm_state != IUCV_PM_RESTORING) && iucv_path_table)
- pr_warn("Suspending Linux did not completely close all IUCV connections\n");
- iucv_pm_state = IUCV_PM_RESTORING;
- if (cpumask_empty(&iucv_irq_cpumask)) {
- rc = iucv_query_maxconn();
- rc = iucv_enable();
- if (rc)
- goto out;
- }
- if (dev->driver && dev->driver->pm && dev->driver->pm->restore)
- rc = dev->driver->pm->restore(dev);
-out:
- return rc;
-}
-
struct iucv_interface iucv_if = {
.message_receive = iucv_message_receive,
.__message_receive = __iucv_message_receive,
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index fcb53ed1c4fb..6d7ef78c88af 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1458,6 +1458,9 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
if (sk->sk_type != SOCK_DGRAM)
return -EPROTONOSUPPORT;
+ if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ return -EPROTONOSUPPORT;
+
if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) ||
(encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP))
return -EPROTONOSUPPORT;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0d7c887a2b75..955662a6dee7 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -20,7 +20,6 @@
#include <net/icmp.h>
#include <net/udp.h>
#include <net/inet_common.h>
-#include <net/inet_hashtables.h>
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
@@ -209,15 +208,31 @@ discard:
return 0;
}
-static int l2tp_ip_open(struct sock *sk)
+static int l2tp_ip_hash(struct sock *sk)
{
- /* Prevent autobind. We don't have ports. */
- inet_sk(sk)->inet_num = IPPROTO_L2TP;
+ if (sk_unhashed(sk)) {
+ write_lock_bh(&l2tp_ip_lock);
+ sk_add_node(sk, &l2tp_ip_table);
+ write_unlock_bh(&l2tp_ip_lock);
+ }
+ return 0;
+}
+static void l2tp_ip_unhash(struct sock *sk)
+{
+ if (sk_unhashed(sk))
+ return;
write_lock_bh(&l2tp_ip_lock);
- sk_add_node(sk, &l2tp_ip_table);
+ sk_del_node_init(sk);
write_unlock_bh(&l2tp_ip_lock);
+}
+
+static int l2tp_ip_open(struct sock *sk)
+{
+ /* Prevent autobind. We don't have ports. */
+ inet_sk(sk)->inet_num = IPPROTO_L2TP;
+ l2tp_ip_hash(sk);
return 0;
}
@@ -594,8 +609,8 @@ static struct proto l2tp_ip_prot = {
.sendmsg = l2tp_ip_sendmsg,
.recvmsg = l2tp_ip_recvmsg,
.backlog_rcv = l2tp_ip_backlog_recv,
- .hash = inet_hash,
- .unhash = inet_unhash,
+ .hash = l2tp_ip_hash,
+ .unhash = l2tp_ip_unhash,
.obj_size = sizeof(struct l2tp_ip_sock),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index d148766f40d1..526ed2c24dd5 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -20,8 +20,6 @@
#include <net/icmp.h>
#include <net/udp.h>
#include <net/inet_common.h>
-#include <net/inet_hashtables.h>
-#include <net/inet6_hashtables.h>
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
@@ -222,15 +220,31 @@ discard:
return 0;
}
-static int l2tp_ip6_open(struct sock *sk)
+static int l2tp_ip6_hash(struct sock *sk)
{
- /* Prevent autobind. We don't have ports. */
- inet_sk(sk)->inet_num = IPPROTO_L2TP;
+ if (sk_unhashed(sk)) {
+ write_lock_bh(&l2tp_ip6_lock);
+ sk_add_node(sk, &l2tp_ip6_table);
+ write_unlock_bh(&l2tp_ip6_lock);
+ }
+ return 0;
+}
+static void l2tp_ip6_unhash(struct sock *sk)
+{
+ if (sk_unhashed(sk))
+ return;
write_lock_bh(&l2tp_ip6_lock);
- sk_add_node(sk, &l2tp_ip6_table);
+ sk_del_node_init(sk);
write_unlock_bh(&l2tp_ip6_lock);
+}
+
+static int l2tp_ip6_open(struct sock *sk)
+{
+ /* Prevent autobind. We don't have ports. */
+ inet_sk(sk)->inet_num = IPPROTO_L2TP;
+ l2tp_ip6_hash(sk);
return 0;
}
@@ -728,8 +742,8 @@ static struct proto l2tp_ip6_prot = {
.sendmsg = l2tp_ip6_sendmsg,
.recvmsg = l2tp_ip6_recvmsg,
.backlog_rcv = l2tp_ip6_backlog_recv,
- .hash = inet6_hash,
- .unhash = inet_unhash,
+ .hash = l2tp_ip6_hash,
+ .unhash = l2tp_ip6_unhash,
.obj_size = sizeof(struct l2tp_ip6_sock),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ipv6_setsockopt,
@@ -758,6 +772,7 @@ static const struct proto_ops l2tp_ip6_ops = {
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
#ifdef CONFIG_COMPAT
+ .compat_ioctl = inet6_compat_ioctl,
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 4d1c335e06e5..7f245e9f114c 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
*/
/**
@@ -292,7 +292,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
goto end;
}
- if (!sta->sta.ht_cap.ht_supported) {
+ if (!sta->sta.ht_cap.ht_supported &&
+ sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) {
ht_dbg(sta->sdata,
"STA %pM erroneously requests BA session on tid %d w/o QoS\n",
sta->sta.addr, tid);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 33da6f738c99..b37c8a983d88 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -448,6 +448,45 @@ static void sta_addba_resp_timer_expired(struct timer_list *t)
ieee80211_stop_tx_ba_session(&sta->sta, tid);
}
+static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
+ struct tid_ampdu_tx *tid_tx)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_local *local = sta->local;
+ u8 tid = tid_tx->tid;
+ u16 buf_size;
+
+ /* activate the timer for the recipient's addBA response */
+ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL);
+ ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n",
+ sta->sta.addr, tid);
+
+ spin_lock_bh(&sta->lock);
+ sta->ampdu_mlme.last_addba_req_time[tid] = jiffies;
+ sta->ampdu_mlme.addba_req_num[tid]++;
+ spin_unlock_bh(&sta->lock);
+
+ if (sta->sta.he_cap.has_he) {
+ buf_size = local->hw.max_tx_aggregation_subframes;
+ } else {
+ /*
+ * We really should use what the driver told us it will
+ * transmit as the maximum, but certain APs (e.g. the
+ * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012)
+ * will crash when we use a lower number.
+ */
+ buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+ }
+
+ /* send AddBA request */
+ ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
+ tid_tx->dialog_token,
+ sta->tid_seq[tid] >> 4,
+ buf_size, tid_tx->timeout);
+
+ WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state));
+}
+
void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
{
struct tid_ampdu_tx *tid_tx;
@@ -462,7 +501,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
.timeout = 0,
};
int ret;
- u16 buf_size;
tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
@@ -485,7 +523,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
params.ssn = sta->tid_seq[tid] >> 4;
ret = drv_ampdu_action(local, sdata, &params);
- if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) {
+ if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) {
+ return;
+ } else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) {
/*
* We didn't send the request yet, so don't need to check
* here if we already got a response, just mark as driver
@@ -508,32 +548,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
return;
}
- /* activate the timer for the recipient's addBA response */
- mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL);
- ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n",
- sta->sta.addr, tid);
-
- spin_lock_bh(&sta->lock);
- sta->ampdu_mlme.last_addba_req_time[tid] = jiffies;
- sta->ampdu_mlme.addba_req_num[tid]++;
- spin_unlock_bh(&sta->lock);
-
- if (sta->sta.he_cap.has_he) {
- buf_size = local->hw.max_tx_aggregation_subframes;
- } else {
- /*
- * We really should use what the driver told us it will
- * transmit as the maximum, but certain APs (e.g. the
- * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012)
- * will crash when we use a lower number.
- */
- buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
- }
-
- /* send AddBA request */
- ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
- tid_tx->dialog_token, params.ssn,
- buf_size, tid_tx->timeout);
+ ieee80211_send_addba_with_timeout(sta, tid_tx);
}
/*
@@ -578,7 +593,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
"Requested to start BA session on reserved tid=%d", tid))
return -EINVAL;
- if (!pubsta->ht_cap.ht_supported)
+ if (!pubsta->ht_cap.ht_supported &&
+ sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ)
return -EINVAL;
if (WARN_ON_ONCE(!local->ops->ampdu_action))
@@ -754,6 +770,12 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
return;
+ if (!test_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)) {
+ ieee80211_send_addba_with_timeout(sta, tid_tx);
+ /* RESPONSE_RECEIVED state whould trigger the flow again */
+ return;
+ }
+
if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
ieee80211_agg_tx_operational(local, sta, tid);
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 0f72813fed53..9b360544ad6f 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -994,7 +994,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
BSS_CHANGED_TWT |
BSS_CHANGED_HE_OBSS_PD |
BSS_CHANGED_HE_BSS_COLOR;
- int err;
+ int i, err;
int prev_beacon_int;
old = sdata_dereference(sdata->u.ap.beacon, sdata);
@@ -1085,6 +1085,17 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
IEEE80211_P2P_OPPPS_ENABLE_BIT;
+ sdata->beacon_rate_set = false;
+ if (wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) {
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
+ sdata->beacon_rateidx_mask[i] =
+ params->beacon_rate.control[i].legacy;
+ if (sdata->beacon_rateidx_mask[i])
+ sdata->beacon_rate_set = true;
+ }
+ }
+
err = ieee80211_assign_beacon(sdata, &params->beacon, NULL);
if (err < 0) {
ieee80211_vif_release_channel(sdata);
@@ -1189,6 +1200,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
ieee80211_free_keys(sdata, true);
sdata->vif.bss_conf.enable_beacon = false;
+ sdata->beacon_rate_set = false;
sdata->vif.bss_conf.ssid_len = 0;
clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
@@ -1508,7 +1520,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
if (params->he_capa)
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
(void *)params->he_capa,
- params->he_capa_len, sta);
+ params->he_capa_len,
+ (void *)params->he_6ghz_capa,
+ sta);
if (params->opmode_notif_used) {
/* returned value is only needed for rc update, but the
@@ -1949,6 +1963,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
const u8 *old_ie;
struct ieee80211_sub_if_data *sdata = container_of(ifmsh,
struct ieee80211_sub_if_data, u.mesh);
+ int i;
/* allocate information elements */
new_ie = NULL;
@@ -1987,6 +2002,17 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
sdata->vif.bss_conf.beacon_int = setup->beacon_interval;
sdata->vif.bss_conf.dtim_period = setup->dtim_period;
+ sdata->beacon_rate_set = false;
+ if (wiphy_ext_feature_isset(sdata->local->hw.wiphy,
+ NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) {
+ for (i = 0; i < NUM_NL80211_BANDS; i++) {
+ sdata->beacon_rateidx_mask[i] =
+ setup->beacon_rate.control[i].legacy;
+ if (sdata->beacon_rateidx_mask[i])
+ sdata->beacon_rate_set = true;
+ }
+ }
+
return 0;
}
@@ -2172,7 +2198,8 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
}
if (!sdata->vif.bss_conf.use_short_slot &&
- sband->band == NL80211_BAND_5GHZ) {
+ (sband->band == NL80211_BAND_5GHZ ||
+ sband->band == NL80211_BAND_6GHZ)) {
sdata->vif.bss_conf.use_short_slot = true;
changed |= BSS_CHANGED_ERP_SLOT;
}
@@ -3287,6 +3314,12 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
goto out;
}
+ if (params->chandef.chan->freq_offset) {
+ /* this may work, but is untested */
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
chanctx = container_of(conf, struct ieee80211_chanctx, conf);
ch_switch.timestamp = 0;
@@ -3398,41 +3431,43 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
return 0;
}
-static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
+static void
+ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy,
struct wireless_dev *wdev,
- u16 frame_type, bool reg)
+ struct mgmt_frame_regs *upd)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ u32 preq_mask = BIT(IEEE80211_STYPE_PROBE_REQ >> 4);
+ u32 action_mask = BIT(IEEE80211_STYPE_ACTION >> 4);
+ bool global_change, intf_change;
+
+ global_change =
+ (local->probe_req_reg != !!(upd->global_stypes & preq_mask)) ||
+ (local->rx_mcast_action_reg !=
+ !!(upd->global_mcast_stypes & action_mask));
+ local->probe_req_reg = upd->global_stypes & preq_mask;
+ local->rx_mcast_action_reg = upd->global_mcast_stypes & action_mask;
+
+ intf_change = (sdata->vif.probe_req_reg !=
+ !!(upd->interface_stypes & preq_mask)) ||
+ (sdata->vif.rx_mcast_action_reg !=
+ !!(upd->interface_mcast_stypes & action_mask));
+ sdata->vif.probe_req_reg = upd->interface_stypes & preq_mask;
+ sdata->vif.rx_mcast_action_reg =
+ upd->interface_mcast_stypes & action_mask;
+
+ if (!local->open_count)
+ return;
- switch (frame_type) {
- case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ:
- if (reg) {
- local->probe_req_reg++;
- sdata->vif.probe_req_reg++;
- } else {
- if (local->probe_req_reg)
- local->probe_req_reg--;
-
- if (sdata->vif.probe_req_reg)
- sdata->vif.probe_req_reg--;
- }
-
- if (!local->open_count)
- break;
-
- if (sdata->vif.probe_req_reg == 1)
- drv_config_iface_filter(local, sdata, FIF_PROBE_REQ,
- FIF_PROBE_REQ);
- else if (sdata->vif.probe_req_reg == 0)
- drv_config_iface_filter(local, sdata, 0,
- FIF_PROBE_REQ);
+ if (intf_change && ieee80211_sdata_running(sdata))
+ drv_config_iface_filter(local, sdata,
+ sdata->vif.probe_req_reg ?
+ FIF_PROBE_REQ : 0,
+ FIF_PROBE_REQ);
+ if (global_change)
ieee80211_configure_filter(local);
- break;
- default:
- break;
- }
}
static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant)
@@ -3925,7 +3960,7 @@ static int ieee80211_set_tid_config(struct wiphy *wiphy,
static int ieee80211_reset_tid_config(struct wiphy *wiphy,
struct net_device *dev,
- const u8 *peer, u8 tid)
+ const u8 *peer, u8 tids)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct sta_info *sta;
@@ -3935,7 +3970,7 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy,
return -EOPNOTSUPP;
if (!peer)
- return drv_reset_tid_config(sdata->local, sdata, NULL, tid);
+ return drv_reset_tid_config(sdata->local, sdata, NULL, tids);
mutex_lock(&sdata->local->sta_mtx);
sta = sta_info_get_bss(sdata, peer);
@@ -3944,7 +3979,7 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy,
return -ENOENT;
}
- ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tid);
+ ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids);
mutex_unlock(&sdata->local->sta_mtx);
return ret;
@@ -4017,7 +4052,8 @@ const struct cfg80211_ops mac80211_config_ops = {
.mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait,
.set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
.set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config,
- .mgmt_frame_register = ieee80211_mgmt_frame_register,
+ .update_mgmt_frame_registrations =
+ ieee80211_update_mgmt_frame_registrations,
.set_antenna = ieee80211_set_antenna,
.get_antenna = ieee80211_get_antenna,
.set_rekey_data = ieee80211_set_rekey_data,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 9c94baaf693c..e6e192f53e4e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -533,6 +533,7 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local,
struct cfg80211_chan_def *chandef = &local->_oper_chandef;
chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
chandef->center_freq1 = chandef->chan->center_freq;
+ chandef->freq1_offset = chandef->chan->freq_offset;
chandef->center_freq2 = 0;
/* NOTE: Disabling radar is only valid here for
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 3dbe7c5cefd1..d7e955127d5c 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -236,7 +236,7 @@ IEEE80211_IF_FILE_R(hw_queues);
/* STA attributes */
IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
-IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
+IEEE80211_IF_FILE(aid, vif.bss_conf.aid, DEC);
IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS);
static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 3877710e3b48..de69fc9c4f07 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1375,12 +1375,12 @@ static inline int drv_set_tid_config(struct ieee80211_local *local,
static inline int drv_reset_tid_config(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, u8 tid)
+ struct ieee80211_sta *sta, u8 tids)
{
int ret;
might_sleep();
- ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tid);
+ ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tids);
trace_drv_return_int(local, ret);
return ret;
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 1087f715338b..cc26f239838b 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -8,10 +8,55 @@
#include "ieee80211_i.h"
+static void
+ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
+ struct sta_info *sta)
+{
+ enum ieee80211_smps_mode smps_mode;
+
+ if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
+ sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+ switch (le16_get_bits(he_6ghz_capa->capa,
+ IEEE80211_HE_6GHZ_CAP_SM_PS)) {
+ case WLAN_HT_CAP_SM_PS_INVALID:
+ case WLAN_HT_CAP_SM_PS_STATIC:
+ smps_mode = IEEE80211_SMPS_STATIC;
+ break;
+ case WLAN_HT_CAP_SM_PS_DYNAMIC:
+ smps_mode = IEEE80211_SMPS_DYNAMIC;
+ break;
+ case WLAN_HT_CAP_SM_PS_DISABLED:
+ smps_mode = IEEE80211_SMPS_OFF;
+ break;
+ }
+
+ sta->sta.smps_mode = smps_mode;
+ } else {
+ sta->sta.smps_mode = IEEE80211_SMPS_OFF;
+ }
+
+ switch (le16_get_bits(he_6ghz_capa->capa,
+ IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN)) {
+ case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
+ sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
+ break;
+ case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
+ sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991;
+ break;
+ case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895:
+ default:
+ sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895;
+ break;
+ }
+
+ sta->sta.he_6ghz_capa = *he_6ghz_capa;
+}
+
void
ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const u8 *he_cap_ie, u8 he_cap_len,
+ const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
struct sta_info *sta)
{
struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
@@ -53,21 +98,21 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta);
sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
+
+ if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa)
+ ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, sta);
}
void
ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
- const struct ieee80211_he_operation *he_op_ie_elem)
+ const struct ieee80211_he_operation *he_op_ie)
{
- struct ieee80211_he_operation *he_operation =
- &vif->bss_conf.he_operation;
-
- if (!he_op_ie_elem) {
- memset(he_operation, 0, sizeof(*he_operation));
+ memset(&vif->bss_conf.he_oper, 0, sizeof(vif->bss_conf.he_oper));
+ if (!he_op_ie)
return;
- }
- vif->bss_conf.he_operation = *he_op_ie_elem;
+ vif->bss_conf.he_oper.params = __le32_to_cpu(he_op_ie->he_oper_params);
+ vif->bss_conf.he_oper.nss_set = __le16_to_cpu(he_op_ie->he_mcs_nss_set);
}
void
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index d40744903fa9..81d26fef41e9 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -9,7 +9,7 @@
* Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright(c) 2018-2019 Intel Corporation
+ * Copyright(c) 2018-2020 Intel Corporation
*/
#include <linux/delay.h>
@@ -781,6 +781,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
enum nl80211_channel_type ch_type;
int err;
u32 sta_flags;
+ u32 vht_cap_info = 0;
sdata_assert_lock(sdata);
@@ -798,9 +799,13 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
break;
}
+ if (elems->vht_cap_elem)
+ vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
+
memset(&params, 0, sizeof(params));
err = ieee80211_parse_ch_switch_ie(sdata, elems,
ifibss->chandef.chan->band,
+ vht_cap_info,
sta_flags, ifibss->bssid, &csa_ie);
/* can't switch to destination channel, fail */
if (err < 0)
@@ -1060,8 +1065,10 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
/* we both use VHT */
struct ieee80211_vht_cap cap_ie;
struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap;
+ u32 vht_cap_info =
+ le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
- ieee80211_chandef_vht_oper(&local->hw,
+ ieee80211_chandef_vht_oper(&local->hw, vht_cap_info,
elems->vht_operation,
elems->ht_operation,
&chandef);
@@ -1758,6 +1765,11 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
int i;
int ret;
+ if (params->chandef.chan->freq_offset) {
+ /* this may work, but is untested */
+ return -EOPNOTSUPP;
+ }
+
ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
&params->chandef,
sdata->wdev.iftype);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f8ed4f621f7f..ec1a71ac65f2 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -111,6 +111,8 @@ struct ieee80211_bss {
size_t supp_rates_len;
struct ieee80211_rate *beacon_rate;
+ u32 vht_cap_info;
+
/*
* During association, we save an ERP value from a probe response so
* that we can feed ERP info to the driver when handling the
@@ -267,7 +269,7 @@ struct probe_resp {
struct rcu_head rcu_head;
int len;
u16 csa_counter_offsets[IEEE80211_MAX_CSA_COUNTERS_NUM];
- u8 data[0];
+ u8 data[];
};
struct ps_data {
@@ -450,8 +452,6 @@ struct ieee80211_if_managed {
u8 bssid[ETH_ALEN] __aligned(2);
- u16 aid;
-
bool powersave; /* powersave requested for this iface */
bool broken_ap; /* AP is broken -- turn off powersave */
bool have_beacon;
@@ -964,6 +964,10 @@ struct ieee80211_sub_if_data {
bool rc_has_vht_mcs_mask[NUM_NL80211_BANDS];
u16 rc_rateidx_vht_mcs_mask[NUM_NL80211_BANDS][NL80211_VHT_NSS_MAX];
+ /* Beacon frame (non-MCS) rate (as a bitmap) */
+ u32 beacon_rateidx_mask[NUM_NL80211_BANDS];
+ bool beacon_rate_set;
+
union {
struct ieee80211_if_ap ap;
struct ieee80211_if_wds wds;
@@ -1169,7 +1173,8 @@ struct ieee80211_local {
/* number of interfaces with corresponding FIF_ flags */
int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll,
fif_probe_req;
- int probe_req_reg;
+ bool probe_req_reg;
+ bool rx_mcast_action_reg;
unsigned int filter_flags; /* FIF_* */
bool wiphy_ciphers_allocated;
@@ -1491,6 +1496,7 @@ struct ieee802_11_elems {
const struct ieee80211_he_operation *he_operation;
const struct ieee80211_he_spr *he_spr;
const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
+ const struct ieee80211_he_6ghz_capa *he_6ghz_capa;
const u8 *uora_element;
const u8 *mesh_id;
const u8 *peering;
@@ -1780,7 +1786,8 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
u32 info_flags,
- u32 ctrl_flags);
+ u32 ctrl_flags,
+ u64 *cookie);
void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
struct sk_buff_head *skbs);
struct sk_buff *
@@ -1797,7 +1804,8 @@ void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata);
void ieee80211_clear_fast_xmit(struct sta_info *sta);
int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
const u8 *buf, size_t len,
- const u8 *dest, __be16 proto, bool unencrypted);
+ const u8 *dest, __be16 proto, bool unencrypted,
+ u64 *cookie);
int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
const u8 *buf, size_t len);
@@ -1891,6 +1899,7 @@ void
ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
const u8 *he_cap_ie, u8 he_cap_len,
+ const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
struct sta_info *sta);
void
ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
@@ -1909,6 +1918,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
* @sdata: the sdata of the interface which has received the frame
* @elems: parsed 802.11 elements received with the frame
* @current_band: indicates the current band
+ * @vht_cap_info: VHT capabilities of the transmitter
* @sta_flags: contains information about own capabilities and restrictions
* to decide which channel switch announcements can be accepted. Only the
* following subset of &enum ieee80211_sta_flags are evaluated:
@@ -1923,6 +1933,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum nl80211_band current_band,
+ u32 vht_cap_info,
u32 sta_flags, u8 *bssid,
struct ieee80211_csa_ie *csa_ie);
@@ -2133,7 +2144,7 @@ enum {
IEEE80211_PROBE_FLAG_RANDOM_SN = BIT(2),
};
-int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
+int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer,
size_t buffer_len,
struct ieee80211_scan_ies *ie_desc,
const u8 *ie, size_t ie_len,
@@ -2171,7 +2182,9 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
u8 *ieee80211_ie_build_he_cap(u8 *pos,
const struct ieee80211_sta_he_cap *he_cap,
u8 *end);
-u8 *ieee80211_ie_build_he_oper(u8 *pos);
+void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
+u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef);
int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
const struct ieee80211_supported_band *sband,
const u8 *srates, int srates_len, u32 *rates);
@@ -2186,10 +2199,13 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo);
/* channel management */
bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
struct cfg80211_chan_def *chandef);
-bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
+bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
const struct ieee80211_vht_operation *oper,
const struct ieee80211_ht_operation *htop,
struct cfg80211_chan_def *chandef);
+bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_he_operation *he_oper,
+ struct cfg80211_chan_def *chandef);
u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
int __must_check
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d069825705d6..f900c84fb40f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -644,6 +644,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
local->fif_probe_req++;
}
+ if (sdata->vif.probe_req_reg)
+ drv_config_iface_filter(local, sdata,
+ FIF_PROBE_REQ,
+ FIF_PROBE_REQ);
+
if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
sdata->vif.type != NL80211_IFTYPE_NAN)
changed |= ieee80211_reset_erp_info(sdata);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6423173bb87e..b4a2efe8e83a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -64,6 +64,9 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
if (local->fif_pspoll)
new_flags |= FIF_PSPOLL;
+ if (local->rx_mcast_action_reg)
+ new_flags |= FIF_MCAST_ACTION;
+
spin_lock_bh(&local->filter_lock);
changed_flags = local->filter_flags ^ new_flags;
@@ -104,13 +107,15 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
chandef.chan = local->tmp_channel;
chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
chandef.center_freq1 = chandef.chan->center_freq;
+ chandef.freq1_offset = chandef.chan->freq_offset;
} else
chandef = local->_oper_chandef;
WARN(!cfg80211_chandef_valid(&chandef),
- "control:%d MHz width:%d center: %d/%d MHz",
- chandef.chan->center_freq, chandef.width,
- chandef.center_freq1, chandef.center_freq2);
+ "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz",
+ chandef.chan->center_freq, chandef.chan->freq_offset,
+ chandef.width, chandef.center_freq1, chandef.freq1_offset,
+ chandef.center_freq2);
if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef))
local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
@@ -591,6 +596,10 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211);
wiphy_ext_feature_set(wiphy,
NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH);
+ wiphy_ext_feature_set(wiphy,
+ NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS);
+ wiphy_ext_feature_set(wiphy,
+ NL80211_EXT_FEATURE_SCAN_FREQ_KHZ);
if (!ops->hw_scan) {
wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 36978a0e5000..5f1ca25b6c97 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
* Authors: Luis Carlos Cobo <luisca@cozybit.com>
* Javier Cardona <javier@cozybit.com>
*/
@@ -63,6 +63,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
u32 basic_rates = 0;
struct cfg80211_chan_def sta_chan_def;
struct ieee80211_supported_band *sband;
+ u32 vht_cap_info = 0;
/*
* As support for each feature is added, check for matching
@@ -96,9 +97,14 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan,
NL80211_CHAN_NO_HT);
ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def);
- ieee80211_chandef_vht_oper(&sdata->local->hw,
+
+ if (ie->vht_cap_elem)
+ vht_cap_info = le32_to_cpu(ie->vht_cap_elem->vht_cap_info);
+
+ ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
ie->vht_operation, ie->ht_operation,
&sta_chan_def);
+ ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, &sta_chan_def);
if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
&sta_chan_def))
@@ -415,6 +421,10 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata,
if (!sband)
return -EINVAL;
+ /* HT not allowed in 6 GHz */
+ if (sband->band == NL80211_BAND_6GHZ)
+ return 0;
+
if (!sband->ht_cap.ht_supported ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
@@ -452,6 +462,10 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
sband = local->hw.wiphy->bands[channel->band];
ht_cap = &sband->ht_cap;
+ /* HT not allowed in 6 GHz */
+ if (sband->band == NL80211_BAND_6GHZ)
+ return 0;
+
if (!ht_cap->ht_supported ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
@@ -479,6 +493,10 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
if (!sband)
return -EINVAL;
+ /* VHT not allowed in 6 GHz */
+ if (sband->band == NL80211_BAND_6GHZ)
+ return 0;
+
if (!sband->vht_cap.vht_supported ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
@@ -516,6 +534,10 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
sband = local->hw.wiphy->bands[channel->band];
vht_cap = &sband->vht_cap;
+ /* VHT not allowed in 6 GHz */
+ if (sband->band == NL80211_BAND_6GHZ)
+ return 0;
+
if (!vht_cap->vht_supported ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
@@ -565,6 +587,7 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
{
const struct ieee80211_sta_he_cap *he_cap;
struct ieee80211_supported_band *sband;
+ u32 len;
u8 *pos;
sband = ieee80211_get_sband(sdata);
@@ -578,12 +601,23 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
return 0;
- if (skb_tailroom(skb) < 2 + 1 + sizeof(struct ieee80211_he_operation))
+ len = 2 + 1 + sizeof(struct ieee80211_he_operation);
+ if (sdata->vif.bss_conf.chandef.chan->band == NL80211_BAND_6GHZ)
+ len += sizeof(struct ieee80211_he_6ghz_oper);
+
+ if (skb_tailroom(skb) < len)
return -ENOMEM;
- pos = skb_put(skb, 2 + 1 + sizeof(struct ieee80211_he_operation));
- ieee80211_ie_build_he_oper(pos);
+ pos = skb_put(skb, len);
+ ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chandef);
+
+ return 0;
+}
+int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ ieee80211_ie_build_he_6ghz_cap(sdata, skb);
return 0;
}
@@ -766,6 +800,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
2 + sizeof(struct ieee80211_vht_operation) +
ie_len_he_cap +
2 + 1 + sizeof(struct ieee80211_he_operation) +
+ sizeof(struct ieee80211_he_6ghz_oper) +
+ 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
ifmsh->ie_len;
bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL);
@@ -885,6 +921,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
mesh_add_vht_oper_ie(sdata, skb) ||
mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) ||
mesh_add_he_oper_ie(sdata, skb) ||
+ mesh_add_he_6ghz_cap_ie(sdata, skb) ||
mesh_add_vendor_ies(sdata, skb))
goto out_free;
@@ -994,6 +1031,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
/* stop the beacon */
ifmsh->mesh_id_len = 0;
sdata->vif.bss_conf.enable_beacon = false;
+ sdata->beacon_rate_set = false;
clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
@@ -1044,7 +1082,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_supported_band *sband;
int err;
- u32 sta_flags;
+ u32 sta_flags, vht_cap_info = 0;
sdata_assert_lock(sdata);
@@ -1067,8 +1105,13 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
break;
}
+ if (elems->vht_cap_elem)
+ vht_cap_info =
+ le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
+
memset(&params, 0, sizeof(params));
err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band,
+ vht_cap_info,
sta_flags, sdata->vif.addr,
&csa_ie);
if (err < 0)
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 953f720754e8..40492d1bd8fd 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -222,6 +222,8 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u8 ie_len);
int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
+int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
void mesh_rmc_free(struct ieee80211_sub_if_data *sdata);
int mesh_rmc_init(struct ieee80211_sub_if_data *sdata);
void ieee80211s_init(void);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 38a0383dfbcf..aa5150929996 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1103,7 +1103,14 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->vif.addr, ifmsh->sn,
target_flags, mpath->dst, mpath->sn, da, 0,
ttl, lifetime, 0, ifmsh->preq_id++, sdata);
+
+ spin_lock_bh(&mpath->state_lock);
+ if (mpath->flags & MESH_PATH_DELETED) {
+ spin_unlock_bh(&mpath->state_lock);
+ goto enddiscovery;
+ }
mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout);
+ spin_unlock_bh(&mpath->state_lock);
enddiscovery:
rcu_read_unlock();
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 737c5f4dbf52..798e4b6b383f 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -238,6 +238,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
2 + sizeof(struct ieee80211_vht_operation) +
ie_len_he_cap +
2 + 1 + sizeof(struct ieee80211_he_operation) +
+ sizeof(struct ieee80211_he_6ghz_oper) +
+ 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
2 + 8 + /* peering IE */
sdata->u.mesh.ie_len);
if (!skb)
@@ -328,7 +330,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
mesh_add_vht_cap_ie(sdata, skb) ||
mesh_add_vht_oper_ie(sdata, skb) ||
mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) ||
- mesh_add_he_oper_ie(sdata, skb))
+ mesh_add_he_oper_ie(sdata, skb) ||
+ mesh_add_he_6ghz_cap_ie(sdata, skb))
goto free;
}
@@ -441,7 +444,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
elems->vht_cap_elem, sta);
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap,
- elems->he_cap_len, sta);
+ elems->he_cap_len,
+ elems->he_6ghz_capa,
+ sta);
if (bw != sta->sta.bandwidth)
changed |= IEEE80211_RC_BW_CHANGED;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 16d75da0996a..5820ef02a587 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -145,6 +145,7 @@ static u32
ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
struct ieee80211_channel *channel,
+ u32 vht_cap_info,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
const struct ieee80211_he_operation *he_oper,
@@ -155,13 +156,23 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_ht_cap sta_ht_cap;
u32 ht_cfreq, ret;
- memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
- ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
-
memset(chandef, 0, sizeof(struct cfg80211_chan_def));
chandef->chan = channel;
chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
chandef->center_freq1 = channel->center_freq;
+ chandef->freq1_offset = channel->freq_offset;
+
+ if (channel->band == NL80211_BAND_6GHZ) {
+ if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, chandef))
+ ret = IEEE80211_STA_DISABLE_HT |
+ IEEE80211_STA_DISABLE_VHT |
+ IEEE80211_STA_DISABLE_HE;
+ vht_chandef = *chandef;
+ goto out;
+ }
+
+ memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
+ ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
if (!ht_oper || !sta_ht_cap.ht_supported) {
ret = IEEE80211_STA_DISABLE_HT |
@@ -222,7 +233,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
memcpy(&he_oper_vht_cap, he_oper->optional, 3);
he_oper_vht_cap.basic_mcs_set = cpu_to_le16(0);
- if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+ if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
&he_oper_vht_cap, ht_oper,
&vht_chandef)) {
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
@@ -231,8 +242,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
ret = IEEE80211_STA_DISABLE_HE;
goto out;
}
- } else if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_oper,
- ht_oper, &vht_chandef)) {
+ } else if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+ vht_cap_info,
+ vht_oper, ht_oper,
+ &vht_chandef)) {
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
sdata_info(sdata,
"AP VHT information is invalid, disable VHT\n");
@@ -328,6 +341,7 @@ out:
static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
const struct ieee80211_ht_cap *ht_cap,
+ const struct ieee80211_vht_cap *vht_cap,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
const struct ieee80211_he_operation *he_oper,
@@ -342,6 +356,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
u16 ht_opmode;
u32 flags;
enum ieee80211_sta_rx_bandwidth new_sta_bw;
+ u32 vht_cap_info = 0;
int ret;
/* if HT was/is disabled, don't track any bandwidth changes */
@@ -370,8 +385,11 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.ht_operation_mode = ht_opmode;
}
+ if (vht_cap)
+ vht_cap_info = le32_to_cpu(vht_cap->vht_cap_info);
+
/* calculate new channel (type) based on HT/VHT/HE operation IEs */
- flags = ieee80211_determine_chantype(sdata, sband, chan,
+ flags = ieee80211_determine_chantype(sdata, sband, chan, vht_cap_info,
ht_oper, vht_oper, he_oper,
&chandef, true);
@@ -396,9 +414,12 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
return 0;
sdata_info(sdata,
- "AP %pM changed bandwidth, new config is %d MHz, width %d (%d/%d MHz)\n",
- ifmgd->bssid, chandef.chan->center_freq, chandef.width,
- chandef.center_freq1, chandef.center_freq2);
+ "AP %pM changed bandwidth, new config is %d.%03d MHz, "
+ "width %d (%d.%03d/%d MHz)\n",
+ ifmgd->bssid, chandef.chan->center_freq,
+ chandef.chan->freq_offset, chandef.width,
+ chandef.center_freq1, chandef.freq1_offset,
+ chandef.center_freq2);
if (flags != (ifmgd->flags & (IEEE80211_STA_DISABLE_HT |
IEEE80211_STA_DISABLE_VHT |
@@ -654,6 +675,8 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
he_cap->he_cap_elem.phy_cap_info);
pos = skb_put(skb, he_cap_size);
ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size);
+
+ ieee80211_ie_build_he_6ghz_cap(sdata, skb);
}
static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
@@ -727,6 +750,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
2 + 1 + sizeof(struct ieee80211_he_cap_elem) + /* HE */
sizeof(struct ieee80211_he_mcs_nss_supp) +
IEEE80211_HE_PPE_THRES_MAX_LEN +
+ 2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
assoc_data->ie_len + /* extra IEs */
(assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
9, /* WMM */
@@ -899,7 +923,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)))
ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
+ if (sband->band != NL80211_BAND_6GHZ &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
ieee80211_add_ht_ie(sdata, skb, assoc_data->ap_ht_param,
sband, chan, sdata->smps_mode);
@@ -953,7 +978,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
offset = noffset;
}
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+ if (sband->band != NL80211_BAND_6GHZ &&
+ !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
ieee80211_add_vht_ie(sdata, skb, sband,
&assoc_data->ap_vht_cap);
@@ -1320,6 +1346,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
enum nl80211_band current_band;
struct ieee80211_csa_ie csa_ie;
struct ieee80211_channel_switch ch_switch;
+ struct ieee80211_bss *bss;
int res;
sdata_assert_lock(sdata);
@@ -1331,7 +1358,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
return;
current_band = cbss->channel->band;
+ bss = (void *)cbss->priv;
res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
+ bss->vht_cap_info,
ifmgd->flags,
ifmgd->associated->bssid, &csa_ie);
@@ -1364,10 +1393,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef,
IEEE80211_CHAN_DISABLED)) {
sdata_info(sdata,
- "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
+ "AP %pM switches to unsupported channel "
+ "(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), "
+ "disconnecting\n",
ifmgd->associated->bssid,
csa_ie.chandef.chan->center_freq,
+ csa_ie.chandef.chan->freq_offset,
csa_ie.chandef.width, csa_ie.chandef.center_freq1,
+ csa_ie.chandef.freq1_offset,
csa_ie.chandef.center_freq2);
ieee80211_queue_work(&local->hw,
&ifmgd->csa_connection_drop_work);
@@ -1500,6 +1533,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
chan_increment = 1;
break;
case NL80211_BAND_5GHZ:
+ case NL80211_BAND_6GHZ:
chan_increment = 4;
break;
}
@@ -2137,7 +2171,8 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
}
use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME);
- if (sband->band == NL80211_BAND_5GHZ)
+ if (sband->band == NL80211_BAND_5GHZ ||
+ sband->band == NL80211_BAND_6GHZ)
use_short_slot = true;
if (use_protection != bss_conf->use_cts_prot) {
@@ -2948,10 +2983,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
}
if (status_code != WLAN_STATUS_SUCCESS) {
+ cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+
+ if (auth_alg == WLAN_AUTH_SAE &&
+ status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED)
+ return;
+
sdata_info(sdata, "%pM denied authentication (status %d)\n",
mgmt->sa, status_code);
ieee80211_destroy_auth_data(sdata, false);
- cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
event.u.mlme.status = MLME_DENIED;
event.u.mlme.reason = status_code;
drv_event_callback(sdata->local, sdata, &event);
@@ -3149,15 +3189,16 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
*have_higher_than_11mbit = true;
/*
- * Skip HT and VHT BSS membership selectors since they're not
- * rates.
+ * Skip HT, VHT and HE BSS membership selectors since they're
+ * not rates.
*
* Note: Even though the membership selector and the basic
* rate flag share the same bit, they are not exactly
* the same.
*/
if (supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HT_PHY) ||
- supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY))
+ supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY) ||
+ supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HE_PHY))
continue;
for (j = 0; j < sband->n_bitrates; j++) {
@@ -3220,6 +3261,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
const struct cfg80211_bss_ies *bss_ies = NULL;
struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
+ bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
u32 changed = 0;
int err;
bool ret;
@@ -3249,7 +3291,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
return false;
}
- ifmgd->aid = aid;
+ sdata->vif.bss_conf.aid = aid;
ifmgd->tdls_chan_switch_prohibited =
elems->ext_capab && elems->ext_capab_len >= 5 &&
(elems->ext_capab[4] & WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED);
@@ -3261,11 +3303,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* 2G/3G/4G wifi routers, reported models include the "Onda PN51T",
* "Vodafone PocketWiFi 2", "ZTE MF60" and a similar T-Mobile device.
*/
- if ((assoc_data->wmm && !elems->wmm_param) ||
- (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
- (!elems->ht_cap_elem || !elems->ht_operation)) ||
- (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
- (!elems->vht_cap_elem || !elems->vht_operation))) {
+ if (!is_6ghz &&
+ ((assoc_data->wmm && !elems->wmm_param) ||
+ (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+ (!elems->ht_cap_elem || !elems->ht_operation)) ||
+ (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+ (!elems->vht_cap_elem || !elems->vht_operation)))) {
const struct cfg80211_bss_ies *ies;
struct ieee802_11_elems bss_elems;
@@ -3323,7 +3366,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* We previously checked these in the beacon/probe response, so
* they should be present here. This is just a safety net.
*/
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+ if (!is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
(!elems->wmm_param || !elems->ht_cap_elem || !elems->ht_operation)) {
sdata_info(sdata,
"HT AP is missing WMM params or HT capability/operation\n");
@@ -3331,7 +3374,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
goto out;
}
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+ if (!is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
(!elems->vht_cap_elem || !elems->vht_operation)) {
sdata_info(sdata,
"VHT AP is missing VHT capability/operation\n");
@@ -3339,6 +3382,14 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
goto out;
}
+ if (is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+ !elems->he_6ghz_capa) {
+ sdata_info(sdata,
+ "HE 6 GHz AP is missing HE 6 GHz band capability\n");
+ ret = false;
+ goto out;
+ }
+
mutex_lock(&sdata->local->sta_mtx);
/*
* station info was already allocated and inserted before
@@ -3381,13 +3432,23 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
elems->he_cap,
elems->he_cap_len,
+ elems->he_6ghz_capa,
sta);
bss_conf->he_support = sta->sta.he_cap.has_he;
+ if (elems->rsnx && elems->rsnx_len &&
+ (elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) &&
+ wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_PROTECTED_TWT))
+ bss_conf->twt_protected = true;
+ else
+ bss_conf->twt_protected = false;
+
changed |= ieee80211_recalc_twt_req(sdata, sta, elems);
} else {
bss_conf->he_support = false;
bss_conf->twt_requester = false;
+ bss_conf->twt_protected = false;
}
if (bss_conf->he_support) {
@@ -3521,9 +3582,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
bss_conf->protected_keep_alive = false;
}
- /* set AID and assoc capability,
+ /* set assoc capability (AID was already set earlier),
* ieee80211_set_associated() will tell the driver */
- bss_conf->aid = aid;
bss_conf->assoc_capability = capab_info;
ieee80211_set_associated(sdata, cbss, changed);
@@ -3661,7 +3721,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
sdata_assert_lock(sdata);
- channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
+ channel = ieee80211_get_channel_khz(local->hw.wiphy,
+ ieee80211_rx_status_to_khz(rx_status));
if (!channel)
return;
@@ -3877,7 +3938,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
return;
}
- if (rx_status->freq != chanctx_conf->def.chan->center_freq) {
+ if (ieee80211_rx_status_to_khz(rx_status) !=
+ ieee80211_channel_to_khz(chanctx_conf->def.chan)) {
rcu_read_unlock();
return;
}
@@ -3948,7 +4010,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
mgmt->bssid, bssid);
if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
- ieee80211_check_tim(elems.tim, elems.tim_len, ifmgd->aid)) {
+ ieee80211_check_tim(elems.tim, elems.tim_len, bss_conf->aid)) {
if (local->hw.conf.dynamic_ps_timeout > 0) {
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
local->hw.conf.flags &= ~IEEE80211_CONF_PS;
@@ -4070,8 +4132,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
- if (ieee80211_config_bw(sdata, sta,
- elems.ht_cap_elem, elems.ht_operation,
+ if (ieee80211_config_bw(sdata, sta, elems.ht_cap_elem,
+ elems.vht_cap_elem, elems.ht_operation,
elems.vht_operation, elems.he_operation,
bssid, &changed)) {
mutex_unlock(&local->sta_mtx);
@@ -4788,6 +4850,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_he_operation *he_oper = NULL;
struct ieee80211_supported_band *sband;
struct cfg80211_chan_def chandef;
+ bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
+ struct ieee80211_bss *bss = (void *)cbss->priv;
int ret;
u32 i;
bool have_80mhz;
@@ -4799,21 +4863,23 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
IEEE80211_STA_DISABLE_160MHZ);
/* disable HT/VHT/HE if we don't support them */
- if (!sband->ht_cap.ht_supported) {
+ if (!sband->ht_cap.ht_supported && !is_6ghz) {
ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
}
- if (!sband->vht_cap.vht_supported)
+ if (!sband->vht_cap.vht_supported && !is_6ghz) {
ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+ ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+ }
if (!ieee80211_get_he_sta_cap(sband))
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
rcu_read_lock();
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && !is_6ghz) {
const u8 *ht_oper_ie, *ht_cap_ie;
ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION);
@@ -4830,7 +4896,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
}
}
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && !is_6ghz) {
const u8 *vht_oper_ie, *vht_cap;
vht_oper_ie = ieee80211_bss_get_ie(cbss,
@@ -4886,6 +4952,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
cbss->channel,
+ bss->vht_cap_info,
ht_oper, vht_oper, he_oper,
&chandef, false);
@@ -4894,6 +4961,11 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
+ if (ifmgd->flags & IEEE80211_STA_DISABLE_HE && is_6ghz) {
+ sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection");
+ return -EINVAL;
+ }
+
/* will change later if needed */
sdata->smps_mode = IEEE80211_SMPS_OFF;
@@ -5022,8 +5094,16 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
* doesn't happen any more, but keep the workaround so
* in case some *other* APs are buggy in different ways
* we can connect -- with a warning.
+ * Allow this workaround only in case the AP provided at least
+ * one rate.
*/
- if (!basic_rates && min_rate_index >= 0) {
+ if (min_rate_index < 0) {
+ sdata_info(sdata,
+ "No legacy rates in association response\n");
+
+ sta_info_free(local, new_sta);
+ return -EINVAL;
+ } else if (!basic_rates) {
sdata_info(sdata,
"No basic rates, using min rate instead\n");
basic_rates = BIT(min_rate_index);
@@ -5267,6 +5347,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct cfg80211_assoc_request *req)
{
+ bool is_6ghz = req->bss->channel->band == NL80211_BAND_6GHZ;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_bss *bss = (void *)req->bss->priv;
@@ -5409,14 +5490,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (ht_ie && ht_ie[1] >= sizeof(struct ieee80211_ht_operation))
assoc_data->ap_ht_param =
((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param;
- else
+ else if (!is_6ghz)
ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY);
if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap))
memcpy(&assoc_data->ap_vht_cap, vht_ie + 2,
sizeof(struct ieee80211_vht_cap));
- else
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+ else if (!is_6ghz)
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT |
+ IEEE80211_STA_DISABLE_HE;
rcu_read_unlock();
if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) &&
@@ -5517,7 +5599,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
assoc_data->timeout_started = true;
assoc_data->need_beacon = true;
} else if (beacon_ies) {
- const u8 *ie;
+ const struct element *elem;
u8 dtim_count = 0;
ieee80211_get_dtim(beacon_ies, &dtim_count,
@@ -5534,15 +5616,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.sync_dtim_count = dtim_count;
}
- ie = cfg80211_find_ext_ie(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
- beacon_ies->data, beacon_ies->len);
- if (ie && ie[1] >= 3)
- sdata->vif.bss_conf.profile_periodicity = ie[4];
+ elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
+ beacon_ies->data, beacon_ies->len);
+ if (elem && elem->datalen >= 3)
+ sdata->vif.bss_conf.profile_periodicity = elem->data[2];
- ie = cfg80211_find_ie(WLAN_EID_EXT_CAPABILITY,
- beacon_ies->data, beacon_ies->len);
- if (ie && ie[1] >= 11 &&
- (ie[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
+ elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
+ beacon_ies->data, beacon_ies->len);
+ if (elem && elem->datalen >= 11 &&
+ (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
sdata->vif.bss_conf.ema_ap = true;
} else {
assoc_data->timeout = jiffies;
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index c710504ccf1a..db3b8bf75656 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -557,6 +557,10 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
lockdep_assert_held(&local->mtx);
+ if (channel->freq_offset)
+ /* this may work, but is untested */
+ return -EOPNOTSUPP;
+
if (local->use_chanctx && !local->ops->remain_on_channel)
return -EOPNOTSUPP;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 5dc3e5bc4e64..b11a2af55b06 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2019-2020 Intel Corporation
*/
#include <linux/netdevice.h>
#include <linux/types.h>
@@ -490,7 +491,7 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi,
tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
- if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) {
+ if (tmp_cck_tp > tmp_mcs_tp) {
for(i = 0; i < MAX_THR_RATES; i++) {
minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i],
tmp_mcs_tp_rate);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 91a13aee4378..21854a61a2b7 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -93,13 +93,44 @@ static u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
* This function cleans up the SKB, i.e. it removes all the stuff
* only useful for monitoring.
*/
-static void remove_monitor_info(struct sk_buff *skb,
- unsigned int present_fcs_len,
- unsigned int rtap_space)
+static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb,
+ unsigned int present_fcs_len,
+ unsigned int rtap_space)
{
+ struct ieee80211_hdr *hdr;
+ unsigned int hdrlen;
+ __le16 fc;
+
if (present_fcs_len)
__pskb_trim(skb, skb->len - present_fcs_len);
__pskb_pull(skb, rtap_space);
+
+ hdr = (void *)skb->data;
+ fc = hdr->frame_control;
+
+ /*
+ * Remove the HT-Control field (if present) on management
+ * frames after we've sent the frame to monitoring. We
+ * (currently) don't need it, and don't properly parse
+ * frames with it present, due to the assumption of a
+ * fixed management header length.
+ */
+ if (likely(!ieee80211_is_mgmt(fc) || !ieee80211_has_order(fc)))
+ return skb;
+
+ hdrlen = ieee80211_hdrlen(fc);
+ hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_ORDER);
+
+ if (!pskb_may_pull(skb, hdrlen)) {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
+
+ memmove(skb->data + IEEE80211_HT_CTL_LEN, skb->data,
+ hdrlen - IEEE80211_HT_CTL_LEN);
+ __pskb_pull(skb, IEEE80211_HT_CTL_LEN);
+
+ return skb;
}
static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len,
@@ -412,6 +443,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
pos++;
/* IEEE80211_RADIOTAP_CHANNEL */
+ /* TODO: frequency offset in KHz */
put_unaligned_le16(status->freq, pos);
pos += 2;
if (status->bw == RATE_INFO_BW_10)
@@ -826,8 +858,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
return NULL;
}
- remove_monitor_info(origskb, present_fcs_len, rtap_space);
- return origskb;
+ return ieee80211_clean_skb(origskb, present_fcs_len,
+ rtap_space);
}
ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_space);
@@ -870,8 +902,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
if (!origskb)
return NULL;
- remove_monitor_info(origskb, present_fcs_len, rtap_space);
- return origskb;
+ return ieee80211_clean_skb(origskb, present_fcs_len, rtap_space);
}
static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
@@ -1984,8 +2015,12 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS ||
mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
- NUM_DEFAULT_BEACON_KEYS)
+ NUM_DEFAULT_BEACON_KEYS) {
+ cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+ skb->data,
+ skb->len);
return RX_DROP_MONITOR; /* unexpected BIP keyidx */
+ }
rx->key = ieee80211_rx_get_bigtk(rx, mmie_keyidx);
if (!rx->key)
@@ -2131,6 +2166,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
/* either the frame has been decrypted or will be dropped */
status->flag |= RX_FLAG_DECRYPTED;
+ if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE))
+ cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+ skb->data, skb->len);
+
return result;
}
@@ -2411,8 +2450,12 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
return -EACCES;
}
if (unlikely(ieee80211_is_beacon(fc) && rx->key &&
- ieee80211_get_mmie_keyidx(rx->skb) < 0))
+ ieee80211_get_mmie_keyidx(rx->skb) < 0)) {
+ cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+ rx->skb->data,
+ rx->skb->len);
return -EACCES;
+ }
/*
* When using MFP, Action frames are not allowed prior to
* having configured keys.
@@ -3082,9 +3125,10 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
!(status->flag & RX_FLAG_NO_SIGNAL_VAL))
sig = status->signal;
- cfg80211_report_obss_beacon(rx->local->hw.wiphy,
- rx->skb->data, rx->skb->len,
- status->freq, sig);
+ cfg80211_report_obss_beacon_khz(rx->local->hw.wiphy,
+ rx->skb->data, rx->skb->len,
+ ieee80211_rx_status_to_khz(status),
+ sig);
rx->flags |= IEEE80211_RX_BEACON_REPORTED;
}
@@ -3340,19 +3384,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
}
}
break;
- case WLAN_CATEGORY_SA_QUERY:
- if (len < (IEEE80211_MIN_ACTION_SIZE +
- sizeof(mgmt->u.action.u.sa_query)))
- break;
-
- switch (mgmt->u.action.u.sa_query.action) {
- case WLAN_ACTION_SA_QUERY_REQUEST:
- if (sdata->vif.type != NL80211_IFTYPE_STATION)
- break;
- ieee80211_process_sa_query_req(sdata, mgmt, len);
- goto handled;
- }
- break;
case WLAN_CATEGORY_SELF_PROTECTED:
if (len < (IEEE80211_MIN_ACTION_SIZE +
sizeof(mgmt->u.action.u.self_prot.action_code)))
@@ -3430,8 +3461,9 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
!(status->flag & RX_FLAG_NO_SIGNAL_VAL))
sig = status->signal;
- if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
- rx->skb->data, rx->skb->len, 0)) {
+ if (cfg80211_rx_mgmt_khz(&rx->sdata->wdev,
+ ieee80211_rx_status_to_khz(status), sig,
+ rx->skb->data, rx->skb->len, 0)) {
if (rx->sta)
rx->sta->rx_stats.packets++;
dev_kfree_skb(rx->skb);
@@ -3442,6 +3474,41 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
}
static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx)
+{
+ struct ieee80211_sub_if_data *sdata = rx->sdata;
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
+ int len = rx->skb->len;
+
+ if (!ieee80211_is_action(mgmt->frame_control))
+ return RX_CONTINUE;
+
+ switch (mgmt->u.action.category) {
+ case WLAN_CATEGORY_SA_QUERY:
+ if (len < (IEEE80211_MIN_ACTION_SIZE +
+ sizeof(mgmt->u.action.u.sa_query)))
+ break;
+
+ switch (mgmt->u.action.u.sa_query.action) {
+ case WLAN_ACTION_SA_QUERY_REQUEST:
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ break;
+ ieee80211_process_sa_query_req(sdata, mgmt, len);
+ goto handled;
+ }
+ break;
+ }
+
+ return RX_CONTINUE;
+
+ handled:
+ if (rx->sta)
+ rx->sta->rx_stats.packets++;
+ dev_kfree_skb(rx->skb);
+ return RX_QUEUED;
+}
+
+static ieee80211_rx_result debug_noinline
ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
{
struct ieee80211_local *local = rx->local;
@@ -3721,6 +3788,7 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
CALL_RXH(ieee80211_rx_h_mgmt_check);
CALL_RXH(ieee80211_rx_h_action);
CALL_RXH(ieee80211_rx_h_userspace_mgmt);
+ CALL_RXH(ieee80211_rx_h_action_post_userspace);
CALL_RXH(ieee80211_rx_h_action_return);
CALL_RXH(ieee80211_rx_h_mgmt);
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index fdac8192a519..ad90bbe57457 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -132,6 +132,12 @@ ieee80211_update_bss_from_elems(struct ieee80211_local *local,
bss->beacon_rate =
&sband->bitrates[rx_status->rate_idx];
}
+
+ if (elems->vht_cap_elem)
+ bss->vht_cap_info =
+ le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
+ else
+ bss->vht_cap_info = 0;
}
struct ieee80211_bss *
@@ -275,7 +281,8 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
return;
}
- channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
+ channel = ieee80211_get_channel_khz(local->hw.wiphy,
+ ieee80211_rx_status_to_khz(rx_status));
if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
return;
@@ -306,8 +313,9 @@ ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef,
}
/* return false if no more work */
-static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
+static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
{
+ struct ieee80211_local *local = sdata->local;
struct cfg80211_scan_request *req;
struct cfg80211_chan_def chandef;
u8 bands_used = 0;
@@ -354,7 +362,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT;
- ielen = ieee80211_build_preq_ies(local,
+ ielen = ieee80211_build_preq_ies(sdata,
(u8 *)local->hw_scan_req->req.ie,
local->hw_scan_ies_bufsize,
&local->hw_scan_req->ies,
@@ -394,9 +402,12 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
if (WARN_ON(!local->scan_req))
return;
+ scan_sdata = rcu_dereference_protected(local->scan_sdata,
+ lockdep_is_held(&local->mtx));
+
if (hw_scan && !aborted &&
!ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) &&
- ieee80211_prep_hw_scan(local)) {
+ ieee80211_prep_hw_scan(scan_sdata)) {
int rc;
rc = drv_hw_scan(local,
@@ -425,9 +436,6 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
cfg80211_scan_done(scan_req, &local->scan_info);
}
RCU_INIT_POINTER(local->scan_req, NULL);
-
- scan_sdata = rcu_dereference_protected(local->scan_sdata,
- lockdep_is_held(&local->mtx));
RCU_INIT_POINTER(local->scan_sdata, NULL);
local->scanning = 0;
@@ -769,7 +777,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
ieee80211_recalc_idle(local);
if (hw_scan) {
- WARN_ON(!ieee80211_prep_hw_scan(local));
+ WARN_ON(!ieee80211_prep_hw_scan(sdata));
rc = drv_hw_scan(local, sdata, local->hw_scan_req);
} else {
rc = ieee80211_start_sw_scan(local, sdata);
@@ -896,6 +904,7 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
local->scan_chandef.chan = chan;
local->scan_chandef.center_freq1 = chan->center_freq;
+ local->scan_chandef.freq1_offset = chan->freq_offset;
local->scan_chandef.center_freq2 = 0;
switch (scan_req->scan_width) {
case NL80211_BSS_CHAN_WIDTH_5:
@@ -1266,7 +1275,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
- ieee80211_build_preq_ies(local, ie, num_bands * iebufsz,
+ ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz,
&sched_scan_ies, req->ie,
req->ie_len, bands_used, rate_masks, &chandef,
flags);
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 5fe2b645912f..ae1cb2c68722 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2008, Intel Corporation
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018, 2020 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -22,6 +22,7 @@
int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum nl80211_band current_band,
+ u32 vht_cap_info,
u32 sta_flags, u8 *bssid,
struct ieee80211_csa_ie *csa_ie)
{
@@ -150,6 +151,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
/* ignore if parsing fails */
if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+ vht_cap_info,
&vht_oper, &ht_oper,
&new_vht_chandef))
new_vht_chandef.chan = NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 36f1abaab9ff..49728047dfad 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,6 +3,7 @@
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright(c) 2020 Intel Corporation
*/
#ifndef STA_INFO_H
@@ -68,6 +69,8 @@
* @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP.
* @WLAN_STA_PS_DELIVER: station woke up, but we're still blocking TX
* until pending frames are delivered
+ * @WLAN_STA_USES_ENCRYPTION: This station was configured for encryption,
+ * so drop all packets without a key later.
*
* @NUM_WLAN_STA_FLAGS: number of defined flags
*/
@@ -116,6 +119,7 @@ enum ieee80211_sta_info_flags {
#define HT_AGG_STATE_WANT_STOP 5
#define HT_AGG_STATE_START_CB 6
#define HT_AGG_STATE_STOP_CB 7
+#define HT_AGG_STATE_SENT_ADDBA 8
DECLARE_EWMA(avg_signal, 10, 8)
enum ieee80211_agg_stop_reason {
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 22512805eafb..7b1bacac39c6 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -649,10 +649,17 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
info->status.ack_signal,
info->status.is_valid_ack_signal,
GFP_ATOMIC);
- else
+ else if (ieee80211_is_mgmt(hdr->frame_control))
cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
skb->data, skb->len,
acked, GFP_ATOMIC);
+ else
+ cfg80211_control_port_tx_status(&sdata->wdev,
+ cookie,
+ skb->data,
+ skb->len,
+ acked,
+ GFP_ATOMIC);
}
rcu_read_unlock();
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index fca1f5477396..4b0cff4a07bd 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -226,12 +226,11 @@ static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata,
static void
ieee80211_tdls_add_aid(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
{
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 *pos = skb_put(skb, 4);
*pos++ = WLAN_EID_AID;
*pos++ = 2; /* len */
- put_unaligned_le16(ifmgd->aid, pos);
+ put_unaligned_le16(sdata->vif.bss_conf.aid, pos);
}
/* translate numbering in the WMM parameter IE to the mac80211 notation */
@@ -1055,7 +1054,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
/* disable bottom halves when entering the Tx path */
local_bh_disable();
- __ieee80211_subif_start_xmit(skb, dev, flags, 0);
+ __ieee80211_subif_start_xmit(skb, dev, flags, 0, NULL);
local_bh_enable();
return ret;
@@ -1567,6 +1566,10 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
u32 ch_sw_tm_ie;
int ret;
+ if (chandef->chan->freq_offset)
+ /* this may work, but is untested */
+ return -EOPNOTSUPP;
+
mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, addr);
if (!sta) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 427f51a0a994..1b4709694d2a 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -37,32 +37,42 @@
#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
#define CHANDEF_ENTRY __field(u32, control_freq) \
+ __field(u32, freq_offset) \
__field(u32, chan_width) \
__field(u32, center_freq1) \
+ __field(u32, freq1_offset) \
__field(u32, center_freq2)
#define CHANDEF_ASSIGN(c) \
__entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0; \
+ __entry->freq_offset = (c) ? ((c)->chan ? (c)->chan->freq_offset : 0) : 0; \
__entry->chan_width = (c) ? (c)->width : 0; \
__entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \
+ __entry->freq1_offset = (c) ? (c)->freq1_offset : 0; \
__entry->center_freq2 = (c) ? (c)->center_freq2 : 0;
-#define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz"
-#define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \
- __entry->center_freq1, __entry->center_freq2
+#define CHANDEF_PR_FMT " control:%d.%03d MHz width:%d center: %d.%03d/%d MHz"
+#define CHANDEF_PR_ARG __entry->control_freq, __entry->freq_offset, __entry->chan_width, \
+ __entry->center_freq1, __entry->freq1_offset, __entry->center_freq2
#define MIN_CHANDEF_ENTRY \
__field(u32, min_control_freq) \
+ __field(u32, min_freq_offset) \
__field(u32, min_chan_width) \
__field(u32, min_center_freq1) \
+ __field(u32, min_freq1_offset) \
__field(u32, min_center_freq2)
#define MIN_CHANDEF_ASSIGN(c) \
__entry->min_control_freq = (c)->chan ? (c)->chan->center_freq : 0; \
+ __entry->min_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0; \
__entry->min_chan_width = (c)->width; \
__entry->min_center_freq1 = (c)->center_freq1; \
+ __entry->freq1_offset = (c)->freq1_offset; \
__entry->min_center_freq2 = (c)->center_freq2;
-#define MIN_CHANDEF_PR_FMT " min_control:%d MHz min_width:%d min_center: %d/%d MHz"
-#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_chan_width, \
- __entry->min_center_freq1, __entry->min_center_freq2
+#define MIN_CHANDEF_PR_FMT " min_control:%d.%03d MHz min_width:%d min_center: %d.%03d/%d MHz"
+#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \
+ __entry->min_chan_width, \
+ __entry->min_center_freq1, __entry->min_freq1_offset, \
+ __entry->min_center_freq2
#define CHANCTX_ENTRY CHANDEF_ENTRY \
MIN_CHANDEF_ENTRY \
@@ -412,6 +422,7 @@ TRACE_EVENT(drv_bss_info_changed,
__field(s32, cqm_rssi_hyst)
__field(u32, channel_width)
__field(u32, channel_cfreq1)
+ __field(u32, channel_cfreq1_offset)
__dynamic_array(u32, arp_addr_list,
info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ?
IEEE80211_BSS_ARP_ADDR_LIST_LEN :
@@ -452,6 +463,7 @@ TRACE_EVENT(drv_bss_info_changed,
__entry->cqm_rssi_hyst = info->cqm_rssi_hyst;
__entry->channel_width = info->chandef.width;
__entry->channel_cfreq1 = info->chandef.center_freq1;
+ __entry->channel_cfreq1_offset = info->chandef.freq1_offset;
__entry->arp_addr_cnt = info->arp_addr_cnt;
memcpy(__get_dynamic_array(arp_addr_list), info->arp_addr_list,
sizeof(u32) * (info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ?
@@ -1223,6 +1235,7 @@ TRACE_EVENT(drv_remain_on_channel,
LOCAL_ENTRY
VIF_ENTRY
__field(int, center_freq)
+ __field(int, freq_offset)
__field(unsigned int, duration)
__field(u32, type)
),
@@ -1231,14 +1244,16 @@ TRACE_EVENT(drv_remain_on_channel,
LOCAL_ASSIGN;
VIF_ASSIGN;
__entry->center_freq = chan->center_freq;
+ __entry->freq_offset = chan->freq_offset;
__entry->duration = duration;
__entry->type = type;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms type=%d",
+ LOCAL_PR_FMT VIF_PR_FMT " freq:%d.%03dMHz duration:%dms type=%d",
LOCAL_PR_ARG, VIF_PR_ARG,
- __entry->center_freq, __entry->duration, __entry->type
+ __entry->center_freq, __entry->freq_offset,
+ __entry->duration, __entry->type
)
);
@@ -1546,8 +1561,10 @@ struct trace_vif_entry {
struct trace_chandef_entry {
u32 control_freq;
+ u32 freq_offset;
u32 chan_width;
u32 center_freq1;
+ u32 freq1_offset;
u32 center_freq2;
} __packed;
@@ -1597,18 +1614,26 @@ TRACE_EVENT(drv_switch_vif_chanctx,
sizeof(local_vifs[i].vif.vif_name));
SWITCH_ENTRY_ASSIGN(old_chandef.control_freq,
old_ctx->def.chan->center_freq);
+ SWITCH_ENTRY_ASSIGN(old_chandef.freq_offset,
+ old_ctx->def.chan->freq_offset);
SWITCH_ENTRY_ASSIGN(old_chandef.chan_width,
old_ctx->def.width);
SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1,
old_ctx->def.center_freq1);
+ SWITCH_ENTRY_ASSIGN(old_chandef.freq1_offset,
+ old_ctx->def.freq1_offset);
SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2,
old_ctx->def.center_freq2);
SWITCH_ENTRY_ASSIGN(new_chandef.control_freq,
new_ctx->def.chan->center_freq);
+ SWITCH_ENTRY_ASSIGN(new_chandef.freq_offset,
+ new_ctx->def.chan->freq_offset);
SWITCH_ENTRY_ASSIGN(new_chandef.chan_width,
new_ctx->def.width);
SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1,
new_ctx->def.center_freq1);
+ SWITCH_ENTRY_ASSIGN(new_chandef.freq1_offset,
+ new_ctx->def.freq1_offset);
SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2,
new_ctx->def.center_freq2);
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9849c14694db..e9ce658141f5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2436,13 +2436,19 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata,
return 0;
}
-static int ieee80211_store_ack_skb(struct ieee80211_local *local,
+static u16 ieee80211_store_ack_skb(struct ieee80211_local *local,
struct sk_buff *skb,
- u32 *info_flags)
+ u32 *info_flags,
+ u64 *cookie)
{
- struct sk_buff *ack_skb = skb_clone_sk(skb);
+ struct sk_buff *ack_skb;
u16 info_id = 0;
+ if (skb->sk)
+ ack_skb = skb_clone_sk(skb);
+ else
+ ack_skb = skb_clone(skb, GFP_ATOMIC);
+
if (ack_skb) {
unsigned long flags;
int id;
@@ -2455,6 +2461,10 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local,
if (id >= 0) {
info_id = id;
*info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+ if (cookie) {
+ *cookie = ieee80211_mgmt_tx_cookie(local);
+ IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie;
+ }
} else {
kfree_skb(ack_skb);
}
@@ -2484,7 +2494,8 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local,
*/
static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags,
- struct sta_info *sta, u32 ctrl_flags)
+ struct sta_info *sta, u32 ctrl_flags,
+ u64 *cookie)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_info *info;
@@ -2755,9 +2766,11 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
goto free;
}
- if (unlikely(!multicast && skb->sk &&
- skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
- info_id = ieee80211_store_ack_skb(local, skb, &info_flags);
+ if (unlikely(!multicast && ((skb->sk &&
+ skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ||
+ ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS)))
+ info_id = ieee80211_store_ack_skb(local, skb, &info_flags,
+ cookie);
/*
* If the skb is shared we need to obtain our own copy.
@@ -3913,7 +3926,8 @@ EXPORT_SYMBOL(ieee80211_txq_schedule_start);
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
u32 info_flags,
- u32 ctrl_flags)
+ u32 ctrl_flags,
+ u64 *cookie)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
@@ -3983,7 +3997,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
skb_mark_not_on_list(skb);
skb = ieee80211_build_hdr(sdata, skb, info_flags,
- sta, ctrl_flags);
+ sta, ctrl_flags, cookie);
if (IS_ERR(skb)) {
kfree_skb_list(next);
goto out;
@@ -4125,9 +4139,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
__skb_queue_head_init(&queue);
ieee80211_convert_to_unicast(skb, dev, &queue);
while ((skb = __skb_dequeue(&queue)))
- __ieee80211_subif_start_xmit(skb, dev, 0, 0);
+ __ieee80211_subif_start_xmit(skb, dev, 0, 0, NULL);
} else {
- __ieee80211_subif_start_xmit(skb, dev, 0, 0);
+ __ieee80211_subif_start_xmit(skb, dev, 0, 0, NULL);
}
return NETDEV_TX_OK;
@@ -4215,7 +4229,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
if (unlikely(!multicast && skb->sk &&
skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
- ieee80211_store_ack_skb(local, skb, &info->flags);
+ ieee80211_store_ack_skb(local, skb, &info->flags, NULL);
memset(info, 0, sizeof(*info));
@@ -4299,7 +4313,7 @@ ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
goto out;
}
- skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, 0);
+ skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, 0, NULL);
if (IS_ERR(skb))
goto out;
@@ -4883,7 +4897,10 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
txrc.bss_conf = &sdata->vif.bss_conf;
txrc.skb = skb;
txrc.reported_rate.idx = -1;
- txrc.rate_idx_mask = sdata->rc_rateidx_mask[band];
+ if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band])
+ txrc.rate_idx_mask = sdata->beacon_rateidx_mask[band];
+ else
+ txrc.rate_idx_mask = sdata->rc_rateidx_mask[band];
txrc.bss = true;
rate_control_get_rate(sdata, NULL, &txrc);
@@ -5006,7 +5023,7 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
pspoll = skb_put_zero(skb, sizeof(*pspoll));
pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
IEEE80211_STYPE_PSPOLL);
- pspoll->aid = cpu_to_le16(ifmgd->aid);
+ pspoll->aid = cpu_to_le16(sdata->vif.bss_conf.aid);
/* aid in PS-Poll has its two MSBs each set to 1 */
pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14);
@@ -5336,14 +5353,15 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
const u8 *buf, size_t len,
- const u8 *dest, __be16 proto, bool unencrypted)
+ const u8 *dest, __be16 proto, bool unencrypted,
+ u64 *cookie)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ethhdr *ehdr;
u32 ctrl_flags = 0;
- u32 flags;
+ u32 flags = 0;
/* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE
* or Pre-Authentication
@@ -5356,9 +5374,13 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
if (unencrypted)
- flags = IEEE80211_TX_INTFL_DONT_ENCRYPT;
- else
- flags = 0;
+ flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
+
+ if (cookie)
+ ctrl_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+
+ flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX |
+ IEEE80211_TX_CTL_INJECTED;
skb = dev_alloc_skb(local->hw.extra_tx_headroom +
sizeof(struct ethhdr) + len);
@@ -5379,10 +5401,15 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
skb_reset_network_header(skb);
skb_reset_mac_header(skb);
+ /* mutex lock is only needed for incrementing the cookie counter */
+ mutex_lock(&local->mtx);
+
local_bh_disable();
- __ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags);
+ __ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags, cookie);
local_bh_enable();
+ mutex_unlock(&local->mtx);
+
return 0;
}
@@ -5409,7 +5436,8 @@ int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
local_bh_disable();
__ieee80211_subif_start_xmit(skb, skb->dev, 0,
- IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP);
+ IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP,
+ NULL);
local_bh_enable();
return 0;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 20436c86b9bf..21c94094a699 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -936,6 +936,10 @@ static void ieee80211_parse_extension_element(u32 *crc,
len >= ieee80211_he_spr_size(data))
elems->he_spr = data;
break;
+ case WLAN_EID_EXT_HE_6GHZ_CAPA:
+ if (len == sizeof(*elems->he_6ghz_capa))
+ elems->he_6ghz_capa = data;
+ break;
}
}
@@ -1659,7 +1663,20 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
}
}
-static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
+static u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end)
+{
+ if ((end - pos) < 5)
+ return pos;
+
+ *pos++ = WLAN_EID_EXTENSION;
+ *pos++ = 1 + sizeof(cap);
+ *pos++ = WLAN_EID_EXT_HE_6GHZ_CAPA;
+ memcpy(pos, &cap, sizeof(cap));
+
+ return pos + 2;
+}
+
+static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
u8 *buffer, size_t buffer_len,
const u8 *ie, size_t ie_len,
enum nl80211_band band,
@@ -1667,6 +1684,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
struct cfg80211_chan_def *chandef,
size_t *offset, u32 flags)
{
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
const struct ieee80211_sta_he_cap *he_cap;
u8 *pos = buffer, *end = buffer + buffer_len;
@@ -1844,6 +1862,14 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
if (!pos)
goto out_err;
+
+ if (sband->band == NL80211_BAND_6GHZ) {
+ enum nl80211_iftype iftype =
+ ieee80211_vif_type_p2p(&sdata->vif);
+ __le16 cap = ieee80211_get_he_6ghz_capa(sband, iftype);
+
+ pos = ieee80211_write_he_6ghz_cap(pos, cap, end);
+ }
}
/*
@@ -1858,7 +1884,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
return pos - buffer;
}
-int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
+int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer,
size_t buffer_len,
struct ieee80211_scan_ies *ie_desc,
const u8 *ie, size_t ie_len,
@@ -1873,7 +1899,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
for (i = 0; i < NUM_NL80211_BANDS; i++) {
if (bands_used & BIT(i)) {
- pos += ieee80211_build_preq_ies_band(local,
+ pos += ieee80211_build_preq_ies_band(sdata,
buffer + pos,
buffer_len - pos,
ie, ie_len, i,
@@ -1935,7 +1961,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
return NULL;
rate_masks[chan->band] = ratemask;
- ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb),
+ ies_len = ieee80211_build_preq_ies(sdata, skb_tail_pointer(skb),
skb_tailroom(skb), &dummy_ie_desc,
ie, ie_len, BIT(chan->band),
rate_masks, &chandef, flags);
@@ -2835,6 +2861,50 @@ end:
return pos;
}
+void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_supported_band *sband;
+ const struct ieee80211_sband_iftype_data *iftd;
+ enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+ u8 *pos;
+ u16 cap;
+
+ sband = ieee80211_get_sband(sdata);
+ if (!sband)
+ return;
+
+ iftd = ieee80211_get_sband_iftype_data(sband, iftype);
+ if (WARN_ON(!iftd))
+ return;
+
+ cap = le16_to_cpu(iftd->he_6ghz_capa.capa);
+ cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS;
+
+ switch (sdata->smps_mode) {
+ case IEEE80211_SMPS_AUTOMATIC:
+ case IEEE80211_SMPS_NUM_MODES:
+ WARN_ON(1);
+ /* fall through */
+ case IEEE80211_SMPS_OFF:
+ cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED,
+ IEEE80211_HE_6GHZ_CAP_SM_PS);
+ break;
+ case IEEE80211_SMPS_STATIC:
+ cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_STATIC,
+ IEEE80211_HE_6GHZ_CAP_SM_PS);
+ break;
+ case IEEE80211_SMPS_DYNAMIC:
+ cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DYNAMIC,
+ IEEE80211_HE_6GHZ_CAP_SM_PS);
+ break;
+ }
+
+ pos = skb_put(skb, 2 + 1 + sizeof(cap));
+ ieee80211_write_he_6ghz_cap(pos, cpu_to_le16(cap),
+ pos + 2 + 1 + sizeof(cap));
+}
+
u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
const struct cfg80211_chan_def *chandef,
u16 prot_mode, bool rifs_mode)
@@ -2958,13 +3028,18 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
return pos + sizeof(struct ieee80211_vht_operation);
}
-u8 *ieee80211_ie_build_he_oper(u8 *pos)
+u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef)
{
struct ieee80211_he_operation *he_oper;
+ struct ieee80211_he_6ghz_oper *he_6ghz_op;
u32 he_oper_params;
+ u8 ie_len = 1 + sizeof(struct ieee80211_he_operation);
+
+ if (chandef->chan->band == NL80211_BAND_6GHZ)
+ ie_len += sizeof(struct ieee80211_he_6ghz_oper);
*pos++ = WLAN_EID_EXTENSION;
- *pos++ = 1 + sizeof(struct ieee80211_he_operation);
+ *pos++ = ie_len;
*pos++ = WLAN_EID_EXT_HE_OPERATION;
he_oper_params = 0;
@@ -2974,16 +3049,68 @@ u8 *ieee80211_ie_build_he_oper(u8 *pos)
IEEE80211_HE_OPERATION_ER_SU_DISABLE);
he_oper_params |= u32_encode_bits(1,
IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED);
+ if (chandef->chan->band == NL80211_BAND_6GHZ)
+ he_oper_params |= u32_encode_bits(1,
+ IEEE80211_HE_OPERATION_6GHZ_OP_INFO);
he_oper = (struct ieee80211_he_operation *)pos;
he_oper->he_oper_params = cpu_to_le32(he_oper_params);
/* don't require special HE peer rates */
he_oper->he_mcs_nss_set = cpu_to_le16(0xffff);
+ pos += sizeof(struct ieee80211_he_operation);
- /* TODO add VHT operational and 6GHz operational subelement? */
+ if (chandef->chan->band != NL80211_BAND_6GHZ)
+ goto out;
- return pos + sizeof(struct ieee80211_vht_operation);
+ /* TODO add VHT operational */
+ he_6ghz_op = (struct ieee80211_he_6ghz_oper *)pos;
+ he_6ghz_op->minrate = 6; /* 6 Mbps */
+ he_6ghz_op->primary =
+ ieee80211_frequency_to_channel(chandef->chan->center_freq);
+ he_6ghz_op->ccfs0 =
+ ieee80211_frequency_to_channel(chandef->center_freq1);
+ if (chandef->center_freq2)
+ he_6ghz_op->ccfs1 =
+ ieee80211_frequency_to_channel(chandef->center_freq2);
+ else
+ he_6ghz_op->ccfs1 = 0;
+
+ switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_160:
+ /* Convert 160 MHz channel width to new style as interop
+ * workaround.
+ */
+ he_6ghz_op->control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ;
+ he_6ghz_op->ccfs1 = he_6ghz_op->ccfs0;
+ if (chandef->chan->center_freq < chandef->center_freq1)
+ he_6ghz_op->ccfs0 -= 8;
+ else
+ he_6ghz_op->ccfs0 += 8;
+ fallthrough;
+ case NL80211_CHAN_WIDTH_80P80:
+ he_6ghz_op->control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ he_6ghz_op->control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ;
+ break;
+ case NL80211_CHAN_WIDTH_40:
+ he_6ghz_op->control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ;
+ break;
+ default:
+ he_6ghz_op->control =
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ;
+ break;
+ }
+
+ pos += sizeof(struct ieee80211_he_6ghz_oper);
+
+out:
+ return pos;
}
bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
@@ -3013,7 +3140,7 @@ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
return true;
}
-bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
+bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
const struct ieee80211_vht_operation *oper,
const struct ieee80211_ht_operation *htop,
struct cfg80211_chan_def *chandef)
@@ -3025,6 +3152,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
u32 vht_cap;
bool support_80_80 = false;
bool support_160 = false;
+ u8 ext_nss_bw_supp = u32_get_bits(vht_cap_info,
+ IEEE80211_VHT_CAP_EXT_NSS_BW_MASK);
+ u8 supp_chwidth = u32_get_bits(vht_cap_info,
+ IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK);
if (!oper || !htop)
return false;
@@ -3044,11 +3175,48 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
IEEE80211_HT_OP_MODE_CCFS2_MASK)
>> IEEE80211_HT_OP_MODE_CCFS2_SHIFT;
- /* when parsing (and we know how to) CCFS1 and CCFS2 are equivalent */
ccf0 = ccfs0;
- ccf1 = ccfs1;
- if (!ccfs1 && ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW))
+
+ /* if not supported, parse as though we didn't understand it */
+ if (!ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW))
+ ext_nss_bw_supp = 0;
+
+ /*
+ * Cf. IEEE 802.11 Table 9-250
+ *
+ * We really just consider that because it's inefficient to connect
+ * at a higher bandwidth than we'll actually be able to use.
+ */
+ switch ((supp_chwidth << 4) | ext_nss_bw_supp) {
+ default:
+ case 0x00:
+ ccf1 = 0;
+ support_160 = false;
+ support_80_80 = false;
+ break;
+ case 0x01:
+ support_80_80 = false;
+ /* fall through */
+ case 0x02:
+ case 0x03:
ccf1 = ccfs2;
+ break;
+ case 0x10:
+ ccf1 = ccfs1;
+ break;
+ case 0x11:
+ case 0x12:
+ if (!ccfs1)
+ ccf1 = ccfs2;
+ else
+ ccf1 = ccfs1;
+ break;
+ case 0x13:
+ case 0x20:
+ case 0x23:
+ ccf1 = ccfs1;
+ break;
+ }
cf0 = ieee80211_channel_to_frequency(ccf0, chandef->chan->band);
cf1 = ieee80211_channel_to_frequency(ccf1, chandef->chan->band);
@@ -3096,6 +3264,112 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
return true;
}
+bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_he_operation *he_oper,
+ struct cfg80211_chan_def *chandef)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_supported_band *sband;
+ enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+ const struct ieee80211_sta_he_cap *he_cap;
+ struct cfg80211_chan_def he_chandef = *chandef;
+ const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
+ bool support_80_80, support_160;
+ u8 he_phy_cap;
+ u32 freq;
+
+ if (chandef->chan->band != NL80211_BAND_6GHZ)
+ return true;
+
+ sband = local->hw.wiphy->bands[NL80211_BAND_6GHZ];
+
+ he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
+ if (!he_cap) {
+ sdata_info(sdata, "Missing iftype sband data/HE cap");
+ return false;
+ }
+
+ he_phy_cap = he_cap->he_cap_elem.phy_cap_info[0];
+ support_160 =
+ he_phy_cap &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+ support_80_80 =
+ he_phy_cap &
+ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+
+ if (!he_oper) {
+ sdata_info(sdata,
+ "HE is not advertised on (on %d MHz), expect issues\n",
+ chandef->chan->center_freq);
+ return false;
+ }
+
+ he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
+
+ if (!he_6ghz_oper) {
+ sdata_info(sdata,
+ "HE 6GHz operation missing (on %d MHz), expect issues\n",
+ chandef->chan->center_freq);
+ return false;
+ }
+
+ freq = ieee80211_channel_to_frequency(he_6ghz_oper->primary,
+ NL80211_BAND_6GHZ);
+ he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq);
+
+ switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH)) {
+ case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ:
+ he_chandef.width = NL80211_CHAN_WIDTH_20;
+ break;
+ case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ:
+ he_chandef.width = NL80211_CHAN_WIDTH_40;
+ break;
+ case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ:
+ he_chandef.width = NL80211_CHAN_WIDTH_80;
+ break;
+ case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ:
+ he_chandef.width = NL80211_CHAN_WIDTH_80;
+ if (!he_6ghz_oper->ccfs1)
+ break;
+ if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8) {
+ if (support_160)
+ he_chandef.width = NL80211_CHAN_WIDTH_160;
+ } else {
+ if (support_80_80)
+ he_chandef.width = NL80211_CHAN_WIDTH_80P80;
+ }
+ break;
+ }
+
+ if (he_chandef.width == NL80211_CHAN_WIDTH_160) {
+ he_chandef.center_freq1 =
+ ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
+ NL80211_BAND_6GHZ);
+ } else {
+ he_chandef.center_freq1 =
+ ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0,
+ NL80211_BAND_6GHZ);
+ he_chandef.center_freq2 =
+ ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
+ NL80211_BAND_6GHZ);
+ }
+
+ if (!cfg80211_chandef_valid(&he_chandef)) {
+ sdata_info(sdata,
+ "HE 6GHz operation resulted in invalid chandef: %d MHz/%d/%d MHz/%d MHz\n",
+ he_chandef.chan ? he_chandef.chan->center_freq : 0,
+ he_chandef.width,
+ he_chandef.center_freq1,
+ he_chandef.center_freq2);
+ return false;
+ }
+
+ *chandef = he_chandef;
+
+ return true;
+}
+
int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
const struct ieee80211_supported_band *sband,
const u8 *srates, int srates_len, u32 *rates)
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 632f07401850..9c6045f9c24d 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -4,7 +4,7 @@
*
* Portions of this file
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -575,15 +575,21 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ:
+ /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */
sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
break;
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ:
+ /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */
sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40;
break;
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ:
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+ if (opmode & IEEE80211_OPMODE_NOTIF_BW_160_80P80)
+ sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+ else
+ sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
break;
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ:
+ /* legacy only, no longer used by newer spec */
sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
break;
}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index a42e4ed5ab0e..fd30ea61336e 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1593,7 +1593,8 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
dev->type == ARPHRD_IPGRE ||
dev->type == ARPHRD_IP6GRE ||
dev->type == ARPHRD_SIT ||
- dev->type == ARPHRD_TUNNEL) {
+ dev->type == ARPHRD_TUNNEL ||
+ dev->type == ARPHRD_TUNNEL6) {
mdev = mpls_add_dev(dev);
if (IS_ERR(mdev))
return notifier_from_errno(PTR_ERR(mdev));
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 0e9aa94adc07..838cdfc10e47 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -172,17 +172,6 @@ struct mpls_route { /* next hop label forwarding entry */
#define endfor_nexthops(rt) }
-static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos)
-{
- struct mpls_shim_hdr result;
- result.label_stack_entry =
- cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
- (tc << MPLS_LS_TC_SHIFT) |
- (bos ? (1 << MPLS_LS_S_SHIFT) : 0) |
- (ttl << MPLS_LS_TTL_SHIFT));
- return result;
-}
-
static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
{
struct mpls_entry_decoded result;
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c
index c151628bd416..0f5a414a9366 100644
--- a/net/mptcp/crypto.c
+++ b/net/mptcp/crypto.c
@@ -47,8 +47,6 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn)
void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
{
u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE];
- __be32 mptcp_hashed_key[SHA256_DIGEST_WORDS];
- __be32 *hash_out = (__force __be32 *)hmac;
struct sha256_state state;
u8 key1be[8];
u8 key2be[8];
@@ -86,11 +84,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
sha256_init(&state);
sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE);
- sha256_final(&state, (u8 *)mptcp_hashed_key);
-
- /* takes only first 160 bits */
- for (i = 0; i < 5; i++)
- hash_out[i] = mptcp_hashed_key[i];
+ sha256_final(&state, (u8 *)hmac);
}
#ifdef CONFIG_MPTCP_HMAC_TEST
@@ -101,29 +95,29 @@ struct test_cast {
};
/* we can't reuse RFC 4231 test vectors, as we have constraint on the
- * input and key size, and we truncate the output.
+ * input and key size.
*/
static struct test_cast tests[] = {
{
.key = "0b0b0b0b0b0b0b0b",
.msg = "48692054",
- .result = "8385e24fb4235ac37556b6b886db106284a1da67",
+ .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa",
},
{
.key = "aaaaaaaaaaaaaaaa",
.msg = "dddddddd",
- .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492",
+ .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9",
},
{
.key = "0102030405060708",
.msg = "cdcdcdcd",
- .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6",
+ .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d",
},
};
static int __init test_mptcp_crypto(void)
{
- char hmac[20], hmac_hex[41];
+ char hmac[32], hmac_hex[65];
u32 nonce1, nonce2;
u64 key1, key2;
u8 msg[8];
@@ -140,11 +134,11 @@ static int __init test_mptcp_crypto(void)
put_unaligned_be32(nonce2, &msg[4]);
mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
- for (j = 0; j < 20; ++j)
+ for (j = 0; j < 32; ++j)
sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff);
- hmac_hex[40] = 0;
+ hmac_hex[64] = 0;
- if (memcmp(hmac_hex, tests[i].result, 40))
+ if (memcmp(hmac_hex, tests[i].result, 64))
pr_err("test %d failed, got %s expected %s", i,
hmac_hex, tests[i].result);
else
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index ece6f92cf7d1..01f1f4cf4902 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "MPTCP: " fmt
#include <linux/kernel.h>
+#include <crypto/sha.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"
@@ -540,7 +541,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
struct in_addr *addr)
{
- u8 hmac[MPTCP_ADDR_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
u8 msg[7];
msg[0] = addr_id;
@@ -550,14 +551,14 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
- return get_unaligned_be64(hmac);
+ return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
struct in6_addr *addr)
{
- u8 hmac[MPTCP_ADDR_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
u8 msg[19];
msg[0] = addr_id;
@@ -567,7 +568,7 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
- return get_unaligned_be64(hmac);
+ return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
}
#endif
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e3a628bea2b8..14b253d10ccf 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -144,12 +144,29 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
unsigned int offset, size_t copy_len)
{
struct sock *sk = (struct sock *)msk;
+ struct sk_buff *tail;
__skb_unlink(skb, &ssk->sk_receive_queue);
- skb_set_owner_r(skb, sk);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_ext_reset(skb);
+ skb_orphan(skb);
msk->ack_seq += copy_len;
+
+ tail = skb_peek_tail(&sk->sk_receive_queue);
+ if (offset == 0 && tail) {
+ bool fragstolen;
+ int delta;
+
+ if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
+ kfree_skb_partial(skb, fragstolen);
+ atomic_add(delta, &sk->sk_rmem_alloc);
+ sk_mem_charge(sk, delta);
+ return;
+ }
+ }
+
+ skb_set_owner_r(skb, sk);
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
MPTCP_SKB_CB(skb)->offset = offset;
}
@@ -1015,7 +1032,8 @@ fallback:
pr_debug("block timeout %ld", timeo);
mptcp_wait_data(sk, &timeo);
- if (unlikely(__mptcp_tcp_fallback(msk)))
+ ssock = __mptcp_tcp_fallback(msk);
+ if (unlikely(ssock))
goto fallback;
}
@@ -1329,11 +1347,14 @@ static void mptcp_close(struct sock *sk, long timeout)
lock_sock(sk);
- mptcp_token_destroy(msk->token);
inet_sk_state_store(sk, TCP_CLOSE);
- __mptcp_flush_join_list(msk);
-
+ /* be sure to always acquire the join list lock, to sync vs
+ * mptcp_finish_join().
+ */
+ spin_lock_bh(&msk->join_list_lock);
+ list_splice_tail_init(&msk->join_list, &msk->conn_list);
+ spin_unlock_bh(&msk->join_list_lock);
list_splice_init(&msk->conn_list, &conn_list);
data_fin_tx_seq = msk->write_seq;
@@ -1523,6 +1544,7 @@ static void mptcp_destroy(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ mptcp_token_destroy(msk->token);
if (msk->cached_ext)
__skb_ext_put(msk->cached_ext);
@@ -1689,22 +1711,30 @@ bool mptcp_finish_join(struct sock *sk)
if (!msk->pm.server_side)
return true;
- /* passive connection, attach to msk socket */
+ if (!mptcp_pm_allow_new_subflow(msk))
+ return false;
+
+ /* active connections are already on conn_list, and we can't acquire
+ * msk lock here.
+ * use the join list lock as synchronization point and double-check
+ * msk status to avoid racing with mptcp_close()
+ */
+ spin_lock_bh(&msk->join_list_lock);
+ ret = inet_sk_state_load(parent) == TCP_ESTABLISHED;
+ if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node)))
+ list_add_tail(&subflow->node, &msk->join_list);
+ spin_unlock_bh(&msk->join_list_lock);
+ if (!ret)
+ return false;
+
+ /* attach to msk socket only after we are sure he will deal with us
+ * at close time
+ */
parent_sock = READ_ONCE(parent->sk_socket);
if (parent_sock && !sk->sk_socket)
mptcp_sock_graft(sk, parent_sock);
-
- ret = mptcp_pm_allow_new_subflow(msk);
- if (ret) {
- subflow->map_seq = msk->ack_seq;
-
- /* active connections are already on conn_list */
- spin_lock_bh(&msk->join_list_lock);
- if (!WARN_ON_ONCE(!list_empty(&subflow->node)))
- list_add_tail(&subflow->node, &msk->join_list);
- spin_unlock_bh(&msk->join_list_lock);
- }
- return ret;
+ subflow->map_seq = msk->ack_seq;
+ return true;
}
static bool mptcp_memory_free(const struct sock *sk, int wake)
@@ -1771,6 +1801,14 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int err;
lock_sock(sock->sk);
+ if (sock->state != SS_UNCONNECTED && msk->subflow) {
+ /* pending connection or invalid state, let existing subflow
+ * cope with that
+ */
+ ssock = msk->subflow;
+ goto do_connect;
+ }
+
ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
if (IS_ERR(ssock)) {
err = PTR_ERR(ssock);
@@ -1785,9 +1823,17 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0;
#endif
+do_connect:
err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
- inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ sock->state = ssock->state;
+
+ /* on successful connect, the msk state will be moved to established by
+ * subflow_finish_connect()
+ */
+ if (!err || err == EINPROGRESS)
+ mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ else
+ inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
unlock:
release_sock(sock->sk);
@@ -2068,6 +2114,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
#ifdef CONFIG_COMPAT
+ .compat_ioctl = inet6_compat_ioctl,
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index f5adca93e8fb..809687d3f410 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -81,7 +81,6 @@
/* MPTCP ADD_ADDR flags */
#define MPTCP_ADDR_ECHO BIT(0)
-#define MPTCP_ADDR_HMAC_LEN 20
#define MPTCP_ADDR_IPVERSION_4 4
#define MPTCP_ADDR_IPVERSION_6 6
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 0020d356233d..493b98a0825c 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
+#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -89,7 +90,7 @@ static bool subflow_token_join_request(struct request_sock *req,
const struct sk_buff *skb)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
- u8 hmac[MPTCPOPT_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
struct mptcp_sock *msk;
int local_id;
@@ -201,7 +202,7 @@ static void subflow_v6_init_req(struct request_sock *req,
/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
- u8 hmac[MPTCPOPT_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
u64 thmac;
subflow_generate_hmac(subflow->remote_key, subflow->local_key,
@@ -267,6 +268,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
}
} else if (subflow->mp_join) {
+ u8 hmac[SHA256_DIGEST_SIZE];
+
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
subflow, subflow->thmac,
subflow->remote_nonce);
@@ -279,7 +282,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow_generate_hmac(subflow->local_key, subflow->remote_key,
subflow->local_nonce,
subflow->remote_nonce,
- subflow->hmac);
+ hmac);
+
+ memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
if (skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
@@ -347,7 +352,7 @@ static bool subflow_hmac_valid(const struct request_sock *req,
const struct mptcp_options_received *mp_opt)
{
const struct mptcp_subflow_request_sock *subflow_req;
- u8 hmac[MPTCPOPT_HMAC_LEN];
+ u8 hmac[SHA256_DIGEST_SIZE];
struct mptcp_sock *msk;
bool ret;
@@ -361,7 +366,7 @@ static bool subflow_hmac_valid(const struct request_sock *req,
subflow_req->local_nonce, hmac);
ret = true;
- if (crypto_memneq(hmac, mp_opt->hmac, sizeof(hmac)))
+ if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN))
ret = false;
sock_put((struct sock *)msk);
@@ -408,6 +413,20 @@ static void subflow_ulp_fallback(struct sock *sk,
tcp_sk(sk)->is_mptcp = 0;
}
+static void subflow_drop_ctx(struct sock *ssk)
+{
+ struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
+
+ if (!ctx)
+ return;
+
+ subflow_ulp_fallback(ssk, ctx);
+ if (ctx->conn)
+ sock_put(ctx->conn);
+
+ kfree_rcu(ctx, rcu);
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -480,10 +499,7 @@ create_child:
if (fallback_is_fatal)
goto dispose_child;
- if (ctx) {
- subflow_ulp_fallback(child, ctx);
- kfree_rcu(ctx, rcu);
- }
+ subflow_drop_ctx(child);
goto out;
}
@@ -532,6 +548,7 @@ out:
return child;
dispose_child:
+ subflow_drop_ctx(child);
tcp_rsk(req)->drop_req = true;
tcp_send_active_reset(child, GFP_ATOMIC);
inet_csk_prepare_for_destroy_sock(child);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index cd747c0962fd..5a67f7966574 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -59,7 +59,7 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
/* Don't lookup sub-counters at all */
opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
- opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
+ opt->cmdflags |= IPSET_FLAG_SKIP_COUNTER_UPDATE;
list_for_each_entry_rcu(e, &map->members, list) {
ret = ip_set_test(e->id, skb, par, opt);
if (ret <= 0)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1d57b95d3481..bb72ca5f3999 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2016,22 +2016,18 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
nf_conntrack_get(skb_nfct(nskb));
}
-static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
+static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
- enum ip_conntrack_info ctinfo;
struct nf_nat_hook *nat_hook;
unsigned int status;
- struct nf_conn *ct;
int dataoff;
u16 l3num;
u8 l4num;
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct || nf_ct_is_confirmed(ct))
- return 0;
-
l3num = nf_ct_l3num(ct);
dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
@@ -2088,6 +2084,76 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
return 0;
}
+/* This packet is coming from userspace via nf_queue, complete the packet
+ * processing after the helper invocation in nf_confirm().
+ */
+static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ const struct nf_conntrack_helper *helper;
+ const struct nf_conn_help *help;
+ int protoff;
+
+ help = nfct_help(ct);
+ if (!help)
+ return 0;
+
+ helper = rcu_dereference(help->helper);
+ if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
+ return 0;
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ protoff = skb_network_offset(skb) + ip_hdrlen(skb);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6: {
+ __be16 frag_off;
+ u8 pnum;
+
+ pnum = ipv6_hdr(skb)->nexthdr;
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+ return 0;
+ break;
+ }
+#endif
+ default:
+ return 0;
+ }
+
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return -1;
+ }
+ }
+
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb) == NF_DROP ? - 1 : 0;
+}
+
+static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ int err;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return 0;
+
+ if (!nf_ct_is_confirmed(ct)) {
+ err = __nf_conntrack_update(net, skb, ct, ctinfo);
+ if (err < 0)
+ return err;
+ }
+
+ return nf_confirm_cthelper(skb, ct, ctinfo);
+}
+
static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
const struct sk_buff *skb)
{
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index a971183f11af..1f44d523b512 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -72,24 +72,32 @@ EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
/* PptpControlMessageType names */
-const char *const pptp_msg_name[] = {
- "UNKNOWN_MESSAGE",
- "START_SESSION_REQUEST",
- "START_SESSION_REPLY",
- "STOP_SESSION_REQUEST",
- "STOP_SESSION_REPLY",
- "ECHO_REQUEST",
- "ECHO_REPLY",
- "OUT_CALL_REQUEST",
- "OUT_CALL_REPLY",
- "IN_CALL_REQUEST",
- "IN_CALL_REPLY",
- "IN_CALL_CONNECT",
- "CALL_CLEAR_REQUEST",
- "CALL_DISCONNECT_NOTIFY",
- "WAN_ERROR_NOTIFY",
- "SET_LINK_INFO"
+static const char *const pptp_msg_name_array[PPTP_MSG_MAX + 1] = {
+ [0] = "UNKNOWN_MESSAGE",
+ [PPTP_START_SESSION_REQUEST] = "START_SESSION_REQUEST",
+ [PPTP_START_SESSION_REPLY] = "START_SESSION_REPLY",
+ [PPTP_STOP_SESSION_REQUEST] = "STOP_SESSION_REQUEST",
+ [PPTP_STOP_SESSION_REPLY] = "STOP_SESSION_REPLY",
+ [PPTP_ECHO_REQUEST] = "ECHO_REQUEST",
+ [PPTP_ECHO_REPLY] = "ECHO_REPLY",
+ [PPTP_OUT_CALL_REQUEST] = "OUT_CALL_REQUEST",
+ [PPTP_OUT_CALL_REPLY] = "OUT_CALL_REPLY",
+ [PPTP_IN_CALL_REQUEST] = "IN_CALL_REQUEST",
+ [PPTP_IN_CALL_REPLY] = "IN_CALL_REPLY",
+ [PPTP_IN_CALL_CONNECT] = "IN_CALL_CONNECT",
+ [PPTP_CALL_CLEAR_REQUEST] = "CALL_CLEAR_REQUEST",
+ [PPTP_CALL_DISCONNECT_NOTIFY] = "CALL_DISCONNECT_NOTIFY",
+ [PPTP_WAN_ERROR_NOTIFY] = "WAN_ERROR_NOTIFY",
+ [PPTP_SET_LINK_INFO] = "SET_LINK_INFO"
};
+
+const char *pptp_msg_name(u_int16_t msg)
+{
+ if (msg > PPTP_MSG_MAX)
+ return pptp_msg_name_array[0];
+
+ return pptp_msg_name_array[msg];
+}
EXPORT_SYMBOL(pptp_msg_name);
#endif
@@ -276,7 +284,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound;
msg = ntohs(ctlh->messageType);
- pr_debug("inbound control message %s\n", pptp_msg_name[msg]);
+ pr_debug("inbound control message %s\n", pptp_msg_name(msg));
switch (msg) {
case PPTP_START_SESSION_REPLY:
@@ -311,7 +319,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
pcid = pptpReq->ocack.peersCallID;
if (info->pns_call_id != pcid)
goto invalid;
- pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
+ pr_debug("%s, CID=%X, PCID=%X\n", pptp_msg_name(msg),
ntohs(cid), ntohs(pcid));
if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
@@ -328,7 +336,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
goto invalid;
cid = pptpReq->icreq.callID;
- pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid));
info->cstate = PPTP_CALL_IN_REQ;
info->pac_call_id = cid;
break;
@@ -347,7 +355,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
if (info->pns_call_id != pcid)
goto invalid;
- pr_debug("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
+ pr_debug("%s, PCID=%X\n", pptp_msg_name(msg), ntohs(pcid));
info->cstate = PPTP_CALL_IN_CONF;
/* we expect a GRE connection from PAC to PNS */
@@ -357,7 +365,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
case PPTP_CALL_DISCONNECT_NOTIFY:
/* server confirms disconnect */
cid = pptpReq->disc.callID;
- pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid));
info->cstate = PPTP_CALL_NONE;
/* untrack this call id, unexpect GRE packets */
@@ -384,7 +392,7 @@ pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
invalid:
pr_debug("invalid %s: type=%d cid=%u pcid=%u "
"cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ pptp_msg_name(msg),
msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
ntohs(info->pns_call_id), ntohs(info->pac_call_id));
return NF_ACCEPT;
@@ -404,7 +412,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound;
msg = ntohs(ctlh->messageType);
- pr_debug("outbound control message %s\n", pptp_msg_name[msg]);
+ pr_debug("outbound control message %s\n", pptp_msg_name(msg));
switch (msg) {
case PPTP_START_SESSION_REQUEST:
@@ -426,7 +434,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
info->cstate = PPTP_CALL_OUT_REQ;
/* track PNS call id */
cid = pptpReq->ocreq.callID;
- pr_debug("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
+ pr_debug("%s, CID=%X\n", pptp_msg_name(msg), ntohs(cid));
info->pns_call_id = cid;
break;
@@ -440,7 +448,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
pcid = pptpReq->icack.peersCallID;
if (info->pac_call_id != pcid)
goto invalid;
- pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
+ pr_debug("%s, CID=%X PCID=%X\n", pptp_msg_name(msg),
ntohs(cid), ntohs(pcid));
if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
@@ -480,7 +488,7 @@ pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
invalid:
pr_debug("invalid %s: type=%d cid=%u pcid=%u "
"cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
- msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
+ pptp_msg_name(msg),
msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate,
ntohs(info->pns_call_id), ntohs(info->pac_call_id));
return NF_ACCEPT;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index a5f294aa8e4c..5b0d0a77379c 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -103,7 +103,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
if (help->helper->data_len == 0)
return -EINVAL;
- nla_memcpy(help->data, nla_data(attr), sizeof(help->data));
+ nla_memcpy(help->data, attr, sizeof(help->data));
return 0;
}
@@ -240,6 +240,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
ret = -ENOMEM;
goto err2;
}
+ helper->data_len = size;
helper->flags |= NF_CT_HELPER_F_USERSPACE;
memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple));
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 6f2fbc6b9eb2..a042261a45c5 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -14,6 +14,8 @@
#include <net/genetlink.h>
#include <net/psample.h>
#include <linux/spinlock.h>
+#include <net/ip_tunnels.h>
+#include <net/dst_metadata.h>
#define PSAMPLE_MAX_PACKET_SIZE 0xffff
@@ -207,10 +209,159 @@ void psample_group_put(struct psample_group *group)
}
EXPORT_SYMBOL_GPL(psample_group_put);
+#ifdef CONFIG_INET
+static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
+{
+ unsigned short tun_proto = ip_tunnel_info_af(tun_info);
+ const void *tun_opts = ip_tunnel_info_opts(tun_info);
+ const struct ip_tunnel_key *tun_key = &tun_info->key;
+ int tun_opts_len = tun_info->options_len;
+
+ if (tun_key->tun_flags & TUNNEL_KEY &&
+ nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id,
+ PSAMPLE_TUNNEL_KEY_ATTR_PAD))
+ return -EMSGSIZE;
+
+ if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE &&
+ nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE))
+ return -EMSGSIZE;
+
+ switch (tun_proto) {
+ case AF_INET:
+ if (tun_key->u.ipv4.src &&
+ nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC,
+ tun_key->u.ipv4.src))
+ return -EMSGSIZE;
+ if (tun_key->u.ipv4.dst &&
+ nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST,
+ tun_key->u.ipv4.dst))
+ return -EMSGSIZE;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_any(&tun_key->u.ipv6.src) &&
+ nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC,
+ &tun_key->u.ipv6.src))
+ return -EMSGSIZE;
+ if (!ipv6_addr_any(&tun_key->u.ipv6.dst) &&
+ nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST,
+ &tun_key->u.ipv6.dst))
+ return -EMSGSIZE;
+ break;
+ }
+ if (tun_key->tos &&
+ nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos))
+ return -EMSGSIZE;
+ if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl))
+ return -EMSGSIZE;
+ if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+ return -EMSGSIZE;
+ if ((tun_key->tun_flags & TUNNEL_CSUM) &&
+ nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM))
+ return -EMSGSIZE;
+ if (tun_key->tp_src &&
+ nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src))
+ return -EMSGSIZE;
+ if (tun_key->tp_dst &&
+ nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst))
+ return -EMSGSIZE;
+ if ((tun_key->tun_flags & TUNNEL_OAM) &&
+ nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM))
+ return -EMSGSIZE;
+ if (tun_opts_len) {
+ if (tun_key->tun_flags & TUNNEL_GENEVE_OPT &&
+ nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+ tun_opts_len, tun_opts))
+ return -EMSGSIZE;
+ else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT &&
+ nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+ tun_opts_len, tun_opts))
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+static int psample_ip_tun_to_nlattr(struct sk_buff *skb,
+ struct ip_tunnel_info *tun_info)
+{
+ struct nlattr *nla;
+ int err;
+
+ nla = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL);
+ if (!nla)
+ return -EMSGSIZE;
+
+ err = __psample_ip_tun_to_nlattr(skb, tun_info);
+ if (err) {
+ nla_nest_cancel(skb, nla);
+ return err;
+ }
+
+ nla_nest_end(skb, nla);
+
+ return 0;
+}
+
+static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
+{
+ unsigned short tun_proto = ip_tunnel_info_af(tun_info);
+ const struct ip_tunnel_key *tun_key = &tun_info->key;
+ int tun_opts_len = tun_info->options_len;
+ int sum = 0;
+
+ if (tun_key->tun_flags & TUNNEL_KEY)
+ sum += nla_total_size(sizeof(u64));
+
+ if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
+ sum += nla_total_size(0);
+
+ switch (tun_proto) {
+ case AF_INET:
+ if (tun_key->u.ipv4.src)
+ sum += nla_total_size(sizeof(u32));
+ if (tun_key->u.ipv4.dst)
+ sum += nla_total_size(sizeof(u32));
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_any(&tun_key->u.ipv6.src))
+ sum += nla_total_size(sizeof(struct in6_addr));
+ if (!ipv6_addr_any(&tun_key->u.ipv6.dst))
+ sum += nla_total_size(sizeof(struct in6_addr));
+ break;
+ }
+ if (tun_key->tos)
+ sum += nla_total_size(sizeof(u8));
+ sum += nla_total_size(sizeof(u8)); /* TTL */
+ if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT)
+ sum += nla_total_size(0);
+ if (tun_key->tun_flags & TUNNEL_CSUM)
+ sum += nla_total_size(0);
+ if (tun_key->tp_src)
+ sum += nla_total_size(sizeof(u16));
+ if (tun_key->tp_dst)
+ sum += nla_total_size(sizeof(u16));
+ if (tun_key->tun_flags & TUNNEL_OAM)
+ sum += nla_total_size(0);
+ if (tun_opts_len) {
+ if (tun_key->tun_flags & TUNNEL_GENEVE_OPT)
+ sum += nla_total_size(tun_opts_len);
+ else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT)
+ sum += nla_total_size(tun_opts_len);
+ }
+
+ return sum;
+}
+#endif
+
void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
u32 trunc_size, int in_ifindex, int out_ifindex,
u32 sample_rate)
{
+#ifdef CONFIG_INET
+ struct ip_tunnel_info *tun_info;
+#endif
struct sk_buff *nl_skb;
int data_len;
int meta_len;
@@ -224,6 +375,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
nla_total_size(sizeof(u32)) + /* group_num */
nla_total_size(sizeof(u32)); /* seq */
+#ifdef CONFIG_INET
+ tun_info = skb_tunnel_info(skb);
+ if (tun_info)
+ meta_len += psample_tunnel_meta_len(tun_info);
+#endif
+
data_len = min(skb->len, trunc_size);
if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE)
data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN
@@ -278,6 +435,14 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
goto error;
}
+#ifdef CONFIG_INET
+ if (tun_info) {
+ ret = psample_ip_tun_to_nlattr(nl_skb, tun_info);
+ if (unlikely(ret < 0))
+ goto error;
+ }
+#endif
+
genlmsg_end(nl_skb, data);
genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 3ca196fc7f9b..d8252fdab851 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -714,6 +714,10 @@ void qrtr_ns_init(void)
goto err_sock;
}
+ qrtr_ns.workqueue = alloc_workqueue("qrtr_ns_handler", WQ_UNBOUND, 1);
+ if (!qrtr_ns.workqueue)
+ goto err_sock;
+
qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready;
sq.sq_port = QRTR_PORT_CTRL;
@@ -722,17 +726,13 @@ void qrtr_ns_init(void)
ret = kernel_bind(qrtr_ns.sock, (struct sockaddr *)&sq, sizeof(sq));
if (ret < 0) {
pr_err("failed to bind to socket\n");
- goto err_sock;
+ goto err_wq;
}
qrtr_ns.bcast_sq.sq_family = AF_QIPCRTR;
qrtr_ns.bcast_sq.sq_node = QRTR_NODE_BCAST;
qrtr_ns.bcast_sq.sq_port = QRTR_PORT_CTRL;
- qrtr_ns.workqueue = alloc_workqueue("qrtr_ns_handler", WQ_UNBOUND, 1);
- if (!qrtr_ns.workqueue)
- goto err_sock;
-
ret = say_hello(&qrtr_ns.bcast_sq);
if (ret < 0)
goto err_wq;
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 7ed31b5e77e4..2d8d6131bc5f 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -854,7 +854,7 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
}
mutex_unlock(&qrtr_node_lock);
- qrtr_local_enqueue(node, skb, type, from, to);
+ qrtr_local_enqueue(NULL, skb, type, from, to);
return 0;
}
diff --git a/net/rds/info.c b/net/rds/info.c
index e1d63563e81c..b6b46a8214a0 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -234,7 +234,8 @@ call_func:
ret = -EFAULT;
out:
- unpin_user_pages(pages, nr_pages);
+ if (pages)
+ unpin_user_pages(pages, nr_pages);
kfree(pages);
return ret;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 46782fac4c16..43db0eca911f 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -89,15 +89,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
{ }
};
-/* doing it this way avoids calling tcp_sk() */
-void rds_tcp_nonagle(struct socket *sock)
-{
- int val = 1;
-
- kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val,
- sizeof(val));
-}
-
u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
{
/* seq# of the last byte of data in tcp send buffer */
@@ -502,7 +493,7 @@ void rds_tcp_tune(struct socket *sock)
struct net *net = sock_net(sk);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
- rds_tcp_nonagle(sock);
+ tcp_sock_set_nodelay(sock->sk);
lock_sock(sk);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 3c69361d21c7..bad9cf49d565 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -50,7 +50,6 @@ struct rds_tcp_statistics {
/* tcp.c */
void rds_tcp_tune(struct socket *sock);
-void rds_tcp_nonagle(struct socket *sock);
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
@@ -71,9 +70,8 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
-int rds_tcp_keepalive(struct socket *sock);
+void rds_tcp_keepalive(struct socket *sock);
void *rds_tcp_listen_sock_def_readable(struct net *net);
-void rds_tcp_set_linger(struct socket *sock);
/* tcp_recv.c */
int rds_tcp_recv_init(void);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 008f50fb25dd..4e64598176b0 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -207,7 +207,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
if (sock) {
if (rds_destroy_pending(cp->cp_conn))
- rds_tcp_set_linger(sock);
+ sock_no_linger(sock->sk);
sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
lock_sock(sock->sk);
rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 810a3a49e947..101cf14215a0 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -38,36 +38,19 @@
#include "rds.h"
#include "tcp.h"
-int rds_tcp_keepalive(struct socket *sock)
+void rds_tcp_keepalive(struct socket *sock)
{
/* values below based on xs_udp_default_timeout */
int keepidle = 5; /* send a probe 'keepidle' secs after last data */
int keepcnt = 5; /* number of unack'ed probes before declaring dead */
- int keepalive = 1;
- int ret = 0;
-
- ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&keepalive, sizeof(keepalive));
- if (ret < 0)
- goto bail;
-
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
- (char *)&keepcnt, sizeof(keepcnt));
- if (ret < 0)
- goto bail;
-
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE,
- (char *)&keepidle, sizeof(keepidle));
- if (ret < 0)
- goto bail;
+ sock_set_keepalive(sock->sk);
+ tcp_sock_set_keepcnt(sock->sk, keepcnt);
+ tcp_sock_set_keepidle(sock->sk, keepidle);
/* KEEPINTVL is the interval between successive probes. We follow
* the model in xs_tcp_finish_connecting() and re-use keepidle.
*/
- ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL,
- (char *)&keepidle, sizeof(keepidle));
-bail:
- return ret;
+ tcp_sock_set_keepintvl(sock->sk, keepidle);
}
/* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
@@ -111,17 +94,6 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
return NULL;
}
-void rds_tcp_set_linger(struct socket *sock)
-{
- struct linger no_linger = {
- .l_onoff = 1,
- .l_linger = 0,
- };
-
- kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&no_linger, sizeof(no_linger));
-}
-
int rds_tcp_accept_one(struct socket *sock)
{
struct socket *new_sock = NULL;
@@ -160,10 +132,7 @@ int rds_tcp_accept_one(struct socket *sock)
new_sock->ops = sock->ops;
__module_get(new_sock->ops->owner);
- ret = rds_tcp_keepalive(new_sock);
- if (ret < 0)
- goto out;
-
+ rds_tcp_keepalive(new_sock);
rds_tcp_tune(new_sock);
inet = inet_sk(new_sock->sk);
@@ -241,7 +210,7 @@ rst_nsk:
* be pending on it. By setting linger, we achieve the side-effect
* of avoiding TIME_WAIT state on new_sock.
*/
- rds_tcp_set_linger(new_sock);
+ sock_no_linger(new_sock->sk);
kernel_sock_shutdown(new_sock, SHUT_RDWR);
ret = 0;
out:
@@ -303,7 +272,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
}
sock->sk->sk_reuse = SK_CAN_REUSE;
- rds_tcp_nonagle(sock);
+ tcp_sock_set_nodelay(sock->sk);
write_lock_bh(&sock->sk->sk_callback_lock);
sock->sk->sk_user_data = sock->sk->sk_data_ready;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 78a2554a4497..8c4d1d6e9249 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -38,23 +38,18 @@
#include "rds.h"
#include "tcp.h"
-static void rds_tcp_cork(struct socket *sock, int val)
-{
- kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (void *)&val, sizeof(val));
-}
-
void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp)
{
struct rds_tcp_connection *tc = cp->cp_transport_data;
- rds_tcp_cork(tc->t_sock, 1);
+ tcp_sock_set_cork(tc->t_sock->sk, true);
}
void rds_tcp_xmit_path_complete(struct rds_conn_path *cp)
{
struct rds_tcp_connection *tc = cp->cp_transport_data;
- rds_tcp_cork(tc->t_sock, 0);
+ tcp_sock_set_cork(tc->t_sock->sk, false);
}
/* the core send_sem serializes this with other xmit and shutdown */
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6ffb7e9887ce..ddd0f95713a9 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -25,6 +25,7 @@ rxrpc-y := \
peer_event.o \
peer_object.o \
recvmsg.o \
+ rtt.o \
security.o \
sendmsg.o \
skbuff.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 15ee92d79581..394189b81849 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -571,6 +571,19 @@ out:
return ret;
}
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val)
+{
+ if (sk->sk_state != RXRPC_UNBOUND)
+ return -EISCONN;
+ if (val > RXRPC_SECURITY_MAX)
+ return -EINVAL;
+ lock_sock(sk);
+ rxrpc_sk(sk)->min_sec_level = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_min_security_level);
+
/*
* set RxRPC socket options
*/
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 3eb1ab40ca5c..9fe264bec70c 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -7,6 +7,7 @@
#include <linux/atomic.h>
#include <linux/seqlock.h>
+#include <linux/win_minmax.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
@@ -311,11 +312,14 @@ struct rxrpc_peer {
#define RXRPC_RTT_CACHE_SIZE 32
spinlock_t rtt_input_lock; /* RTT lock for input routine */
ktime_t rtt_last_req; /* Time of last RTT request */
- u64 rtt; /* Current RTT estimate (in nS) */
- u64 rtt_sum; /* Sum of cache contents */
- u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */
- u8 rtt_cursor; /* next entry at which to insert */
- u8 rtt_usage; /* amount of cache actually used */
+ unsigned int rtt_count; /* Number of samples we've got */
+
+ u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ u32 mdev_us; /* medium deviation */
+ u32 mdev_max_us; /* maximal mdev for the last rtt period */
+ u32 rttvar_us; /* smoothed mdev_max */
+ u32 rto_j; /* Retransmission timeout in jiffies */
+ u8 backoff; /* Backoff timeout */
u8 cong_cwnd; /* Congestion window size */
};
@@ -1041,7 +1045,6 @@ extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
-extern unsigned long rxrpc_resend_timeout;
extern const s8 rxrpc_ack_priority[];
@@ -1069,8 +1072,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *);
* peer_event.c
*/
void rxrpc_error_report(struct sock *);
-void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
- rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
void rxrpc_peer_keepalive_worker(struct work_struct *);
/*
@@ -1103,6 +1104,14 @@ void rxrpc_notify_socket(struct rxrpc_call *);
int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
+ * rtt.c
+ */
+void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
+ rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
+unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool);
+void rxrpc_peer_init_rtt(struct rxrpc_peer *);
+
+/*
* rxkad.c
*/
#ifdef CONFIG_RXKAD
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 70e44abf106c..b7611cc159e5 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -248,7 +248,7 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
ktime_t now = skb->tstamp;
- if (call->peer->rtt_usage < 3 ||
+ if (call->peer->rtt_count < 3 ||
ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
true, true,
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index cedbbb3a7c2e..2a65ac41055f 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -111,8 +111,8 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
} else {
unsigned long now = jiffies, ack_at;
- if (call->peer->rtt_usage > 0)
- ack_at = nsecs_to_jiffies(call->peer->rtt);
+ if (call->peer->srtt_us != 0)
+ ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3);
else
ack_at = expiry;
@@ -157,24 +157,18 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
{
struct sk_buff *skb;
- unsigned long resend_at;
+ unsigned long resend_at, rto_j;
rxrpc_seq_t cursor, seq, top;
- ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo;
+ ktime_t now, max_age, oldest, ack_ts;
int ix;
u8 annotation, anno_type, retrans = 0, unacked = 0;
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
- if (call->peer->rtt_usage > 1)
- timeout = ns_to_ktime(call->peer->rtt * 3 / 2);
- else
- timeout = ms_to_ktime(rxrpc_resend_timeout);
- min_timeo = ns_to_ktime((1000000000 / HZ) * 4);
- if (ktime_before(timeout, min_timeo))
- timeout = min_timeo;
+ rto_j = call->peer->rto_j;
now = ktime_get_real();
- max_age = ktime_sub(now, timeout);
+ max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
spin_lock_bh(&call->lock);
@@ -219,7 +213,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
}
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
- resend_at += jiffies + rxrpc_resend_timeout;
+ resend_at += jiffies + rto_j;
WRITE_ONCE(call->resend_at, resend_at);
if (unacked)
@@ -234,7 +228,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
rxrpc_timer_set_for_resend);
spin_unlock_bh(&call->lock);
ack_ts = ktime_sub(now, call->acks_latest_ts);
- if (ktime_to_ns(ack_ts) < call->peer->rtt)
+ if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
goto out;
rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
rxrpc_propose_ack_ping_for_lost_ack);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 69e09d69c896..3be4177baf70 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -91,11 +91,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
/* We analyse the number of packets that get ACK'd per RTT
* period and increase the window if we managed to fill it.
*/
- if (call->peer->rtt_usage == 0)
+ if (call->peer->rtt_count == 0)
goto out;
if (ktime_before(skb->tstamp,
- ktime_add_ns(call->cong_tstamp,
- call->peer->rtt)))
+ ktime_add_us(call->cong_tstamp,
+ call->peer->srtt_us >> 3)))
goto out_no_clear_ca;
change = rxrpc_cong_rtt_window_end;
call->cong_tstamp = skb->tstamp;
@@ -803,6 +803,30 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
}
/*
+ * Return true if the ACK is valid - ie. it doesn't appear to have regressed
+ * with respect to the ack state conveyed by preceding ACKs.
+ */
+static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
+ rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt)
+{
+ rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq);
+
+ if (after(first_pkt, base))
+ return true; /* The window advanced */
+
+ if (before(first_pkt, base))
+ return false; /* firstPacket regressed */
+
+ if (after_eq(prev_pkt, call->ackr_prev_seq))
+ return true; /* previousPacket hasn't regressed. */
+
+ /* Some rx implementations put a serial number in previousPacket. */
+ if (after_eq(prev_pkt, base + call->tx_winsize))
+ return false;
+ return true;
+}
+
+/*
* Process an ACK packet.
*
* ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet
@@ -865,9 +889,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
}
/* Discard any out-of-order or duplicate ACKs (outside lock). */
- if (before(first_soft_ack, call->ackr_first_seq) ||
- before(prev_pkt, call->ackr_prev_seq))
+ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
+ trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial,
+ first_soft_ack, call->ackr_first_seq,
+ prev_pkt, call->ackr_prev_seq);
return;
+ }
buf.info.rxMTU = 0;
ioffset = offset + nr_acks + 3;
@@ -878,9 +905,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
spin_lock(&call->input_lock);
/* Discard any out-of-order or duplicate ACKs (inside lock). */
- if (before(first_soft_ack, call->ackr_first_seq) ||
- before(prev_pkt, call->ackr_prev_seq))
+ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
+ trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial,
+ first_soft_ack, call->ackr_first_seq,
+ prev_pkt, call->ackr_prev_seq);
goto out;
+ }
call->acks_latest_ts = skb->tstamp;
call->ackr_first_seq = first_soft_ack;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 01135e54d95d..c8b2097f499c 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -107,7 +107,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
{
struct sock *usk;
- int ret, opt;
+ int ret;
_enter("%p{%d,%d}",
local, local->srx.transport_type, local->srx.transport.family);
@@ -157,13 +157,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
switch (local->srx.transport.family) {
case AF_INET6:
/* we want to receive ICMPv6 errors */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ ip6_sock_set_recverr(local->socket->sk);
/* Fall through and set IPv4 options too otherwise we don't get
* errors from IPv4 packets sent through the IPv6 socket.
@@ -171,31 +165,13 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
/* Fall through */
case AF_INET:
/* we want to receive ICMP errors */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ ip_sock_set_recverr(local->socket->sk);
/* we want to set the don't fragment bit */
- opt = IP_PMTUDISC_DO;
- ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
- (char *) &opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
/* We want receive timestamps. */
- opt = 1;
- ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
- (char *)&opt, sizeof(opt));
- if (ret < 0) {
- _debug("setsockopt failed");
- goto error;
- }
+ sock_enable_timestamps(local->socket->sk);
break;
default:
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 214405f75346..d4144fd86f84 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -63,11 +63,6 @@ unsigned int rxrpc_rx_mtu = 5692;
*/
unsigned int rxrpc_rx_jumbo_max = 4;
-/*
- * Time till packet resend (in milliseconds).
- */
-unsigned long rxrpc_resend_timeout = 4 * HZ;
-
const s8 rxrpc_ack_priority[] = {
[0] = 0,
[RXRPC_ACK_DELAY] = 1,
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 90e263c6aa69..1ba43c3df4ad 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -321,7 +321,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
struct kvec iov[2];
rxrpc_serial_t serial;
size_t len;
- int ret, opt;
+ int ret;
_enter(",{%d}", skb->len);
@@ -369,7 +369,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
(test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
retrans ||
call->cong_mode == RXRPC_CALL_SLOW_START ||
- (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
+ (call->peer->rtt_count < 3 && sp->hdr.seq & 1) ||
ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
ktime_get_real())))
whdr.flags |= RXRPC_REQUEST_ACK;
@@ -423,13 +423,10 @@ done:
if (whdr.flags & RXRPC_REQUEST_ACK) {
call->peer->rtt_last_req = skb->tstamp;
trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
- if (call->peer->rtt_usage > 1) {
+ if (call->peer->rtt_count > 1) {
unsigned long nowj = jiffies, ack_lost_at;
- ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt);
- if (ack_lost_at < 1)
- ack_lost_at = 1;
-
+ ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans);
ack_lost_at += nowj;
WRITE_ONCE(call->ack_lost_at, ack_lost_at);
rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
@@ -476,18 +473,14 @@ send_fragmentable:
switch (conn->params.local->srx.transport.family) {
case AF_INET6:
case AF_INET:
- opt = IP_PMTUDISC_DONT;
- kernel_setsockopt(conn->params.local->socket,
- SOL_IP, IP_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
+ ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ IP_PMTUDISC_DONT);
ret = kernel_sendmsg(conn->params.local->socket, &msg,
iov, 2, len);
conn->params.peer->last_tx_at = ktime_get_seconds();
- opt = IP_PMTUDISC_DO;
- kernel_setsockopt(conn->params.local->socket,
- SOL_IP, IP_MTU_DISCOVER,
- (char *)&opt, sizeof(opt));
+ ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+ IP_PMTUDISC_DO);
break;
default:
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 923b263c401b..b1449d971883 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -296,52 +296,6 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
}
/*
- * Add RTT information to cache. This is called in softirq mode and has
- * exclusive access to the peer RTT data.
- */
-void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
- rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
- ktime_t send_time, ktime_t resp_time)
-{
- struct rxrpc_peer *peer = call->peer;
- s64 rtt;
- u64 sum = peer->rtt_sum, avg;
- u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage;
-
- rtt = ktime_to_ns(ktime_sub(resp_time, send_time));
- if (rtt < 0)
- return;
-
- spin_lock(&peer->rtt_input_lock);
-
- /* Replace the oldest datum in the RTT buffer */
- sum -= peer->rtt_cache[cursor];
- sum += rtt;
- peer->rtt_cache[cursor] = rtt;
- peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1);
- peer->rtt_sum = sum;
- if (usage < RXRPC_RTT_CACHE_SIZE) {
- usage++;
- peer->rtt_usage = usage;
- }
-
- spin_unlock(&peer->rtt_input_lock);
-
- /* Now recalculate the average */
- if (usage == RXRPC_RTT_CACHE_SIZE) {
- avg = sum / RXRPC_RTT_CACHE_SIZE;
- } else {
- avg = sum;
- do_div(avg, usage);
- }
-
- /* Don't need to update this under lock */
- peer->rtt = avg;
- trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
- usage, avg);
-}
-
-/*
* Perform keep-alive pings.
*/
static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 452163eadb98..ca29976bb193 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -225,6 +225,8 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
spin_lock_init(&peer->rtt_input_lock);
peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ rxrpc_peer_init_rtt(peer);
+
if (RXRPC_TX_SMSS > 2190)
peer->cong_cwnd = 2;
else if (RXRPC_TX_SMSS > 1095)
@@ -497,14 +499,14 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
EXPORT_SYMBOL(rxrpc_kernel_get_peer);
/**
- * rxrpc_kernel_get_rtt - Get a call's peer RTT
+ * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT
* @sock: The socket on which the call is in progress.
* @call: The call to query
*
- * Get the call's peer RTT.
+ * Get the call's peer smoothed RTT.
*/
-u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call)
+u32 rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call)
{
- return call->peer->rtt;
+ return call->peer->srtt_us >> 3;
}
-EXPORT_SYMBOL(rxrpc_kernel_get_rtt);
+EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index b9d053e42821..8b179e3c802a 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -222,7 +222,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
"Proto Local "
" Remote "
- " Use CW MTU LastUse RTT Rc\n"
+ " Use CW MTU LastUse RTT RTO\n"
);
return 0;
}
@@ -236,15 +236,15 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
now = ktime_get_seconds();
seq_printf(seq,
"UDP %-47.47s %-47.47s %3u"
- " %3u %5u %6llus %12llu %2u\n",
+ " %3u %5u %6llus %8u %8u\n",
lbuff,
rbuff,
atomic_read(&peer->usage),
peer->cong_cwnd,
peer->mtu,
now - peer->last_tx_at,
- peer->rtt,
- peer->rtt_cursor);
+ peer->srtt_us >> 3,
+ jiffies_to_usecs(peer->rto_j));
return 0;
}
diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
new file mode 100644
index 000000000000..928d8b34a3ee
--- /dev/null
+++ b/net/rxrpc/rtt.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/* RTT/RTO calculation.
+ *
+ * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com)
+ *
+ * https://tools.ietf.org/html/rfc6298
+ * https://tools.ietf.org/html/rfc1122#section-4.2.3.1
+ * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
+ */
+
+#include <linux/net.h>
+#include "ar-internal.h"
+
+#define RXRPC_RTO_MAX ((unsigned)(120 * HZ))
+#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
+#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */
+#define rxrpc_min_rtt_wlen 300 /* As sysctl_tcp_min_rtt_wlen */
+
+static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
+{
+ return 200;
+}
+
+static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
+{
+ return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
+}
+
+static u32 rxrpc_bound_rto(u32 rto)
+{
+ return min(rto, RXRPC_RTO_MAX);
+}
+
+/*
+ * Called to compute a smoothed rtt estimate. The data fed to this
+ * routine either comes from timestamps, or from segments that were
+ * known _not_ to have been retransmitted [see Karn/Partridge
+ * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
+ * piece by Van Jacobson.
+ * NOTE: the next three routines used to be one big routine.
+ * To save cycles in the RFC 1323 implementation it was better to break
+ * it up into three procedures. -- erics
+ */
+static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
+{
+ long m = sample_rtt_us; /* RTT */
+ u32 srtt = peer->srtt_us;
+
+ /* The following amusing code comes from Jacobson's
+ * article in SIGCOMM '88. Note that rtt and mdev
+ * are scaled versions of rtt and mean deviation.
+ * This is designed to be as fast as possible
+ * m stands for "measurement".
+ *
+ * On a 1990 paper the rto value is changed to:
+ * RTO = rtt + 4 * mdev
+ *
+ * Funny. This algorithm seems to be very broken.
+ * These formulae increase RTO, when it should be decreased, increase
+ * too slowly, when it should be increased quickly, decrease too quickly
+ * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
+ * does not matter how to _calculate_ it. Seems, it was trap
+ * that VJ failed to avoid. 8)
+ */
+ if (srtt != 0) {
+ m -= (srtt >> 3); /* m is now error in rtt est */
+ srtt += m; /* rtt = 7/8 rtt + 1/8 new */
+ if (m < 0) {
+ m = -m; /* m is now abs(error) */
+ m -= (peer->mdev_us >> 2); /* similar update on mdev */
+ /* This is similar to one of Eifel findings.
+ * Eifel blocks mdev updates when rtt decreases.
+ * This solution is a bit different: we use finer gain
+ * for mdev in this case (alpha*beta).
+ * Like Eifel it also prevents growth of rto,
+ * but also it limits too fast rto decreases,
+ * happening in pure Eifel.
+ */
+ if (m > 0)
+ m >>= 3;
+ } else {
+ m -= (peer->mdev_us >> 2); /* similar update on mdev */
+ }
+
+ peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
+ if (peer->mdev_us > peer->mdev_max_us) {
+ peer->mdev_max_us = peer->mdev_us;
+ if (peer->mdev_max_us > peer->rttvar_us)
+ peer->rttvar_us = peer->mdev_max_us;
+ }
+ } else {
+ /* no previous measure. */
+ srtt = m << 3; /* take the measured time to be rtt */
+ peer->mdev_us = m << 1; /* make sure rto = 3*rtt */
+ peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer));
+ peer->mdev_max_us = peer->rttvar_us;
+ }
+
+ peer->srtt_us = max(1U, srtt);
+}
+
+/*
+ * Calculate rto without backoff. This is the second half of Van Jacobson's
+ * routine referred to above.
+ */
+static void rxrpc_set_rto(struct rxrpc_peer *peer)
+{
+ u32 rto;
+
+ /* 1. If rtt variance happened to be less 50msec, it is hallucination.
+ * It cannot be less due to utterly erratic ACK generation made
+ * at least by solaris and freebsd. "Erratic ACKs" has _nothing_
+ * to do with delayed acks, because at cwnd>2 true delack timeout
+ * is invisible. Actually, Linux-2.4 also generates erratic
+ * ACKs in some circumstances.
+ */
+ rto = __rxrpc_set_rto(peer);
+
+ /* 2. Fixups made earlier cannot be right.
+ * If we do not estimate RTO correctly without them,
+ * all the algo is pure shit and should be replaced
+ * with correct one. It is exactly, which we pretend to do.
+ */
+
+ /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
+ * guarantees that rto is higher.
+ */
+ peer->rto_j = rxrpc_bound_rto(rto);
+}
+
+static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
+{
+ if (rtt_us < 0)
+ return;
+
+ //rxrpc_update_rtt_min(peer, rtt_us);
+ rxrpc_rtt_estimator(peer, rtt_us);
+ rxrpc_set_rto(peer);
+
+ /* RFC6298: only reset backoff on valid RTT measurement. */
+ peer->backoff = 0;
+}
+
+/*
+ * Add RTT information to cache. This is called in softirq mode and has
+ * exclusive access to the peer RTT data.
+ */
+void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+ rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+ ktime_t send_time, ktime_t resp_time)
+{
+ struct rxrpc_peer *peer = call->peer;
+ s64 rtt_us;
+
+ rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
+ if (rtt_us < 0)
+ return;
+
+ spin_lock(&peer->rtt_input_lock);
+ rxrpc_ack_update_rtt(peer, rtt_us);
+ if (peer->rtt_count < 3)
+ peer->rtt_count++;
+ spin_unlock(&peer->rtt_input_lock);
+
+ trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial,
+ peer->srtt_us >> 3, peer->rto_j);
+}
+
+/*
+ * Get the retransmission timeout to set in jiffies, backing it off each time
+ * we retransmit.
+ */
+unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
+{
+ u64 timo_j;
+ u8 backoff = READ_ONCE(peer->backoff);
+
+ timo_j = peer->rto_j;
+ timo_j <<= backoff;
+ if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
+ WRITE_ONCE(peer->backoff, backoff + 1);
+
+ if (timo_j < 1)
+ timo_j = 1;
+
+ return timo_j;
+}
+
+void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
+{
+ peer->rto_j = RXRPC_TIMEOUT_INIT;
+ peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
+ peer->backoff = 0;
+ //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
+}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 098f1f9ec53b..52a24d4ef5d8 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -1148,7 +1148,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key,
&expiry, _abort_code);
if (ret < 0)
- goto temporary_error_free_resp;
+ goto temporary_error_free_ticket;
/* use the session key from inside the ticket to decrypt the
* response */
@@ -1230,7 +1230,6 @@ protocol_error:
temporary_error_free_ticket:
kfree(ticket);
-temporary_error_free_resp:
kfree(response);
temporary_error:
/* Ignore the response packet if we got a temporary error such as
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 0fcf157aa09f..5e9c43d4a314 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -66,15 +66,14 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
struct rxrpc_call *call)
{
rxrpc_seq_t tx_start, tx_win;
- signed long rtt2, timeout;
- u64 rtt;
+ signed long rtt, timeout;
- rtt = READ_ONCE(call->peer->rtt);
- rtt2 = nsecs_to_jiffies64(rtt) * 2;
- if (rtt2 < 2)
- rtt2 = 2;
+ rtt = READ_ONCE(call->peer->srtt_us) >> 3;
+ rtt = usecs_to_jiffies(rtt) * 2;
+ if (rtt < 2)
+ rtt = 2;
- timeout = rtt2;
+ timeout = rtt;
tx_start = READ_ONCE(call->tx_hard_ack);
for (;;) {
@@ -92,7 +91,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
return -EINTR;
if (tx_win != tx_start) {
- timeout = rtt2;
+ timeout = rtt;
tx_start = tx_win;
}
@@ -271,16 +270,9 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
_debug("need instant resend %d", ret);
rxrpc_instant_resend(call, ix);
} else {
- unsigned long now = jiffies, resend_at;
+ unsigned long now = jiffies;
+ unsigned long resend_at = now + call->peer->rto_j;
- if (call->peer->rtt_usage > 1)
- resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2);
- else
- resend_at = rxrpc_resend_timeout;
- if (resend_at < 1)
- resend_at = 1;
-
- resend_at += now;
WRITE_ONCE(call->resend_at, resend_at);
rxrpc_reduce_call_timer(call, resend_at, now,
rxrpc_timer_set_for_send);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 174e903e18de..e91acc95ff28 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -71,15 +71,6 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)&one_jiffy,
.extra2 = (void *)&max_jiffies,
},
- {
- .procname = "resend_timeout",
- .data = &rxrpc_resend_timeout,
- .maxlen = sizeof(unsigned long),
- .mode = 0644,
- .proc_handler = proc_doulongvec_ms_jiffies_minmax,
- .extra1 = (void *)&one_jiffy,
- .extra2 = (void *)&max_jiffies,
- },
/* Non-time values */
{
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 9adff83b523b..e29f0f45d688 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -200,6 +200,9 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
struct nf_conntrack_tuple target;
+ if (!(ct->status & IPS_NAT_MASK))
+ return 0;
+
nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
switch (tuple->src.l3num) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 0c574700da75..96f5999281e0 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -668,6 +668,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_BOS] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_TC] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_MPLS_LABEL] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_MPLS_OPTS] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_TCP_FLAGS] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_IP_TOS] = { .type = NLA_U8 },
@@ -726,6 +727,20 @@ erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 },
};
+static const struct nla_policy
+mpls_opts_policy[TCA_FLOWER_KEY_MPLS_OPTS_MAX + 1] = {
+ [TCA_FLOWER_KEY_MPLS_OPTS_LSE] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_TC] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 },
+};
+
static void fl_set_key_val(struct nlattr **tb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -776,14 +791,157 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
return 0;
}
+static int fl_set_key_mpls_lse(const struct nlattr *nla_lse,
+ struct flow_dissector_key_mpls *key_val,
+ struct flow_dissector_key_mpls *key_mask,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1];
+ struct flow_dissector_mpls_lse *lse_mask;
+ struct flow_dissector_mpls_lse *lse_val;
+ u8 lse_index;
+ u8 depth;
+ int err;
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX, nla_lse,
+ mpls_stack_entry_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]) {
+ NL_SET_ERR_MSG(extack, "Missing MPLS option \"depth\"");
+ return -EINVAL;
+ }
+
+ depth = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]);
+
+ /* LSE depth starts at 1, for consistency with terminology used by
+ * RFC 3031 (section 3.9), where depth 0 refers to unlabeled packets.
+ */
+ if (depth < 1 || depth > FLOW_DIS_MPLS_MAX) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH],
+ "Invalid MPLS depth");
+ return -EINVAL;
+ }
+ lse_index = depth - 1;
+
+ dissector_set_mpls_lse(key_val, lse_index);
+ dissector_set_mpls_lse(key_mask, lse_index);
+
+ lse_val = &key_val->ls[lse_index];
+ lse_mask = &key_mask->ls[lse_index];
+
+ if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]) {
+ lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]);
+ lse_mask->mpls_ttl = MPLS_TTL_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]) {
+ u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]);
+
+ if (bos & ~MPLS_BOS_MASK) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS],
+ "Bottom Of Stack (BOS) must be 0 or 1");
+ return -EINVAL;
+ }
+ lse_val->mpls_bos = bos;
+ lse_mask->mpls_bos = MPLS_BOS_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]) {
+ u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]);
+
+ if (tc & ~MPLS_TC_MASK) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC],
+ "Traffic Class (TC) must be between 0 and 7");
+ return -EINVAL;
+ }
+ lse_val->mpls_tc = tc;
+ lse_mask->mpls_tc = MPLS_TC_MASK;
+ }
+ if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]) {
+ u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]);
+
+ if (label & ~MPLS_LABEL_MASK) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL],
+ "Label must be between 0 and 1048575");
+ return -EINVAL;
+ }
+ lse_val->mpls_label = label;
+ lse_mask->mpls_label = MPLS_LABEL_MASK;
+ }
+
+ return 0;
+}
+
+static int fl_set_key_mpls_opts(const struct nlattr *nla_mpls_opts,
+ struct flow_dissector_key_mpls *key_val,
+ struct flow_dissector_key_mpls *key_mask,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *nla_lse;
+ int rem;
+ int err;
+
+ if (!(nla_mpls_opts->nla_type & NLA_F_NESTED)) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_mpls_opts,
+ "NLA_F_NESTED is missing");
+ return -EINVAL;
+ }
+
+ nla_for_each_nested(nla_lse, nla_mpls_opts, rem) {
+ if (nla_type(nla_lse) != TCA_FLOWER_KEY_MPLS_OPTS_LSE) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_lse,
+ "Invalid MPLS option type");
+ return -EINVAL;
+ }
+
+ err = fl_set_key_mpls_lse(nla_lse, key_val, key_mask, extack);
+ if (err < 0)
+ return err;
+ }
+ if (rem) {
+ NL_SET_ERR_MSG(extack,
+ "Bytes leftover after parsing MPLS options");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int fl_set_key_mpls(struct nlattr **tb,
struct flow_dissector_key_mpls *key_val,
struct flow_dissector_key_mpls *key_mask,
struct netlink_ext_ack *extack)
{
+ struct flow_dissector_mpls_lse *lse_mask;
+ struct flow_dissector_mpls_lse *lse_val;
+
+ if (tb[TCA_FLOWER_KEY_MPLS_OPTS]) {
+ if (tb[TCA_FLOWER_KEY_MPLS_TTL] ||
+ tb[TCA_FLOWER_KEY_MPLS_BOS] ||
+ tb[TCA_FLOWER_KEY_MPLS_TC] ||
+ tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[TCA_FLOWER_KEY_MPLS_OPTS],
+ "MPLS label, Traffic Class, Bottom Of Stack and Time To Live must be encapsulated in the MPLS options attribute");
+ return -EBADMSG;
+ }
+
+ return fl_set_key_mpls_opts(tb[TCA_FLOWER_KEY_MPLS_OPTS],
+ key_val, key_mask, extack);
+ }
+
+ lse_val = &key_val->ls[0];
+ lse_mask = &key_mask->ls[0];
+
if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
- key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
- key_mask->mpls_ttl = MPLS_TTL_MASK;
+ lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
+ lse_mask->mpls_ttl = MPLS_TTL_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
@@ -794,8 +952,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
"Bottom Of Stack (BOS) must be 0 or 1");
return -EINVAL;
}
- key_val->mpls_bos = bos;
- key_mask->mpls_bos = MPLS_BOS_MASK;
+ lse_val->mpls_bos = bos;
+ lse_mask->mpls_bos = MPLS_BOS_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]);
@@ -806,8 +966,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
"Traffic Class (TC) must be between 0 and 7");
return -EINVAL;
}
- key_val->mpls_tc = tc;
- key_mask->mpls_tc = MPLS_TC_MASK;
+ lse_val->mpls_tc = tc;
+ lse_mask->mpls_tc = MPLS_TC_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]);
@@ -818,8 +980,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
"Label must be between 0 and 1048575");
return -EINVAL;
}
- key_val->mpls_label = label;
- key_mask->mpls_label = MPLS_LABEL_MASK;
+ lse_val->mpls_label = label;
+ lse_mask->mpls_label = MPLS_LABEL_MASK;
+ dissector_set_mpls_lse(key_val, 0);
+ dissector_set_mpls_lse(key_mask, 0);
}
return 0;
}
@@ -2218,35 +2382,132 @@ static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
return 0;
}
+static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb,
+ struct flow_dissector_key_mpls *mpls_key,
+ struct flow_dissector_key_mpls *mpls_mask,
+ u8 lse_index)
+{
+ struct flow_dissector_mpls_lse *lse_mask = &mpls_mask->ls[lse_index];
+ struct flow_dissector_mpls_lse *lse_key = &mpls_key->ls[lse_index];
+ int err;
+
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH,
+ lse_index + 1);
+ if (err)
+ return err;
+
+ if (lse_mask->mpls_ttl) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL,
+ lse_key->mpls_ttl);
+ if (err)
+ return err;
+ }
+ if (lse_mask->mpls_bos) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS,
+ lse_key->mpls_bos);
+ if (err)
+ return err;
+ }
+ if (lse_mask->mpls_tc) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TC,
+ lse_key->mpls_tc);
+ if (err)
+ return err;
+ }
+ if (lse_mask->mpls_label) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
+ lse_key->mpls_label);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int fl_dump_key_mpls_opts(struct sk_buff *skb,
+ struct flow_dissector_key_mpls *mpls_key,
+ struct flow_dissector_key_mpls *mpls_mask)
+{
+ struct nlattr *opts;
+ struct nlattr *lse;
+ u8 lse_index;
+ int err;
+
+ opts = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS);
+ if (!opts)
+ return -EMSGSIZE;
+
+ for (lse_index = 0; lse_index < FLOW_DIS_MPLS_MAX; lse_index++) {
+ if (!(mpls_mask->used_lses & 1 << lse_index))
+ continue;
+
+ lse = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS_LSE);
+ if (!lse) {
+ err = -EMSGSIZE;
+ goto err_opts;
+ }
+
+ err = fl_dump_key_mpls_opt_lse(skb, mpls_key, mpls_mask,
+ lse_index);
+ if (err)
+ goto err_opts_lse;
+ nla_nest_end(skb, lse);
+ }
+ nla_nest_end(skb, opts);
+
+ return 0;
+
+err_opts_lse:
+ nla_nest_cancel(skb, lse);
+err_opts:
+ nla_nest_cancel(skb, opts);
+
+ return err;
+}
+
static int fl_dump_key_mpls(struct sk_buff *skb,
struct flow_dissector_key_mpls *mpls_key,
struct flow_dissector_key_mpls *mpls_mask)
{
+ struct flow_dissector_mpls_lse *lse_mask;
+ struct flow_dissector_mpls_lse *lse_key;
int err;
- if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
+ if (!mpls_mask->used_lses)
return 0;
- if (mpls_mask->mpls_ttl) {
+
+ lse_mask = &mpls_mask->ls[0];
+ lse_key = &mpls_key->ls[0];
+
+ /* For backward compatibility, don't use the MPLS nested attributes if
+ * the rule can be expressed using the old attributes.
+ */
+ if (mpls_mask->used_lses & ~1 ||
+ (!lse_mask->mpls_ttl && !lse_mask->mpls_bos &&
+ !lse_mask->mpls_tc && !lse_mask->mpls_label))
+ return fl_dump_key_mpls_opts(skb, mpls_key, mpls_mask);
+
+ if (lse_mask->mpls_ttl) {
err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
- mpls_key->mpls_ttl);
+ lse_key->mpls_ttl);
if (err)
return err;
}
- if (mpls_mask->mpls_tc) {
+ if (lse_mask->mpls_tc) {
err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
- mpls_key->mpls_tc);
+ lse_key->mpls_tc);
if (err)
return err;
}
- if (mpls_mask->mpls_label) {
+ if (lse_mask->mpls_label) {
err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
- mpls_key->mpls_label);
+ lse_key->mpls_label);
if (err)
return err;
}
- if (mpls_mask->mpls_bos) {
+ if (lse_mask->mpls_bos) {
err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
- mpls_key->mpls_bos);
+ lse_key->mpls_bos);
if (err)
return err;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0d99df1e764d..9a3449b56bd6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -32,6 +32,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <trace/events/qdisc.h>
+
/*
Short review.
@@ -1283,6 +1285,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
}
qdisc_hash_add(sch, false);
+ trace_qdisc_create(ops, dev, parent);
return sch;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 1496e87cd07b..60f8ae578819 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -584,26 +584,48 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
return drop;
}
-static void cake_update_flowkeys(struct flow_keys *keys,
+static bool cake_update_flowkeys(struct flow_keys *keys,
const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct nf_conntrack_tuple tuple = {};
- bool rev = !skb->_nfct;
+ bool rev = !skb->_nfct, upd = false;
+ __be32 ip;
if (tc_skb_protocol(skb) != htons(ETH_P_IP))
- return;
+ return false;
if (!nf_ct_get_tuple_skb(&tuple, skb))
- return;
+ return false;
- keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
- keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+ ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+ if (ip != keys->addrs.v4addrs.src) {
+ keys->addrs.v4addrs.src = ip;
+ upd = true;
+ }
+ ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+ if (ip != keys->addrs.v4addrs.dst) {
+ keys->addrs.v4addrs.dst = ip;
+ upd = true;
+ }
if (keys->ports.ports) {
- keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
- keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+ __be16 port;
+
+ port = rev ? tuple.dst.u.all : tuple.src.u.all;
+ if (port != keys->ports.src) {
+ keys->ports.src = port;
+ upd = true;
+ }
+ port = rev ? tuple.src.u.all : tuple.dst.u.all;
+ if (port != keys->ports.dst) {
+ port = keys->ports.dst;
+ upd = true;
+ }
}
+ return upd;
+#else
+ return false;
#endif
}
@@ -624,23 +646,36 @@ static bool cake_ddst(int flow_mode)
static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
int flow_mode, u16 flow_override, u16 host_override)
{
+ bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS));
+ bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS));
+ bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG);
u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
u16 reduced_hash, srchost_idx, dsthost_idx;
struct flow_keys keys, host_keys;
+ bool use_skbhash = skb->l4_hash;
if (unlikely(flow_mode == CAKE_FLOW_NONE))
return 0;
- /* If both overrides are set we can skip packet dissection entirely */
- if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
- (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
+ /* If both overrides are set, or we can use the SKB hash and nat mode is
+ * disabled, we can skip packet dissection entirely. If nat mode is
+ * enabled there's another check below after doing the conntrack lookup.
+ */
+ if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts)
goto skip_hash;
skb_flow_dissect_flow_keys(skb, &keys,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
- if (flow_mode & CAKE_FLOW_NAT_FLAG)
- cake_update_flowkeys(&keys, skb);
+ /* Don't use the SKB hash if we change the lookup keys from conntrack */
+ if (nat_enabled && cake_update_flowkeys(&keys, skb))
+ use_skbhash = false;
+
+ /* If we can still use the SKB hash and don't need the host hash, we can
+ * skip the rest of the hashing procedure
+ */
+ if (use_skbhash && !hash_hosts)
+ goto skip_hash;
/* flow_hash_from_keys() sorts the addresses by value, so we have
* to preserve their order in a separate data structure to treat
@@ -679,12 +714,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
/* This *must* be after the above switch, since as a
* side-effect it sorts the src and dst addresses.
*/
- if (flow_mode & CAKE_FLOW_FLOWS)
+ if (hash_flows && !use_skbhash)
flow_hash = flow_hash_from_keys(&keys);
skip_hash:
if (flow_override)
flow_hash = flow_override - 1;
+ else if (use_skbhash)
+ flow_hash = skb->hash;
if (host_override) {
dsthost_hash = host_override - 1;
srchost_hash = host_override - 1;
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index a9da8776bf5b..fb760cee824e 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -297,9 +297,9 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
goto flow_error;
}
q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
- if (!q->flows_cnt || q->flows_cnt > 65536) {
+ if (!q->flows_cnt || q->flows_cnt >= 65536) {
NL_SET_ERR_MSG_MOD(extack,
- "Number of flows must be < 65536");
+ "Number of flows must range in [1..65535]");
goto flow_error;
}
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ebc55d884247..b19a0021a0bd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -896,8 +896,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
}
sch->parent = parentid;
- if (!ops->init || ops->init(sch, NULL, extack) == 0)
+ if (!ops->init || ops->init(sch, NULL, extack) == 0) {
+ trace_qdisc_create(ops, dev_queue->dev, parentid);
return sch;
+ }
qdisc_put(sch);
return NULL;
@@ -911,6 +913,8 @@ void qdisc_reset(struct Qdisc *qdisc)
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;
+ trace_qdisc_reset(qdisc);
+
if (ops->reset)
ops->reset(qdisc);
@@ -949,7 +953,6 @@ static void qdisc_free_cb(struct rcu_head *head)
static void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
- struct sk_buff *skb, *tmp;
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -957,23 +960,16 @@ static void qdisc_destroy(struct Qdisc *qdisc)
qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
gen_kill_estimator(&qdisc->rate_est);
- if (ops->reset)
- ops->reset(qdisc);
+
+ qdisc_reset(qdisc);
+
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
dev_put(qdisc_dev(qdisc));
- skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
- __skb_unlink(skb, &qdisc->gso_skb);
- kfree_skb_list(skb);
- }
-
- skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
- __skb_unlink(skb, &qdisc->skb_bad_txq);
- kfree_skb_list(skb);
- }
+ trace_qdisc_destroy(qdisc);
call_rcu(&qdisc->rcu, qdisc_free_cb);
}
@@ -1132,6 +1128,28 @@ void dev_activate(struct net_device *dev)
}
EXPORT_SYMBOL(dev_activate);
+static void qdisc_deactivate(struct Qdisc *qdisc)
+{
+ bool nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+ if (qdisc->flags & TCQ_F_BUILTIN)
+ return;
+ if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state))
+ return;
+
+ if (nolock)
+ spin_lock_bh(&qdisc->seqlock);
+ spin_lock_bh(qdisc_lock(qdisc));
+
+ set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+
+ qdisc_reset(qdisc);
+
+ spin_unlock_bh(qdisc_lock(qdisc));
+ if (nolock)
+ spin_unlock_bh(&qdisc->seqlock);
+}
+
static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc_default)
@@ -1141,21 +1159,8 @@ static void dev_deactivate_queue(struct net_device *dev,
qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
- bool nolock = qdisc->flags & TCQ_F_NOLOCK;
-
- if (nolock)
- spin_lock_bh(&qdisc->seqlock);
- spin_lock_bh(qdisc_lock(qdisc));
-
- if (!(qdisc->flags & TCQ_F_BUILTIN))
- set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
-
+ qdisc_deactivate(qdisc);
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- qdisc_reset(qdisc);
-
- spin_unlock_bh(qdisc_lock(qdisc));
- if (nolock)
- spin_unlock_bh(&qdisc->seqlock);
}
}
@@ -1186,16 +1191,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}
-static void dev_qdisc_reset(struct net_device *dev,
- struct netdev_queue *dev_queue,
- void *none)
-{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
-
- if (qdisc)
- qdisc_reset(qdisc);
-}
-
/**
* dev_deactivate_many - deactivate transmissions on several devices
* @head: list of devices to deactivate
@@ -1232,12 +1227,6 @@ void dev_deactivate_many(struct list_head *head)
*/
schedule_timeout_uninterruptible(1);
}
- /* The new qdisc is assigned at this point so we can safely
- * unwind stale skb lists and qdisc statistics
- */
- netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
- if (dev_ingress_queue(dev))
- dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
}
}
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 6e2eb1dd64ed..68934438ee19 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -31,7 +31,7 @@ menuconfig IP_SCTP
homing at either or both ends of an association."
To compile this protocol support as a module, choose M here: the
- module will be called sctp. Debug messages are handeled by the
+ module will be called sctp. Debug messages are handled by the
kernel's dynamic debugging framework.
If in doubt, say N.
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 437079a4883d..72315137d7e7 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -432,7 +432,7 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
changeover = 1 ;
asoc->peer.primary_path = transport;
- sctp_ulpevent_nofity_peer_addr_change(transport,
+ sctp_ulpevent_notify_peer_addr_change(transport,
SCTP_ADDR_MADE_PRIM, 0);
/* Set a default msg_name for events. */
@@ -574,7 +574,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
asoc->peer.transport_count--;
- sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0);
+ sctp_ulpevent_notify_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0);
sctp_transport_free(peer);
}
@@ -714,7 +714,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
asoc->peer.transport_count++;
- sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_ADDED, 0);
+ sctp_ulpevent_notify_peer_addr_change(peer, SCTP_ADDR_ADDED, 0);
/* If we do not yet have a primary path, set one. */
if (!asoc->peer.primary_path) {
@@ -840,7 +840,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
* to the user.
*/
if (ulp_notify)
- sctp_ulpevent_nofity_peer_addr_change(transport,
+ sctp_ulpevent_notify_peer_addr_change(transport,
spc_state, error);
/* Select new active and retran paths. */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index c87af430107a..ccfa0ab3e7f4 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1032,6 +1032,7 @@ static const struct proto_ops inet6_seqpacket_ops = {
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
#ifdef CONFIG_COMPAT
+ .compat_ioctl = inet6_compat_ioctl,
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 2bc29463e1dc..9f36fe911d08 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1523,9 +1523,17 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
timeout = asoc->timeouts[cmd->obj.to];
BUG_ON(!timeout);
- timer->expires = jiffies + timeout;
- sctp_association_hold(asoc);
- add_timer(timer);
+ /*
+ * SCTP has a hard time with timer starts. Because we process
+ * timer starts as side effects, it can be hard to tell if we
+ * have already started a timer or not, which leads to BUG
+ * halts when we call add_timer. So here, instead of just starting
+ * a timer, if the timer is already started, and just mod
+ * the timer with the shorter of the two expiration times
+ */
+ if (!timer_pending(timer))
+ sctp_association_hold(asoc);
+ timer_reduce(timer, jiffies + timeout);
break;
case SCTP_CMD_TIMER_RESTART:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 26788f4a3b9e..e86620fbd90f 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1856,12 +1856,13 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
/* Update the content of current association. */
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
- if (sctp_state(asoc, SHUTDOWN_PENDING) &&
+ if ((sctp_state(asoc, SHUTDOWN_PENDING) ||
+ sctp_state(asoc, SHUTDOWN_SENT)) &&
(sctp_sstate(asoc->base.sk, CLOSING) ||
sock_flag(asoc->base.sk, SOCK_DEAD))) {
- /* if were currently in SHUTDOWN_PENDING, but the socket
- * has been closed by user, don't transition to ESTABLISHED.
- * Instead trigger SHUTDOWN bundled with COOKIE_ACK.
+ /* If the socket has been closed by user, don't
+ * transition to ESTABLISHED. Instead trigger SHUTDOWN
+ * bundled with COOKIE_ACK.
*/
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
return sctp_sf_do_9_2_start_shutdown(net, ep, asoc,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 827a9903ee28..d57e1a002ffc 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -972,23 +972,22 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
* it.
*
* sk The sk of the socket
- * addrs The pointer to the addresses in user land
+ * addrs The pointer to the addresses
* addrssize Size of the addrs buffer
* op Operation to perform (add or remove, see the flags of
* sctp_bindx)
*
* Returns 0 if ok, <0 errno code on error.
*/
-static int sctp_setsockopt_bindx(struct sock *sk,
- struct sockaddr __user *addrs,
- int addrs_size, int op)
+static int sctp_setsockopt_bindx_kernel(struct sock *sk,
+ struct sockaddr *addrs, int addrs_size,
+ int op)
{
- struct sockaddr *kaddrs;
int err;
int addrcnt = 0;
int walk_size = 0;
struct sockaddr *sa_addr;
- void *addr_buf;
+ void *addr_buf = addrs;
struct sctp_af *af;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n",
@@ -997,17 +996,10 @@ static int sctp_setsockopt_bindx(struct sock *sk,
if (unlikely(addrs_size <= 0))
return -EINVAL;
- kaddrs = memdup_user(addrs, addrs_size);
- if (IS_ERR(kaddrs))
- return PTR_ERR(kaddrs);
-
/* Walk through the addrs buffer and count the number of addresses. */
- addr_buf = kaddrs;
while (walk_size < addrs_size) {
- if (walk_size + sizeof(sa_family_t) > addrs_size) {
- kfree(kaddrs);
+ if (walk_size + sizeof(sa_family_t) > addrs_size)
return -EINVAL;
- }
sa_addr = addr_buf;
af = sctp_get_af_specific(sa_addr->sa_family);
@@ -1015,10 +1007,8 @@ static int sctp_setsockopt_bindx(struct sock *sk,
/* If the address family is not supported or if this address
* causes the address buffer to overflow return EINVAL.
*/
- if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
- kfree(kaddrs);
+ if (!af || (walk_size + af->sockaddr_len) > addrs_size)
return -EINVAL;
- }
addrcnt++;
addr_buf += af->sockaddr_len;
walk_size += af->sockaddr_len;
@@ -1029,31 +1019,48 @@ static int sctp_setsockopt_bindx(struct sock *sk,
case SCTP_BINDX_ADD_ADDR:
/* Allow security module to validate bindx addresses. */
err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_BINDX_ADD,
- (struct sockaddr *)kaddrs,
- addrs_size);
+ addrs, addrs_size);
if (err)
- goto out;
- err = sctp_bindx_add(sk, kaddrs, addrcnt);
+ return err;
+ err = sctp_bindx_add(sk, addrs, addrcnt);
if (err)
- goto out;
- err = sctp_send_asconf_add_ip(sk, kaddrs, addrcnt);
- break;
-
+ return err;
+ return sctp_send_asconf_add_ip(sk, addrs, addrcnt);
case SCTP_BINDX_REM_ADDR:
- err = sctp_bindx_rem(sk, kaddrs, addrcnt);
+ err = sctp_bindx_rem(sk, addrs, addrcnt);
if (err)
- goto out;
- err = sctp_send_asconf_del_ip(sk, kaddrs, addrcnt);
- break;
+ return err;
+ return sctp_send_asconf_del_ip(sk, addrs, addrcnt);
default:
- err = -EINVAL;
- break;
+ return -EINVAL;
}
+}
-out:
+static int sctp_setsockopt_bindx(struct sock *sk,
+ struct sockaddr __user *addrs,
+ int addrs_size, int op)
+{
+ struct sockaddr *kaddrs;
+ int err;
+
+ kaddrs = memdup_user(addrs, addrs_size);
+ if (IS_ERR(kaddrs))
+ return PTR_ERR(kaddrs);
+ err = sctp_setsockopt_bindx_kernel(sk, kaddrs, addrs_size, op);
kfree(kaddrs);
+ return err;
+}
+static int sctp_bind_add(struct sock *sk, struct sockaddr *addrs,
+ int addrlen)
+{
+ int err;
+
+ lock_sock(sk);
+ err = sctp_setsockopt_bindx_kernel(sk, addrs, addrlen,
+ SCTP_BINDX_ADD_ADDR);
+ release_sock(sk);
return err;
}
@@ -9625,6 +9632,7 @@ struct proto sctp_prot = {
.sendmsg = sctp_sendmsg,
.recvmsg = sctp_recvmsg,
.bind = sctp_bind,
+ .bind_add = sctp_bind_add,
.backlog_rcv = sctp_backlog_rcv,
.hash = sctp_hash,
.unhash = sctp_unhash,
@@ -9667,6 +9675,7 @@ struct proto sctpv6_prot = {
.sendmsg = sctp_sendmsg,
.recvmsg = sctp_recvmsg,
.bind = sctp_bind,
+ .bind_add = sctp_bind_add,
.backlog_rcv = sctp_backlog_rcv,
.hash = sctp_hash,
.unhash = sctp_unhash,
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index c82dbdcf13f2..0c3d2b4d7321 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -336,13 +336,16 @@ fail:
return NULL;
}
-void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport,
+void sctp_ulpevent_notify_peer_addr_change(struct sctp_transport *transport,
int state, int error)
{
struct sctp_association *asoc = transport->asoc;
struct sockaddr_storage addr;
struct sctp_ulpevent *event;
+ if (asoc->state < SCTP_STATE_ESTABLISHED)
+ return;
+
memset(&addr, 0, sizeof(struct sockaddr_storage));
memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len);
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index b2b85e1be72c..a47e8855e045 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -116,19 +116,15 @@ int smc_cdc_msg_send(struct smc_connection *conn,
}
/* send a validation msg indicating the move of a conn to an other QP link */
-int smcr_cdc_msg_send_validation(struct smc_connection *conn)
+int smcr_cdc_msg_send_validation(struct smc_connection *conn,
+ struct smc_cdc_tx_pend *pend,
+ struct smc_wr_buf *wr_buf)
{
struct smc_host_cdc_msg *local = &conn->local_tx_ctrl;
struct smc_link *link = conn->lnk;
- struct smc_cdc_tx_pend *pend;
- struct smc_wr_buf *wr_buf;
struct smc_cdc_msg *peer;
int rc;
- rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
- if (rc)
- return rc;
-
peer = (struct smc_cdc_msg *)wr_buf;
peer->common.type = local->common.type;
peer->len = local->len;
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 2ddcc5fb5ceb..0a0a89abd38b 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -296,7 +296,9 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
int smcd_cdc_msg_send(struct smc_connection *conn);
-int smcr_cdc_msg_send_validation(struct smc_connection *conn);
+int smcr_cdc_msg_send_validation(struct smc_connection *conn,
+ struct smc_cdc_tx_pend *pend,
+ struct smc_wr_buf *wr_buf);
int smc_cdc_init(void) __init;
void smcd_cdc_rx_init(struct smc_connection *conn);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 65de700e1f17..7964a21e5e6f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -483,7 +483,8 @@ static int smc_write_space(struct smc_connection *conn)
return space;
}
-static int smc_switch_cursor(struct smc_sock *smc)
+static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
+ struct smc_wr_buf *wr_buf)
{
struct smc_connection *conn = &smc->conn;
union smc_host_cursor cons, fin;
@@ -520,11 +521,14 @@ static int smc_switch_cursor(struct smc_sock *smc)
if (smc->sk.sk_state != SMC_INIT &&
smc->sk.sk_state != SMC_CLOSED) {
- rc = smcr_cdc_msg_send_validation(conn);
+ rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
if (!rc) {
schedule_delayed_work(&conn->tx_work, 0);
smc->sk.sk_data_ready(&smc->sk);
}
+ } else {
+ smc_wr_tx_put_slot(conn->lnk,
+ (struct smc_wr_tx_pend_priv *)pend);
}
return rc;
}
@@ -533,7 +537,9 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
struct smc_link *from_lnk, bool is_dev_err)
{
struct smc_link *to_lnk = NULL;
+ struct smc_cdc_tx_pend *pend;
struct smc_connection *conn;
+ struct smc_wr_buf *wr_buf;
struct smc_sock *smc;
struct rb_node *node;
int i, rc = 0;
@@ -582,10 +588,16 @@ again:
}
sock_hold(&smc->sk);
read_unlock_bh(&lgr->conns_lock);
+ /* pre-fetch buffer outside of send_lock, might sleep */
+ rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
+ if (rc) {
+ smcr_link_down_cond_sched(to_lnk);
+ return NULL;
+ }
/* avoid race with smcr_tx_sndbuf_nonempty() */
spin_lock_bh(&conn->send_lock);
conn->lnk = to_lnk;
- rc = smc_switch_cursor(smc);
+ rc = smc_switch_cursor(smc, pend, wr_buf);
spin_unlock_bh(&conn->send_lock);
sock_put(&smc->sk);
if (rc) {
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index be03f1260d59..014d91b9778e 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -32,7 +32,7 @@
static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
-static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
+static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
[SMC_PNETID_NAME] = {
.type = NLA_NUL_STRING,
.len = SMC_MAX_PNETID_LEN
diff --git a/net/socket.c b/net/socket.c
index 1c9a7260a41d..976426d03f09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3366,94 +3366,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
return err;
}
-struct rtentry32 {
- u32 rt_pad1;
- struct sockaddr rt_dst; /* target address */
- struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
- struct sockaddr rt_genmask; /* target network mask (IP) */
- unsigned short rt_flags;
- short rt_pad2;
- u32 rt_pad3;
- unsigned char rt_tos;
- unsigned char rt_class;
- short rt_pad4;
- short rt_metric; /* +1 for binary compatibility! */
- /* char * */ u32 rt_dev; /* forcing the device at add */
- u32 rt_mtu; /* per route MTU/Window */
- u32 rt_window; /* Window clamping */
- unsigned short rt_irtt; /* Initial RTT */
-};
-
-struct in6_rtmsg32 {
- struct in6_addr rtmsg_dst;
- struct in6_addr rtmsg_src;
- struct in6_addr rtmsg_gateway;
- u32 rtmsg_type;
- u16 rtmsg_dst_len;
- u16 rtmsg_src_len;
- u32 rtmsg_metric;
- u32 rtmsg_info;
- u32 rtmsg_flags;
- s32 rtmsg_ifindex;
-};
-
-static int routing_ioctl(struct net *net, struct socket *sock,
- unsigned int cmd, void __user *argp)
-{
- int ret;
- void *r = NULL;
- struct in6_rtmsg r6;
- struct rtentry r4;
- char devname[16];
- u32 rtdev;
- mm_segment_t old_fs = get_fs();
-
- if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
- struct in6_rtmsg32 __user *ur6 = argp;
- ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
- 3 * sizeof(struct in6_addr));
- ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
- ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
- ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
- ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
- ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
- ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
- ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
-
- r = (void *) &r6;
- } else { /* ipv4 */
- struct rtentry32 __user *ur4 = argp;
- ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
- 3 * sizeof(struct sockaddr));
- ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
- ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
- ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
- ret |= get_user(r4.rt_window, &(ur4->rt_window));
- ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
- ret |= get_user(rtdev, &(ur4->rt_dev));
- if (rtdev) {
- ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
- r4.rt_dev = (char __user __force *)devname;
- devname[15] = 0;
- } else
- r4.rt_dev = NULL;
-
- r = (void *) &r4;
- }
-
- if (ret) {
- ret = -EFAULT;
- goto out;
- }
-
- set_fs(KERNEL_DS);
- ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
- set_fs(old_fs);
-
-out:
- return ret;
-}
-
/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
* for some operations; this forces use of the newer bridge-utils that
* use compatible ioctls
@@ -3492,9 +3404,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGIFMAP:
case SIOCSIFMAP:
return compat_sioc_ifmap(net, cmd, argp);
- case SIOCADDRT:
- case SIOCDELRT:
- return routing_ioctl(net, sock, cmd, argp);
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
if (!sock->ops->gettstamp)
@@ -3716,71 +3625,6 @@ int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
EXPORT_SYMBOL(kernel_getpeername);
/**
- * kernel_getsockopt - get a socket option (kernel space)
- * @sock: socket
- * @level: API level (SOL_SOCKET, ...)
- * @optname: option tag
- * @optval: option value
- * @optlen: option length
- *
- * Assigns the option length to @optlen.
- * Returns 0 or an error.
- */
-
-int kernel_getsockopt(struct socket *sock, int level, int optname,
- char *optval, int *optlen)
-{
- mm_segment_t oldfs = get_fs();
- char __user *uoptval;
- int __user *uoptlen;
- int err;
-
- uoptval = (char __user __force *) optval;
- uoptlen = (int __user __force *) optlen;
-
- set_fs(KERNEL_DS);
- if (level == SOL_SOCKET)
- err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
- else
- err = sock->ops->getsockopt(sock, level, optname, uoptval,
- uoptlen);
- set_fs(oldfs);
- return err;
-}
-EXPORT_SYMBOL(kernel_getsockopt);
-
-/**
- * kernel_setsockopt - set a socket option (kernel space)
- * @sock: socket
- * @level: API level (SOL_SOCKET, ...)
- * @optname: option tag
- * @optval: option value
- * @optlen: option length
- *
- * Returns 0 or an error.
- */
-
-int kernel_setsockopt(struct socket *sock, int level, int optname,
- char *optval, unsigned int optlen)
-{
- mm_segment_t oldfs = get_fs();
- char __user *uoptval;
- int err;
-
- uoptval = (char __user __force *) optval;
-
- set_fs(KERNEL_DS);
- if (level == SOL_SOCKET)
- err = sock_setsockopt(sock, level, optname, uoptval, optlen);
- else
- err = sock->ops->setsockopt(sock, level, optname, uoptval,
- optlen);
- set_fs(oldfs);
- return err;
-}
-EXPORT_SYMBOL(kernel_setsockopt);
-
-/**
* kernel_sendpage - send a &page through a socket (kernel space)
* @sock: socket
* @page: page
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8350d3a2e9a7..61b21dafd7c0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -889,7 +889,9 @@ static void rpc_free_client_work(struct work_struct *work)
* here.
*/
rpc_clnt_debugfs_unregister(clnt);
+ rpc_free_clid(clnt);
rpc_clnt_remove_pipedir(clnt);
+ xprt_put(rcu_dereference_raw(clnt->cl_xprt));
kfree(clnt);
rpciod_down();
@@ -907,10 +909,8 @@ rpc_free_client(struct rpc_clnt *clnt)
rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
clnt->cl_metrics = NULL;
- xprt_put(rcu_dereference_raw(clnt->cl_xprt));
xprt_iter_destroy(&clnt->cl_xpi);
put_cred(clnt->cl_cred);
- rpc_free_clid(clnt);
INIT_WORK(&clnt->cl_work, rpc_free_client_work);
schedule_work(&clnt->cl_work);
@@ -2433,6 +2433,11 @@ rpc_check_timeout(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
+ if (RPC_SIGNALLED(task)) {
+ rpc_call_rpcerror(task, -ERESTARTSYS);
+ return;
+ }
+
if (xprt_adjust_timeout(task->tk_rqstp) == 0)
return;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 023514e392b3..e7a0037d9b56 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -323,17 +323,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
{
- struct svc_sock *svsk;
- struct socket *sock;
- struct linger no_linger = {
- .l_onoff = 1,
- .l_linger = 0,
- };
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- svsk = container_of(xprt, struct svc_sock, sk_xprt);
- sock = svsk->sk_sock;
- kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
- (char *)&no_linger, sizeof(no_linger));
+ sock_no_linger(svsk->sk_sock->sk);
}
/*
@@ -603,8 +595,6 @@ static struct svc_xprt_class svc_udp_class = {
static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
{
- int err, level, optname, one = 1;
-
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
&svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
@@ -624,19 +614,14 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
/* make sure we get destination address info */
switch (svsk->sk_sk->sk_family) {
case AF_INET:
- level = SOL_IP;
- optname = IP_PKTINFO;
+ ip_sock_set_pktinfo(svsk->sk_sock->sk);
break;
case AF_INET6:
- level = SOL_IPV6;
- optname = IPV6_RECVPKTINFO;
+ ip6_sock_set_recvpktinfo(svsk->sk_sock->sk);
break;
default:
BUG();
}
- err = kernel_setsockopt(svsk->sk_sock, level, optname,
- (char *)&one, sizeof(one));
- dprintk("svc: kernel_setsockopt returned %d\n", err);
}
/*
@@ -1337,7 +1322,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
struct sockaddr *newsin = (struct sockaddr *)&addr;
int newlen;
int family;
- int val;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1373,11 +1357,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
* getting requests from IPv4 remotes. Those should
* be shunted to a PF_INET listener via rpcbind.
*/
- val = 1;
if (family == PF_INET6)
- kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
- (char *)&val, sizeof(val));
-
+ ip6_sock_set_v6only(sock->sk);
if (type == SOCK_STREAM)
sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */
error = kernel_bind(sock, sin, len);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 845d0be805ec..3a143e250b9a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1594,21 +1594,6 @@ static int xs_get_random_port(void)
return rand + min;
}
-/**
- * xs_set_reuseaddr_port - set the socket's port and address reuse options
- * @sock: socket
- *
- * Note that this function has to be called on all sockets that share the
- * same port, and it must be called before binding.
- */
-static void xs_sock_set_reuseport(struct socket *sock)
-{
- int opt = 1;
-
- kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT,
- (char *)&opt, sizeof(opt));
-}
-
static unsigned short xs_sock_getport(struct socket *sock)
{
struct sockaddr_storage buf;
@@ -1801,7 +1786,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
xs_reclassify_socket(family, sock);
if (reuseport)
- xs_sock_set_reuseport(sock);
+ sock_set_reuseport(sock->sk);
err = xs_bind(transport, sock);
if (err) {
@@ -2110,7 +2095,6 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
unsigned int keepidle;
unsigned int keepcnt;
- unsigned int opt_on = 1;
unsigned int timeo;
spin_lock(&xprt->transport_lock);
@@ -2122,18 +2106,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
spin_unlock(&xprt->transport_lock);
/* TCP Keepalive options */
- kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&opt_on, sizeof(opt_on));
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
- (char *)&keepidle, sizeof(keepidle));
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&keepidle, sizeof(keepidle));
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
- (char *)&keepcnt, sizeof(keepcnt));
+ sock_set_keepalive(sock->sk);
+ tcp_sock_set_keepidle(sock->sk, keepidle);
+ tcp_sock_set_keepintvl(sock->sk, keepidle);
+ tcp_sock_set_keepcnt(sock->sk, keepcnt);
/* TCP user timeout (see RFC5482) */
- kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
- (char *)&timeo, sizeof(timeo));
+ tcp_sock_set_user_timeout(sock->sk, timeo);
}
static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
@@ -2171,7 +2150,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
if (!transport->inet) {
struct sock *sk = sock->sk;
- unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
/* Avoid temporary address, they are bad for long-lived
* connections such as NFS mounts.
@@ -2180,8 +2158,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
* knowledge about the normal duration of connections,
* MAY override this as appropriate.
*/
- kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
- (char *)&addr_pref, sizeof(addr_pref));
+ if (xs_addr(xprt)->sa_family == PF_INET6) {
+ ip6_sock_set_addr_preferences(sk,
+ IPV6_PREFER_SRC_PUBLIC);
+ }
xs_tcp_set_socket_timeouts(xprt, sock);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 4c20be08b9c4..383f87bc1061 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -46,6 +46,7 @@
#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */
const char tipc_bclink_name[] = "broadcast-link";
+unsigned long sysctl_tipc_bc_retruni __read_mostly;
/**
* struct tipc_bc_base - base structure for keeping broadcast send state
@@ -474,7 +475,7 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
__skb_queue_head_init(&xmitq);
tipc_bcast_lock(net);
- tipc_link_bc_ack_rcv(l, acked, &xmitq);
+ tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL);
tipc_bcast_unlock(net);
tipc_bcbase_xmit(net, &xmitq);
@@ -489,9 +490,11 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
* RCU is locked, no other locks set
*/
int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
- struct tipc_msg *hdr)
+ struct tipc_msg *hdr,
+ struct sk_buff_head *retrq)
{
struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+ struct tipc_gap_ack_blks *ga;
struct sk_buff_head xmitq;
int rc = 0;
@@ -501,8 +504,13 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
if (msg_type(hdr) != STATE_MSG) {
tipc_link_bc_init_rcv(l, hdr);
} else if (!msg_bc_ack_invalid(hdr)) {
- tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
- rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq);
+ tipc_get_gap_ack_blks(&ga, l, hdr, false);
+ if (!sysctl_tipc_bc_retruni)
+ retrq = &xmitq;
+ rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr),
+ msg_bc_gap(hdr), ga, &xmitq,
+ retrq);
+ rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq);
}
tipc_bcast_unlock(net);
@@ -555,10 +563,8 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l)
tipc_sk_rcv(net, inputq);
}
-int tipc_bclink_reset_stats(struct net *net)
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l)
{
- struct tipc_link *l = tipc_bc_sndlink(net);
-
if (!l)
return -ENOPROTOOPT;
@@ -686,7 +692,7 @@ int tipc_bcast_init(struct net *net)
tn->bcbase = bb;
spin_lock_init(&tipc_net(net)->bclock);
- if (!tipc_link_bc_create(net, 0, 0,
+ if (!tipc_link_bc_create(net, 0, 0, NULL,
FB_MTU,
BCLINK_WIN_DEFAULT,
BCLINK_WIN_DEFAULT,
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 9e847d9617d3..4240c95188b1 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -45,6 +45,7 @@ struct tipc_nl_msg;
struct tipc_nlist;
struct tipc_nitem;
extern const char tipc_bclink_name[];
+extern unsigned long sysctl_tipc_bc_retruni;
#define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000)
@@ -93,10 +94,12 @@ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
struct tipc_msg *hdr);
int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
- struct tipc_msg *hdr);
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
+ struct tipc_msg *hdr,
+ struct sk_buff_head *retrq);
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *bcl);
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
-int tipc_bclink_reset_stats(struct net *net);
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l);
u32 tipc_bcast_get_broadcast_mode(struct net *net);
u32 tipc_bcast_get_broadcast_ratio(struct net *net);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index d4675e922a8f..ee3b8d0576b8 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -188,6 +188,8 @@ struct tipc_link {
/* Broadcast */
u16 ackers;
u16 acked;
+ u16 last_gap;
+ struct tipc_gap_ack_blks *last_ga;
struct tipc_link *bc_rcvlink;
struct tipc_link *bc_sndlink;
u8 nack_state;
@@ -249,11 +251,14 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
static void tipc_link_build_bc_init_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
-static int tipc_link_release_pkts(struct tipc_link *l, u16 to);
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap);
-static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+ struct tipc_link *l, u8 start_index);
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr);
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+ u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
- struct sk_buff_head *xmitq);
+ struct sk_buff_head *xmitq,
+ bool *retransmitted, int *rc);
static void tipc_link_update_cwin(struct tipc_link *l, int released,
bool retransmitted);
/*
@@ -370,7 +375,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
snd_l->ackers--;
rcv_l->bc_peer_is_up = true;
rcv_l->state = LINK_ESTABLISHED;
- tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
+ tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL);
trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
tipc_link_reset(rcv_l);
rcv_l->state = LINK_RESET;
@@ -534,7 +539,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
*
* Returns true if link was created, otherwise false
*/
-bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
int mtu, u32 min_win, u32 max_win, u16 peer_caps,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
@@ -549,7 +554,18 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
return false;
l = *link;
- strcpy(l->name, tipc_bclink_name);
+ if (peer_id) {
+ char peer_str[NODE_ID_STR_LEN] = {0,};
+
+ tipc_nodeid2string(peer_str, peer_id);
+ if (strlen(peer_str) > 16)
+ sprintf(peer_str, "%x", peer);
+ /* Broadcast receiver link name: "broadcast-link:<peer>" */
+ snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name,
+ peer_str);
+ } else {
+ strcpy(l->name, tipc_bclink_name);
+ }
trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!");
tipc_link_reset(l);
l->state = LINK_RESET;
@@ -784,8 +800,6 @@ bool tipc_link_too_silent(struct tipc_link *l)
return (l->silent_intv_cnt + 2 > l->abort_limit);
}
-static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
- u16 from, u16 to, struct sk_buff_head *xmitq);
/* tipc_link_timeout - perform periodic task as instructed from node timeout
*/
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
@@ -948,6 +962,9 @@ void tipc_link_reset(struct tipc_link *l)
l->snd_nxt_state = 1;
l->rcv_nxt_state = 1;
l->acked = 0;
+ l->last_gap = 0;
+ kfree(l->last_ga);
+ l->last_ga = NULL;
l->silent_intv_cnt = 0;
l->rst_cnt = 0;
l->bc_peer_is_up = false;
@@ -1183,68 +1200,14 @@ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
if (link_is_bc_sndlink(l)) {
r->state = LINK_RESET;
- *rc = TIPC_LINK_DOWN_EVT;
+ *rc |= TIPC_LINK_DOWN_EVT;
} else {
- *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ *rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
return true;
}
-/* tipc_link_bc_retrans() - retransmit zero or more packets
- * @l: the link to transmit on
- * @r: the receiving link ordering the retransmit. Same as l if unicast
- * @from: retransmit from (inclusive) this sequence number
- * @to: retransmit to (inclusive) this sequence number
- * xmitq: queue for accumulating the retransmitted packets
- */
-static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
- u16 from, u16 to, struct sk_buff_head *xmitq)
-{
- struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
- u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
- u16 ack = l->rcv_nxt - 1;
- int retransmitted = 0;
- struct tipc_msg *hdr;
- int rc = 0;
-
- if (!skb)
- return 0;
- if (less(to, from))
- return 0;
-
- trace_tipc_link_retrans(r, from, to, &l->transmq);
-
- if (link_retransmit_failure(l, r, &rc))
- return rc;
-
- skb_queue_walk(&l->transmq, skb) {
- hdr = buf_msg(skb);
- if (less(msg_seqno(hdr), from))
- continue;
- if (more(msg_seqno(hdr), to))
- break;
- if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
- continue;
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
- _skb = pskb_copy(skb, GFP_ATOMIC);
- if (!_skb)
- return 0;
- hdr = buf_msg(_skb);
- msg_set_ack(hdr, ack);
- msg_set_bcast_ack(hdr, bc_ack);
- _skb->priority = TC_PRIO_CONTROL;
- __skb_queue_tail(xmitq, _skb);
- l->stats.retransmitted++;
- retransmitted++;
- /* Increase actual retrans counter & mark first time */
- if (!TIPC_SKB_CB(skb)->retr_cnt++)
- TIPC_SKB_CB(skb)->retr_stamp = jiffies;
- }
- tipc_link_update_cwin(l, 0, retransmitted);
- return 0;
-}
-
/* tipc_data_input - deliver data and name distr msgs to upper layer
*
* Consumes buffer if message is of right type
@@ -1402,46 +1365,68 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb,
return rc;
}
-static int tipc_link_release_pkts(struct tipc_link *l, u16 acked)
-{
- int released = 0;
- struct sk_buff *skb, *tmp;
-
- skb_queue_walk_safe(&l->transmq, skb, tmp) {
- if (more(buf_seqno(skb), acked))
- break;
- __skb_unlink(skb, &l->transmq);
- kfree_skb(skb);
- released++;
+/**
+ * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG
+ * @ga: returned pointer to the Gap ACK blocks if any
+ * @l: the tipc link
+ * @hdr: the PROTOCOL/STATE_MSG header
+ * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0)
+ *
+ * Return: the total Gap ACK blocks size
+ */
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+ struct tipc_msg *hdr, bool uc)
+{
+ struct tipc_gap_ack_blks *p;
+ u16 sz = 0;
+
+ /* Does peer support the Gap ACK blocks feature? */
+ if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
+ p = (struct tipc_gap_ack_blks *)msg_data(hdr);
+ sz = ntohs(p->len);
+ /* Sanity check */
+ if (sz == tipc_gap_ack_blks_sz(p->ugack_cnt + p->bgack_cnt)) {
+ /* Good, check if the desired type exists */
+ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
+ goto ok;
+ /* Backward compatible: peer might not support bc, but uc? */
+ } else if (uc && sz == tipc_gap_ack_blks_sz(p->ugack_cnt)) {
+ if (p->ugack_cnt) {
+ p->bgack_cnt = 0;
+ goto ok;
+ }
+ }
}
- return released;
+ /* Other cases: ignore! */
+ p = NULL;
+
+ok:
+ *ga = p;
+ return sz;
}
-/* tipc_build_gap_ack_blks - build Gap ACK blocks
- * @l: tipc link that data have come with gaps in sequence if any
- * @data: data buffer to store the Gap ACK blocks after built
- *
- * returns the actual allocated memory size
- */
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap)
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+ struct tipc_link *l, u8 start_index)
{
+ struct tipc_gap_ack *gacks = &ga->gacks[start_index];
struct sk_buff *skb = skb_peek(&l->deferdq);
- struct tipc_gap_ack_blks *ga = data;
- u16 len, expect, seqno = 0;
+ u16 expect, seqno = 0;
u8 n = 0;
- if (!skb || !gap)
- goto exit;
+ if (!skb)
+ return 0;
expect = buf_seqno(skb);
skb_queue_walk(&l->deferdq, skb) {
seqno = buf_seqno(skb);
if (unlikely(more(seqno, expect))) {
- ga->gacks[n].ack = htons(expect - 1);
- ga->gacks[n].gap = htons(seqno - expect);
- if (++n >= MAX_GAP_ACK_BLKS) {
- pr_info_ratelimited("Too few Gap ACK blocks!\n");
- goto exit;
+ gacks[n].ack = htons(expect - 1);
+ gacks[n].gap = htons(seqno - expect);
+ if (++n >= MAX_GAP_ACK_BLKS / 2) {
+ pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n",
+ l->name, n,
+ skb_queue_len(&l->deferdq));
+ return n;
}
} else if (unlikely(less(seqno, expect))) {
pr_warn("Unexpected skb in deferdq!\n");
@@ -1451,14 +1436,44 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap)
}
/* last block */
- ga->gacks[n].ack = htons(seqno);
- ga->gacks[n].gap = 0;
+ gacks[n].ack = htons(seqno);
+ gacks[n].gap = 0;
n++;
+ return n;
+}
-exit:
- len = tipc_gap_ack_blks_sz(n);
+/* tipc_build_gap_ack_blks - build Gap ACK blocks
+ * @l: tipc unicast link
+ * @hdr: the tipc message buffer to store the Gap ACK blocks after built
+ *
+ * The function builds Gap ACK blocks for both the unicast & broadcast receiver
+ * links of a certain peer, the buffer after built has the network data format
+ * as found at the struct tipc_gap_ack_blks definition.
+ *
+ * returns the actual allocated memory size
+ */
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
+{
+ struct tipc_link *bcl = l->bc_rcvlink;
+ struct tipc_gap_ack_blks *ga;
+ u16 len;
+
+ ga = (struct tipc_gap_ack_blks *)msg_data(hdr);
+
+ /* Start with broadcast link first */
+ tipc_bcast_lock(bcl->net);
+ msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
+ msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
+ ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0);
+ tipc_bcast_unlock(bcl->net);
+
+ /* Now for unicast link, but an explicit NACK only (???) */
+ ga->ugack_cnt = (msg_seq_gap(hdr)) ?
+ __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
+
+ /* Total len */
+ len = tipc_gap_ack_blks_sz(ga->bgack_cnt + ga->ugack_cnt);
ga->len = htons(len);
- ga->gack_cnt = n;
return len;
}
@@ -1466,47 +1481,111 @@ exit:
* acked packets, also doing retransmissions if
* gaps found
* @l: tipc link with transmq queue to be advanced
+ * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast)
* @acked: seqno of last packet acked by peer without any gaps before
* @gap: # of gap packets
* @ga: buffer pointer to Gap ACK blocks from peer
* @xmitq: queue for accumulating the retransmitted packets if any
+ * @retransmitted: returned boolean value if a retransmission is really issued
+ * @rc: returned code e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failures
+ * happens (- unlikely case)
*
- * In case of a repeated retransmit failures, the call will return shortly
- * with a returned code (e.g. TIPC_LINK_DOWN_EVT)
+ * Return: the number of packets released from the link transmq
*/
-static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+ u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
- struct sk_buff_head *xmitq)
+ struct sk_buff_head *xmitq,
+ bool *retransmitted, int *rc)
{
+ struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL;
+ struct tipc_gap_ack *gacks = NULL;
struct sk_buff *skb, *_skb, *tmp;
struct tipc_msg *hdr;
+ u32 qlen = skb_queue_len(&l->transmq);
+ u16 nacked = acked, ngap = gap, gack_cnt = 0;
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
- bool retransmitted = false;
u16 ack = l->rcv_nxt - 1;
- bool passed = false;
- u16 released = 0;
u16 seqno, n = 0;
- int rc = 0;
+ u16 end = r->acked, start = end, offset = r->last_gap;
+ u16 si = (last_ga) ? last_ga->start_index : 0;
+ bool is_uc = !link_is_bc_sndlink(l);
+ bool bc_has_acked = false;
+
+ trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq);
+
+ /* Determine Gap ACK blocks if any for the particular link */
+ if (ga && is_uc) {
+ /* Get the Gap ACKs, uc part */
+ gack_cnt = ga->ugack_cnt;
+ gacks = &ga->gacks[ga->bgack_cnt];
+ } else if (ga) {
+ /* Copy the Gap ACKs, bc part, for later renewal if needed */
+ this_ga = kmemdup(ga, tipc_gap_ack_blks_sz(ga->bgack_cnt),
+ GFP_ATOMIC);
+ if (likely(this_ga)) {
+ this_ga->start_index = 0;
+ /* Start with the bc Gap ACKs */
+ gack_cnt = this_ga->bgack_cnt;
+ gacks = &this_ga->gacks[0];
+ } else {
+ /* Hmm, we can get in trouble..., simply ignore it */
+ pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n");
+ }
+ }
+ /* Advance the link transmq */
skb_queue_walk_safe(&l->transmq, skb, tmp) {
seqno = buf_seqno(skb);
next_gap_ack:
- if (less_eq(seqno, acked)) {
+ if (less_eq(seqno, nacked)) {
+ if (is_uc)
+ goto release;
+ /* Skip packets peer has already acked */
+ if (!more(seqno, r->acked))
+ continue;
+ /* Get the next of last Gap ACK blocks */
+ while (more(seqno, end)) {
+ if (!last_ga || si >= last_ga->bgack_cnt)
+ break;
+ start = end + offset + 1;
+ end = ntohs(last_ga->gacks[si].ack);
+ offset = ntohs(last_ga->gacks[si].gap);
+ si++;
+ WARN_ONCE(more(start, end) ||
+ (!offset &&
+ si < last_ga->bgack_cnt) ||
+ si > MAX_GAP_ACK_BLKS,
+ "Corrupted Gap ACK: %d %d %d %d %d\n",
+ start, end, offset, si,
+ last_ga->bgack_cnt);
+ }
+ /* Check against the last Gap ACK block */
+ if (in_range(seqno, start, end))
+ continue;
+ /* Update/release the packet peer is acking */
+ bc_has_acked = true;
+ if (--TIPC_SKB_CB(skb)->ackers)
+ continue;
+release:
/* release skb */
__skb_unlink(skb, &l->transmq);
kfree_skb(skb);
- released++;
- } else if (less_eq(seqno, acked + gap)) {
- /* First, check if repeated retrans failures occurs? */
- if (!passed && link_retransmit_failure(l, l, &rc))
- return rc;
- passed = true;
-
+ } else if (less_eq(seqno, nacked + ngap)) {
+ /* First gap: check if repeated retrans failures? */
+ if (unlikely(seqno == acked + 1 &&
+ link_retransmit_failure(l, r, rc))) {
+ /* Ignore this bc Gap ACKs if any */
+ kfree(this_ga);
+ this_ga = NULL;
+ break;
+ }
/* retransmit skb if unrestricted*/
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
- TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
+ TIPC_SKB_CB(skb)->nxt_retr = (is_uc) ?
+ TIPC_UC_RETR_TIME : TIPC_BC_RETR_LIM;
_skb = pskb_copy(skb, GFP_ATOMIC);
if (!_skb)
continue;
@@ -1516,25 +1595,53 @@ next_gap_ack:
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
l->stats.retransmitted++;
- retransmitted = true;
+ if (!is_uc)
+ r->stats.retransmitted++;
+ *retransmitted = true;
/* Increase actual retrans counter & mark first time */
if (!TIPC_SKB_CB(skb)->retr_cnt++)
TIPC_SKB_CB(skb)->retr_stamp = jiffies;
} else {
/* retry with Gap ACK blocks if any */
- if (!ga || n >= ga->gack_cnt)
+ if (n >= gack_cnt)
break;
- acked = ntohs(ga->gacks[n].ack);
- gap = ntohs(ga->gacks[n].gap);
+ nacked = ntohs(gacks[n].ack);
+ ngap = ntohs(gacks[n].gap);
n++;
goto next_gap_ack;
}
}
- if (released || retransmitted)
- tipc_link_update_cwin(l, released, retransmitted);
- if (released)
- tipc_link_advance_backlog(l, xmitq);
- return 0;
+
+ /* Renew last Gap ACK blocks for bc if needed */
+ if (bc_has_acked) {
+ if (this_ga) {
+ kfree(last_ga);
+ r->last_ga = this_ga;
+ r->last_gap = gap;
+ } else if (last_ga) {
+ if (less(acked, start)) {
+ si--;
+ offset = start - acked - 1;
+ } else if (less(acked, end)) {
+ acked = end;
+ }
+ if (si < last_ga->bgack_cnt) {
+ last_ga->start_index = si;
+ r->last_gap = offset;
+ } else {
+ kfree(last_ga);
+ r->last_ga = NULL;
+ r->last_gap = 0;
+ }
+ } else {
+ r->last_gap = 0;
+ }
+ r->acked = acked;
+ } else {
+ kfree(this_ga);
+ }
+
+ return qlen - skb_queue_len(&l->transmq);
}
/* tipc_link_build_state_msg: prepare link state message for transmission
@@ -1651,11 +1758,13 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
kfree_skb(skb);
break;
}
- released += tipc_link_release_pkts(l, msg_ack(hdr));
+ released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0,
+ NULL, NULL, NULL, NULL);
/* Defer delivery if sequence gap */
if (unlikely(seqno != rcv_nxt)) {
- __tipc_skb_queue_sorted(defq, seqno, skb);
+ if (!__tipc_skb_queue_sorted(defq, seqno, skb))
+ l->stats.duplicates++;
rc |= tipc_link_build_nack_msg(l, xmitq);
break;
}
@@ -1689,15 +1798,15 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
int tolerance, int priority,
struct sk_buff_head *xmitq)
{
+ struct tipc_mon_state *mstate = &l->mon_state;
+ struct sk_buff_head *dfq = &l->deferdq;
struct tipc_link *bcl = l->bc_rcvlink;
- struct sk_buff *skb;
struct tipc_msg *hdr;
- struct sk_buff_head *dfq = &l->deferdq;
+ struct sk_buff *skb;
bool node_up = link_is_up(bcl);
- struct tipc_mon_state *mstate = &l->mon_state;
+ u16 glen = 0, bc_rcvgap = 0;
int dlen = 0;
void *data;
- u16 glen = 0;
/* Don't send protocol message during reset or link failover */
if (tipc_link_is_blocked(l))
@@ -1735,11 +1844,12 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
if (l->peer_caps & TIPC_LINK_PROTO_SEQNO)
msg_set_seqno(hdr, l->snd_nxt_state++);
msg_set_seq_gap(hdr, rcvgap);
- msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
+ bc_rcvgap = link_bc_rcv_gap(bcl);
+ msg_set_bc_gap(hdr, bc_rcvgap);
msg_set_probe(hdr, probe);
msg_set_is_keepalive(hdr, probe || probe_reply);
if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
- glen = tipc_build_gap_ack_blks(l, data, rcvgap);
+ glen = tipc_build_gap_ack_blks(l, hdr);
tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id);
msg_set_size(hdr, INT_H_SIZE + glen + dlen);
skb_trim(skb, INT_H_SIZE + glen + dlen);
@@ -1760,6 +1870,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
l->stats.sent_probes++;
if (rcvgap)
l->stats.sent_nacks++;
+ if (bc_rcvgap)
+ bcl->stats.sent_nacks++;
skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, skb);
trace_tipc_proto_build(skb, false, l->name);
@@ -2027,20 +2139,19 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
{
struct tipc_msg *hdr = buf_msg(skb);
struct tipc_gap_ack_blks *ga = NULL;
- u16 rcvgap = 0;
- u16 ack = msg_ack(hdr);
- u16 gap = msg_seq_gap(hdr);
+ bool reply = msg_probe(hdr), retransmitted = false;
+ u16 dlen = msg_data_sz(hdr), glen = 0;
u16 peers_snd_nxt = msg_next_sent(hdr);
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
+ u16 gap = msg_seq_gap(hdr);
+ u16 ack = msg_ack(hdr);
u16 rcv_nxt = l->rcv_nxt;
- u16 dlen = msg_data_sz(hdr);
+ u16 rcvgap = 0;
int mtyp = msg_type(hdr);
- bool reply = msg_probe(hdr);
- u16 glen = 0;
- void *data;
+ int rc = 0, released;
char *if_name;
- int rc = 0;
+ void *data;
trace_tipc_proto_rcv(skb, false, l->name);
if (tipc_link_is_blocked(l) || !xmitq)
@@ -2137,13 +2248,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
}
/* Receive Gap ACK blocks from peer if any */
- if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
- ga = (struct tipc_gap_ack_blks *)data;
- glen = ntohs(ga->len);
- /* sanity check: if failed, ignore Gap ACK blocks */
- if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt))
- ga = NULL;
- }
+ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
&l->mon_state, l->bearer_id);
@@ -2158,9 +2263,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
rcvgap, 0, 0, xmitq);
- rc |= tipc_link_advance_transmq(l, ack, gap, ga, xmitq);
+ released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq,
+ &retransmitted, &rc);
if (gap)
l->stats.recv_nacks++;
+ if (released || retransmitted)
+ tipc_link_update_cwin(l, released, retransmitted);
+ if (released)
+ tipc_link_advance_backlog(l, xmitq);
if (unlikely(!skb_queue_empty(&l->wakeupq)))
link_prepare_wakeup(l);
}
@@ -2246,10 +2356,7 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq)
{
- struct tipc_link *snd_l = l->bc_sndlink;
u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
- u16 from = msg_bcast_ack(hdr) + 1;
- u16 to = from + msg_bc_gap(hdr) - 1;
int rc = 0;
if (!link_is_up(l))
@@ -2265,14 +2372,10 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
if (!l->bc_peer_is_up)
return rc;
- l->stats.recv_nacks++;
-
/* Ignore if peers_snd_nxt goes beyond receive window */
if (more(peers_snd_nxt, l->rcv_nxt + l->window))
return rc;
- rc = tipc_link_bc_retrans(snd_l, l, from, to, xmitq);
-
l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l))
rc |= TIPC_LINK_SND_STATE;
@@ -2307,38 +2410,34 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
return 0;
}
-void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
- struct sk_buff_head *xmitq)
+int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ struct sk_buff_head *retrq)
{
- struct sk_buff *skb, *tmp;
- struct tipc_link *snd_l = l->bc_sndlink;
-
- if (!link_is_up(l) || !l->bc_peer_is_up)
- return;
+ struct tipc_link *l = r->bc_sndlink;
+ bool unused = false;
+ int rc = 0;
- if (!more(acked, l->acked))
- return;
+ if (!link_is_up(r) || !r->bc_peer_is_up)
+ return 0;
- trace_tipc_link_bc_ack(l, l->acked, acked, &snd_l->transmq);
- /* Skip over packets peer has already acked */
- skb_queue_walk(&snd_l->transmq, skb) {
- if (more(buf_seqno(skb), l->acked))
- break;
+ if (gap) {
+ l->stats.recv_nacks++;
+ r->stats.recv_nacks++;
}
- /* Update/release the packets peer is acking now */
- skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) {
- if (more(buf_seqno(skb), acked))
- break;
- if (!--TIPC_SKB_CB(skb)->ackers) {
- __skb_unlink(skb, &snd_l->transmq);
- kfree_skb(skb);
- }
- }
- l->acked = acked;
- tipc_link_advance_backlog(snd_l, xmitq);
- if (unlikely(!skb_queue_empty(&snd_l->wakeupq)))
- link_prepare_wakeup(snd_l);
+ if (less(acked, r->acked) || (acked == r->acked && !gap && !ga))
+ return 0;
+
+ trace_tipc_link_bc_ack(r, acked, gap, &l->transmq);
+ tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc);
+
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
+
+ return rc;
}
/* tipc_link_bc_nack_rcv(): receive broadcast nack message
@@ -2366,8 +2465,8 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
return 0;
if (dnode == tipc_own_addr(l->net)) {
- tipc_link_bc_ack_rcv(l, acked, xmitq);
- rc = tipc_link_bc_retrans(l->bc_sndlink, l, from, to, xmitq);
+ rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq,
+ xmitq);
l->stats.recv_nacks++;
return rc;
}
@@ -2639,16 +2738,15 @@ msg_full:
return -EMSGSIZE;
}
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+ struct tipc_link *bcl)
{
int err;
void *hdr;
struct nlattr *attrs;
struct nlattr *prop;
- struct tipc_net *tn = net_generic(net, tipc_net_id);
u32 bc_mode = tipc_bcast_get_broadcast_mode(net);
u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net);
- struct tipc_link *bcl = tn->bcl;
if (!bcl)
return 0;
@@ -2735,21 +2833,6 @@ void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
l->abort_limit = limit;
}
-char *tipc_link_name_ext(struct tipc_link *l, char *buf)
-{
- if (!l)
- scnprintf(buf, TIPC_MAX_LINK_NAME, "null");
- else if (link_is_bc_sndlink(l))
- scnprintf(buf, TIPC_MAX_LINK_NAME, "broadcast-sender");
- else if (link_is_bc_rcvlink(l))
- scnprintf(buf, TIPC_MAX_LINK_NAME,
- "broadcast-receiver, peer %x", l->addr);
- else
- memcpy(buf, l->name, TIPC_MAX_LINK_NAME);
-
- return buf;
-}
-
/**
* tipc_link_dump - dump TIPC link data
* @l: tipc link to be dumped
diff --git a/net/tipc/link.h b/net/tipc/link.h
index d3c1c3fc1659..fc07232c9a12 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -80,7 +80,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
struct tipc_link **link);
-bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
int mtu, u32 min_win, u32 max_win, u16 peer_caps,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
@@ -111,7 +111,6 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l);
u16 tipc_link_acked(struct tipc_link *l);
u32 tipc_link_id(struct tipc_link *l);
char *tipc_link_name(struct tipc_link *l);
-char *tipc_link_name_ext(struct tipc_link *l, char *buf);
u32 tipc_link_state(struct tipc_link *l);
char tipc_link_plane(struct tipc_link *l);
int tipc_link_prio(struct tipc_link *l);
@@ -143,8 +142,12 @@ int tipc_link_bc_peers(struct tipc_link *l);
void tipc_link_set_mtu(struct tipc_link *l, int mtu);
int tipc_link_mtu(struct tipc_link *l);
int tipc_link_mss(struct tipc_link *l);
-void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
- struct sk_buff_head *xmitq);
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+ struct tipc_msg *hdr, bool uc);
+int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
+ struct tipc_gap_ack_blks *ga,
+ struct sk_buff_head *xmitq,
+ struct sk_buff_head *retrq);
void tipc_link_build_bc_sync_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 4d0e0bdd997b..c0afcd627c5e 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -212,7 +212,7 @@ err:
int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
int mss, struct sk_buff_head *txq)
{
- struct sk_buff *skb, *prev;
+ struct sk_buff *skb;
int accounted, total, curr;
int mlen, cpy, rem = dlen;
struct tipc_msg *hdr;
@@ -223,7 +223,6 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
while (rem) {
if (!skb || skb->len >= mss) {
- prev = skb;
skb = tipc_buf_acquire(mss, GFP_KERNEL);
if (unlikely(!skb))
return -ENOMEM;
@@ -235,9 +234,6 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
msg_set_size(hdr, MIN_H_SIZE);
__skb_queue_tail(txq, skb);
total += 1;
- if (prev)
- msg_set_ack_required(buf_msg(prev), 0);
- msg_set_ack_required(hdr, 1);
}
hdr = buf_msg(skb);
curr = msg_blocks(hdr);
@@ -825,19 +821,19 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
* @seqno: sequence number of buffer to add
* @skb: buffer to add
*/
-void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb)
{
struct sk_buff *_skb, *tmp;
if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
__skb_queue_head(list, skb);
- return;
+ return true;
}
if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
__skb_queue_tail(list, skb);
- return;
+ return true;
}
skb_queue_walk_safe(list, _skb, tmp) {
@@ -846,9 +842,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
if (seqno == buf_seqno(_skb))
break;
__skb_queue_before(list, _skb, skb);
- return;
+ return true;
}
kfree_skb(skb);
+ return false;
}
void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 871feadbbc19..58660d56bc83 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -160,20 +160,39 @@ struct tipc_gap_ack {
/* struct tipc_gap_ack_blks
* @len: actual length of the record
- * @gack_cnt: number of Gap ACK blocks in the record
+ * @ugack_cnt: number of Gap ACK blocks for unicast (following the broadcast
+ * ones)
+ * @start_index: starting index for "valid" broadcast Gap ACK blocks
+ * @bgack_cnt: number of Gap ACK blocks for broadcast in the record
* @gacks: array of Gap ACK blocks
+ *
+ * 31 16 15 0
+ * +-------------+-------------+-------------+-------------+
+ * | bgack_cnt | ugack_cnt | len |
+ * +-------------+-------------+-------------+-------------+ -
+ * | gap | ack | |
+ * +-------------+-------------+-------------+-------------+ > bc gacks
+ * : : : |
+ * +-------------+-------------+-------------+-------------+ -
+ * | gap | ack | |
+ * +-------------+-------------+-------------+-------------+ > uc gacks
+ * : : : |
+ * +-------------+-------------+-------------+-------------+ -
*/
struct tipc_gap_ack_blks {
__be16 len;
- u8 gack_cnt;
- u8 reserved;
+ union {
+ u8 ugack_cnt;
+ u8 start_index;
+ };
+ u8 bgack_cnt;
struct tipc_gap_ack gacks[];
};
#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \
sizeof(struct tipc_gap_ack) * (n))
-#define MAX_GAP_ACK_BLKS 32
+#define MAX_GAP_ACK_BLKS 128
#define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS)
static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
@@ -321,9 +340,19 @@ static inline int msg_ack_required(struct tipc_msg *m)
return msg_bits(m, 0, 18, 1);
}
-static inline void msg_set_ack_required(struct tipc_msg *m, u32 d)
+static inline void msg_set_ack_required(struct tipc_msg *m)
{
- msg_set_bits(m, 0, 18, 1, d);
+ msg_set_bits(m, 0, 18, 1, 1);
+}
+
+static inline int msg_nagle_ack(struct tipc_msg *m)
+{
+ return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_nagle_ack(struct tipc_msg *m)
+{
+ msg_set_bits(m, 0, 18, 1, 1);
}
static inline bool msg_is_rcast(struct tipc_msg *m)
@@ -1126,7 +1155,7 @@ bool tipc_msg_assemble(struct sk_buff_head *list);
bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
struct sk_buff_head *cpy);
-void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
struct sk_buff *skb);
bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index bb9862410e68..c4aee6247d55 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -188,7 +188,7 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
},
{
.cmd = TIPC_NL_LINK_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .validate = GENL_DONT_VALIDATE_STRICT,
.doit = tipc_nl_node_get_link,
.dumpit = tipc_nl_node_dump_link,
},
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 803a3a6d0f50..0312fb181d94 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1138,7 +1138,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
if (unlikely(!n->bc_entry.link)) {
snd_l = tipc_bc_sndlink(net);
if (!tipc_link_bc_create(net, tipc_own_addr(net),
- addr, U16_MAX,
+ addr, peer_id, U16_MAX,
tipc_link_min_win(snd_l),
tipc_link_max_win(snd_l),
n->capabilities,
@@ -1772,7 +1772,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
struct tipc_link *ucl;
int rc;
- rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
+ rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq);
if (rc & TIPC_LINK_DOWN_EVT) {
tipc_node_reset_links(n);
@@ -2071,10 +2071,16 @@ rcv:
le = &n->links[bearer_id];
/* Ensure broadcast reception is in synch with peer's send state */
- if (unlikely(usr == LINK_PROTOCOL))
+ if (unlikely(usr == LINK_PROTOCOL)) {
+ if (unlikely(skb_linearize(skb))) {
+ tipc_node_put(n);
+ goto discard;
+ }
+ hdr = buf_msg(skb);
tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
- else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
+ } else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) {
tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr);
+ }
/* Receive packet directly if conditions permit */
tipc_node_read_lock(n);
@@ -2429,7 +2435,7 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
if (strcmp(name, tipc_bclink_name) == 0) {
- err = tipc_nl_add_bc_link(net, &msg);
+ err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl);
if (err)
goto err_free;
} else {
@@ -2473,6 +2479,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
struct tipc_node *node;
struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
struct net *net = sock_net(skb->sk);
+ struct tipc_net *tn = tipc_net(net);
struct tipc_link_entry *le;
if (!info->attrs[TIPC_NLA_LINK])
@@ -2489,11 +2496,26 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
- if (strcmp(link_name, tipc_bclink_name) == 0) {
- err = tipc_bclink_reset_stats(net);
+ err = -EINVAL;
+ if (!strcmp(link_name, tipc_bclink_name)) {
+ err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net));
if (err)
return err;
return 0;
+ } else if (strstr(link_name, tipc_bclink_name)) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(node, &tn->node_list, list) {
+ tipc_node_read_lock(node);
+ link = node->bc_entry.link;
+ if (link && !strcmp(link_name, tipc_link_name(link))) {
+ err = tipc_bclink_reset_stats(net, link);
+ tipc_node_read_unlock(node);
+ break;
+ }
+ tipc_node_read_unlock(node);
+ }
+ rcu_read_unlock();
+ return err;
}
node = tipc_node_find_by_name(net, link_name, &bearer_id);
@@ -2517,7 +2539,8 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
/* Caller should hold node lock */
static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
- struct tipc_node *node, u32 *prev_link)
+ struct tipc_node *node, u32 *prev_link,
+ bool bc_link)
{
u32 i;
int err;
@@ -2533,6 +2556,14 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
if (err)
return err;
}
+
+ if (bc_link) {
+ *prev_link = i;
+ err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link);
+ if (err)
+ return err;
+ }
+
*prev_link = 0;
return 0;
@@ -2541,17 +2572,36 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+ struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *node;
struct tipc_nl_msg msg;
u32 prev_node = cb->args[0];
u32 prev_link = cb->args[1];
int done = cb->args[2];
+ bool bc_link = cb->args[3];
int err;
if (done)
return 0;
+ if (!prev_node) {
+ /* Check if broadcast-receiver links dumping is needed */
+ if (attrs && attrs[TIPC_NLA_LINK]) {
+ err = nla_parse_nested_deprecated(link,
+ TIPC_NLA_LINK_MAX,
+ attrs[TIPC_NLA_LINK],
+ tipc_nl_link_policy,
+ NULL);
+ if (unlikely(err))
+ return err;
+ if (unlikely(!link[TIPC_NLA_LINK_BROADCAST]))
+ return -EINVAL;
+ bc_link = true;
+ }
+ }
+
msg.skb = skb;
msg.portid = NETLINK_CB(cb->skb).portid;
msg.seq = cb->nlh->nlmsg_seq;
@@ -2575,7 +2625,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
list) {
tipc_node_read_lock(node);
err = __tipc_nl_add_node_links(net, &msg, node,
- &prev_link);
+ &prev_link, bc_link);
tipc_node_read_unlock(node);
if (err)
goto out;
@@ -2583,14 +2633,14 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
prev_node = node->addr;
}
} else {
- err = tipc_nl_add_bc_link(net, &msg);
+ err = tipc_nl_add_bc_link(net, &msg, tn->bcl);
if (err)
goto out;
list_for_each_entry_rcu(node, &tn->node_list, list) {
tipc_node_read_lock(node);
err = __tipc_nl_add_node_links(net, &msg, node,
- &prev_link);
+ &prev_link, bc_link);
tipc_node_read_unlock(node);
if (err)
goto out;
@@ -2605,6 +2655,7 @@ out:
cb->args[0] = prev_node;
cb->args[1] = prev_link;
cb->args[2] = done;
+ cb->args[3] = bc_link;
return skb->len;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e370ad0edd76..3734cdbedc9c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -48,6 +48,8 @@
#include "group.h"
#include "trace.h"
+#define NAGLE_START_INIT 4
+#define NAGLE_START_MAX 1024
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1
@@ -119,7 +121,10 @@ struct tipc_sock {
struct rcu_head rcu;
struct tipc_group *group;
u32 oneway;
+ u32 nagle_start;
u16 snd_backlog;
+ u16 msg_acc;
+ u16 pkt_cnt;
bool expect_ack;
bool nodelay;
bool group_is_open;
@@ -143,7 +148,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
-static void tipc_sk_push_backlog(struct tipc_sock *tsk);
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -191,17 +196,17 @@ static int tsk_importance(struct tipc_sock *tsk)
return msg_importance(&tsk->phdr);
}
-static int tsk_set_importance(struct tipc_sock *tsk, int imp)
+static struct tipc_sock *tipc_sk(const struct sock *sk)
{
- if (imp > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
- msg_set_importance(&tsk->phdr, (u32)imp);
- return 0;
+ return container_of(sk, struct tipc_sock, sk);
}
-static struct tipc_sock *tipc_sk(const struct sock *sk)
+int tsk_set_importance(struct sock *sk, int imp)
{
- return container_of(sk, struct tipc_sock, sk);
+ if (imp > TIPC_CRITICAL_IMPORTANCE)
+ return -EINVAL;
+ msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp);
+ return 0;
}
static bool tsk_conn_cong(struct tipc_sock *tsk)
@@ -474,6 +479,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
tsk = tipc_sk(sk);
tsk->max_pkt = MAX_PKT_DEFAULT;
tsk->maxnagle = 0;
+ tsk->nagle_start = NAGLE_START_INIT;
INIT_LIST_HEAD(&tsk->publications);
INIT_LIST_HEAD(&tsk->cong_links);
msg = &tsk->phdr;
@@ -541,7 +547,7 @@ static void __tipc_shutdown(struct socket *sock, int error)
!tsk_conn_cong(tsk)));
/* Push out delayed messages if in Nagle mode */
- tipc_sk_push_backlog(tsk);
+ tipc_sk_push_backlog(tsk, false);
/* Remove pending SYN */
__skb_queue_purge(&sk->sk_write_queue);
@@ -1252,14 +1258,37 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
/* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
* when socket is in Nagle mode
*/
-static void tipc_sk_push_backlog(struct tipc_sock *tsk)
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack)
{
struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
+ struct sk_buff *skb = skb_peek_tail(txq);
struct net *net = sock_net(&tsk->sk);
u32 dnode = tsk_peer_node(tsk);
- struct sk_buff *skb = skb_peek(txq);
int rc;
+ if (nagle_ack) {
+ tsk->pkt_cnt += skb_queue_len(txq);
+ if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) {
+ tsk->oneway = 0;
+ if (tsk->nagle_start < NAGLE_START_MAX)
+ tsk->nagle_start *= 2;
+ tsk->expect_ack = false;
+ pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n",
+ tsk->portid, tsk->msg_acc, tsk->pkt_cnt,
+ tsk->nagle_start);
+ } else {
+ tsk->nagle_start = NAGLE_START_INIT;
+ if (skb) {
+ msg_set_ack_required(buf_msg(skb));
+ tsk->expect_ack = true;
+ } else {
+ tsk->expect_ack = false;
+ }
+ }
+ tsk->msg_acc = 0;
+ tsk->pkt_cnt = 0;
+ }
+
if (!skb || tsk->cong_link_cnt)
return;
@@ -1267,9 +1296,10 @@ static void tipc_sk_push_backlog(struct tipc_sock *tsk)
if (msg_is_syn(buf_msg(skb)))
return;
+ if (tsk->msg_acc)
+ tsk->pkt_cnt += skb_queue_len(txq);
tsk->snt_unacked += tsk->snd_backlog;
tsk->snd_backlog = 0;
- tsk->expect_ack = true;
rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
if (rc == -ELINKCONG)
tsk->cong_link_cnt = 1;
@@ -1322,8 +1352,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
return;
} else if (mtyp == CONN_ACK) {
was_cong = tsk_conn_cong(tsk);
- tsk->expect_ack = false;
- tipc_sk_push_backlog(tsk);
+ tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr));
tsk->snt_unacked -= msg_conn_ack(hdr);
if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
tsk->snd_win = msg_adv_win(hdr);
@@ -1516,6 +1545,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
+ struct sk_buff *skb;
u32 dnode = tsk_peer_node(tsk);
int maxnagle = tsk->maxnagle;
int maxpkt = tsk->max_pkt;
@@ -1544,17 +1574,25 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
break;
send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
blocks = tsk->snd_backlog;
- if (tsk->oneway++ >= 4 && send <= maxnagle) {
+ if (tsk->oneway++ >= tsk->nagle_start && send <= maxnagle) {
rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
if (unlikely(rc < 0))
break;
blocks += rc;
+ tsk->msg_acc++;
if (blocks <= 64 && tsk->expect_ack) {
tsk->snd_backlog = blocks;
sent += send;
break;
+ } else if (blocks > 64) {
+ tsk->pkt_cnt += skb_queue_len(txq);
+ } else {
+ skb = skb_peek_tail(txq);
+ msg_set_ack_required(buf_msg(skb));
+ tsk->expect_ack = true;
+ tsk->msg_acc = 0;
+ tsk->pkt_cnt = 0;
}
- tsk->expect_ack = true;
} else {
rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
if (unlikely(rc != send))
@@ -2091,7 +2129,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
smp_wmb();
tsk->cong_link_cnt--;
wakeup = true;
- tipc_sk_push_backlog(tsk);
+ tipc_sk_push_backlog(tsk, false);
break;
case GROUP_PROTOCOL:
tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
@@ -2180,7 +2218,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
return false;
case TIPC_ESTABLISHED:
if (!skb_queue_empty(&sk->sk_write_queue))
- tipc_sk_push_backlog(tsk);
+ tipc_sk_push_backlog(tsk, false);
/* Accept only connection-based messages sent by peer */
if (likely(con_msg && !err && pport == oport &&
pnode == onode)) {
@@ -2188,8 +2226,10 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
struct sk_buff *skb;
skb = tipc_sk_build_ack(tsk);
- if (skb)
+ if (skb) {
+ msg_set_nagle_ack(buf_msg(skb));
__skb_queue_tail(xmitq, skb);
+ }
}
return true;
}
@@ -2681,7 +2721,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
/* Connect new socket to it's peer */
tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
- tsk_set_importance(new_tsock, msg_importance(msg));
+ tsk_set_importance(new_sk, msg_importance(msg));
if (msg_named(msg)) {
new_tsock->conn_type = msg_nametype(msg);
new_tsock->conn_instance = msg_nameinst(msg);
@@ -3099,7 +3139,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tsk_set_importance(tsk, value);
+ res = tsk_set_importance(sk, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 235b9679acee..b11575afc66f 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -75,4 +75,6 @@ u32 tipc_sock_get_portid(struct sock *sk);
bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb);
bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb);
+int tsk_set_importance(struct sock *sk, int imp);
+
#endif
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 58ab3d6dcdce..97a6264a2993 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -36,7 +36,7 @@
#include "core.h"
#include "trace.h"
#include "crypto.h"
-
+#include "bcast.h"
#include <linux/sysctl.h>
static struct ctl_table_header *tipc_ctl_hdr;
@@ -75,6 +75,13 @@ static struct ctl_table tipc_table[] = {
.extra1 = SYSCTL_ONE,
},
#endif
+ {
+ .procname = "bc_retruni",
+ .data = &sysctl_tipc_bc_retruni,
+ .maxlen = sizeof(sysctl_tipc_bc_retruni),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
{}
};
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 446af7bbd13e..1489cfb941d8 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -497,7 +497,6 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
{
- int imp = TIPC_CRITICAL_IMPORTANCE;
struct socket *lsock = NULL;
struct sockaddr_tipc saddr;
struct sock *sk;
@@ -514,8 +513,9 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
sk->sk_user_data = srv;
write_unlock_bh(&sk->sk_callback_lock);
- rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
- (char *)&imp, sizeof(imp));
+ lock_sock(sk);
+ rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE);
+ release_sock(sk);
if (rc < 0)
goto err;
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
index 4d8e00483afc..04af83f0500c 100644
--- a/net/tipc/trace.h
+++ b/net/tipc/trace.h
@@ -255,7 +255,7 @@ DECLARE_EVENT_CLASS(tipc_link_class,
TP_fast_assign(
__assign_str(header, header);
- tipc_link_name_ext(l, __entry->name);
+ memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME);
tipc_link_dump(l, dqueues, __get_str(buf));
),
@@ -295,12 +295,14 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class,
),
TP_fast_assign(
- tipc_link_name_ext(r, __entry->name);
+ memcpy(__entry->name, tipc_link_name(r), TIPC_MAX_LINK_NAME);
__entry->from = f;
__entry->to = t;
__entry->len = skb_queue_len(tq);
- __entry->fseqno = msg_seqno(buf_msg(skb_peek(tq)));
- __entry->lseqno = msg_seqno(buf_msg(skb_peek_tail(tq)));
+ __entry->fseqno = __entry->len ?
+ msg_seqno(buf_msg(skb_peek(tq))) : 0;
+ __entry->lseqno = __entry->len ?
+ msg_seqno(buf_msg(skb_peek_tail(tq))) : 0;
),
TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n",
@@ -308,15 +310,16 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class,
__entry->len, __entry->fseqno, __entry->lseqno)
);
-DEFINE_EVENT(tipc_link_transmq_class, tipc_link_retrans,
+DEFINE_EVENT_CONDITION(tipc_link_transmq_class, tipc_link_retrans,
TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
- TP_ARGS(r, f, t, tq)
+ TP_ARGS(r, f, t, tq),
+ TP_CONDITION(less_eq(f, t))
);
DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack,
TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
TP_ARGS(r, f, t, tq),
- TP_printk("<%s> acked: [%u-%u] transmq: %u [%u-%u]\n",
+ TP_printk("<%s> acked: %u gap: %u transmq: %u [%u-%u]\n",
__entry->name, __entry->from, __entry->to,
__entry->len, __entry->fseqno, __entry->lseqno)
);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index d6620ad53546..28a283f26a8d 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -161,9 +161,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
struct udp_bearer *ub, struct udp_media_addr *src,
struct udp_media_addr *dst, struct dst_cache *cache)
{
- struct dst_entry *ndst = dst_cache_get(cache);
+ struct dst_entry *ndst;
int ttl, err = 0;
+ local_bh_disable();
+ ndst = dst_cache_get(cache);
if (dst->proto == htons(ETH_P_IP)) {
struct rtable *rt = (struct rtable *)ndst;
@@ -210,9 +212,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
src->port, dst->port, false);
#endif
}
+ local_bh_enable();
return err;
tx_error:
+ local_bh_enable();
kfree_skb(skb);
return err;
}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a562ebaaa33c..0e55f8365ce2 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -694,10 +694,11 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_rx *rx_ctx;
+ bool is_req_pending, is_force_resync;
u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
- u32 sock_data, is_req_pending;
struct tls_prot_info *prot;
s64 resync_req;
+ u32 sock_data;
u32 req_seq;
if (tls_ctx->rx_conf != TLS_HW)
@@ -712,9 +713,11 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
resync_req = atomic64_read(&rx_ctx->resync_req);
req_seq = resync_req >> 32;
seq += TLS_HEADER_SIZE - 1;
- is_req_pending = resync_req;
+ is_req_pending = resync_req & RESYNC_REQ;
+ is_force_resync = resync_req & RESYNC_REQ_FORCE;
- if (likely(!is_req_pending) || req_seq != seq ||
+ if (likely(!is_req_pending) ||
+ (!is_force_resync && req_seq != seq) ||
!atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
return;
break;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index e23f94a5549b..8c2763eb6aae 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -206,10 +206,12 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
kfree(aead_req);
+ spin_lock_bh(&ctx->decrypt_compl_lock);
pending = atomic_dec_return(&ctx->decrypt_pending);
- if (!pending && READ_ONCE(ctx->async_notify))
+ if (!pending && ctx->async_notify)
complete(&ctx->async_wait.completion);
+ spin_unlock_bh(&ctx->decrypt_compl_lock);
}
static int tls_do_decryption(struct sock *sk,
@@ -467,10 +469,12 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
ready = true;
}
+ spin_lock_bh(&ctx->encrypt_compl_lock);
pending = atomic_dec_return(&ctx->encrypt_pending);
- if (!pending && READ_ONCE(ctx->async_notify))
+ if (!pending && ctx->async_notify)
complete(&ctx->async_wait.completion);
+ spin_unlock_bh(&ctx->encrypt_compl_lock);
if (!ready)
return;
@@ -780,7 +784,7 @@ static int tls_push_record(struct sock *sk, int flags,
static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
bool full_record, u8 record_type,
- size_t *copied, int flags)
+ ssize_t *copied, int flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
@@ -796,9 +800,10 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
psock = sk_psock_get(sk);
if (!psock || !policy) {
err = tls_push_record(sk, flags, record_type);
- if (err && err != -EINPROGRESS) {
+ if (err && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
+ err = -sk->sk_err;
}
if (psock)
sk_psock_put(sk, psock);
@@ -824,9 +829,10 @@ more_data:
switch (psock->eval) {
case __SK_PASS:
err = tls_push_record(sk, flags, record_type);
- if (err && err != -EINPROGRESS) {
+ if (err && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
+ err = -sk->sk_err;
goto out_err;
}
break;
@@ -916,7 +922,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
unsigned char record_type = TLS_RECORD_TYPE_DATA;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool eor = !(msg->msg_flags & MSG_MORE);
- size_t try_to_copy, copied = 0;
+ size_t try_to_copy;
+ ssize_t copied = 0;
struct sk_msg *msg_pl, *msg_en;
struct tls_rec *rec;
int required_size;
@@ -926,6 +933,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
int num_zc = 0;
int orig_size;
int ret = 0;
+ int pending;
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
return -EOPNOTSUPP;
@@ -1092,13 +1100,19 @@ trim_sgl:
goto send_end;
} else if (num_zc) {
/* Wait for pending encryptions to get completed */
- smp_store_mb(ctx->async_notify, true);
+ spin_lock_bh(&ctx->encrypt_compl_lock);
+ ctx->async_notify = true;
- if (atomic_read(&ctx->encrypt_pending))
+ pending = atomic_read(&ctx->encrypt_pending);
+ spin_unlock_bh(&ctx->encrypt_compl_lock);
+ if (pending)
crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
else
reinit_completion(&ctx->async_wait.completion);
+ /* There can be no concurrent accesses, since we have no
+ * pending encrypt operations
+ */
WRITE_ONCE(ctx->async_notify, false);
if (ctx->async_wait.err) {
@@ -1118,7 +1132,7 @@ send_end:
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
- return copied ? copied : ret;
+ return copied > 0 ? copied : ret;
}
static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
@@ -1132,7 +1146,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
struct sk_msg *msg_pl;
struct tls_rec *rec;
int num_async = 0;
- size_t copied = 0;
+ ssize_t copied = 0;
bool full_record;
int record_room;
int ret = 0;
@@ -1234,7 +1248,7 @@ wait_for_memory:
}
sendpage_end:
ret = sk_stream_error(sk, flags, ret);
- return copied ? copied : ret;
+ return copied > 0 ? copied : ret;
}
int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
@@ -1729,6 +1743,7 @@ int tls_sw_recvmsg(struct sock *sk,
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
int num_async = 0;
+ int pending;
flags |= nonblock;
@@ -1891,8 +1906,11 @@ pick_next_record:
recv_end:
if (num_async) {
/* Wait for all previously submitted records to be decrypted */
- smp_store_mb(ctx->async_notify, true);
- if (atomic_read(&ctx->decrypt_pending)) {
+ spin_lock_bh(&ctx->decrypt_compl_lock);
+ ctx->async_notify = true;
+ pending = atomic_read(&ctx->decrypt_pending);
+ spin_unlock_bh(&ctx->decrypt_compl_lock);
+ if (pending) {
err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
if (err) {
/* one of async decrypt failed */
@@ -1904,6 +1922,10 @@ recv_end:
} else {
reinit_completion(&ctx->async_wait.completion);
}
+
+ /* There can be no concurrent accesses, since we have no
+ * pending decrypt operations
+ */
WRITE_ONCE(ctx->async_notify, false);
/* Drain records from the rx_list & copy if required */
@@ -2290,6 +2312,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
if (tx) {
crypto_init_wait(&sw_ctx_tx->async_wait);
+ spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
crypto_info = &ctx->crypto_send.info;
cctx = &ctx->tx;
aead = &sw_ctx_tx->aead_send;
@@ -2298,6 +2321,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
sw_ctx_tx->tx_work.sk = sk;
} else {
crypto_init_wait(&sw_ctx_rx->async_wait);
+ spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
crypto_info = &ctx->crypto_recv.info;
cctx = &ctx->rx;
skb_queue_head_init(&sw_ctx_rx->rx_list);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index a5f28708e0e7..626bf9044418 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1408,7 +1408,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
/* Wait for children sockets to appear; these are the new sockets
* created upon connection establishment.
*/
- timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
+ timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK);
prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
while ((connected = vsock_dequeue_accept(listener)) == NULL &&
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 69efc891885f..0edda1edf988 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1132,6 +1132,14 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
lock_sock(sk);
+ /* Check if sk has been released before lock_sock */
+ if (sk->sk_shutdown == SHUTDOWN_MASK) {
+ (void)virtio_transport_reset_no_sock(t, pkt);
+ release_sock(sk);
+ sock_put(sk);
+ goto free_pkt;
+ }
+
/* Update CID in case it has changed after a transport reset event */
vsk->local_addr.svm_cid = dst.svm_cid;
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 63cf7131f601..813e93644ae7 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -181,8 +181,8 @@ config CFG80211_CRDA_SUPPORT
default y
help
You should enable this option unless you know for sure you have no
- need for it, for example when using internal regdb (above) or the
- database loaded as a firmware file.
+ need for it, for example when using the regulatory database loaded as
+ a firmware file.
If unsure, say Y.
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index fcac5c6366e1..cddf92c5d09e 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2018 Intel Corporation
+ * Copyright 2018-2020 Intel Corporation
*/
#include <linux/export.h>
@@ -27,6 +27,7 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
return;
chandef->chan = chan;
+ chandef->freq1_offset = chan->freq_offset;
chandef->center_freq2 = 0;
chandef->edmg.bw_config = 0;
chandef->edmg.channels = 0;
@@ -146,6 +147,9 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
if (!chandef->chan)
return false;
+ if (chandef->freq1_offset >= 1000)
+ return false;
+
control_freq = chandef->chan->center_freq;
switch (chandef->width) {
@@ -153,7 +157,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
case NL80211_CHAN_WIDTH_10:
case NL80211_CHAN_WIDTH_20:
case NL80211_CHAN_WIDTH_20_NOHT:
- if (chandef->center_freq1 != control_freq)
+ if (ieee80211_chandef_to_khz(chandef) !=
+ ieee80211_channel_to_khz(chandef->chan))
return false;
if (chandef->center_freq2)
return false;
@@ -386,10 +391,11 @@ static u32 cfg80211_get_start_freq(u32 center_freq,
{
u32 start_freq;
- if (bandwidth <= 20)
+ bandwidth = MHZ_TO_KHZ(bandwidth);
+ if (bandwidth <= MHZ_TO_KHZ(20))
start_freq = center_freq;
else
- start_freq = center_freq - bandwidth/2 + 10;
+ start_freq = center_freq - bandwidth / 2 + MHZ_TO_KHZ(10);
return start_freq;
}
@@ -399,10 +405,11 @@ static u32 cfg80211_get_end_freq(u32 center_freq,
{
u32 end_freq;
- if (bandwidth <= 20)
+ bandwidth = MHZ_TO_KHZ(bandwidth);
+ if (bandwidth <= MHZ_TO_KHZ(20))
end_freq = center_freq;
else
- end_freq = center_freq + bandwidth/2 - 10;
+ end_freq = center_freq + bandwidth / 2 - MHZ_TO_KHZ(10);
return end_freq;
}
@@ -417,8 +424,8 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy,
start_freq = cfg80211_get_start_freq(center_freq, bandwidth);
end_freq = cfg80211_get_end_freq(center_freq, bandwidth);
- for (freq = start_freq; freq <= end_freq; freq += 20) {
- c = ieee80211_get_channel(wiphy, freq);
+ for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+ c = ieee80211_get_channel_khz(wiphy, freq);
if (!c)
return -EINVAL;
@@ -449,8 +456,8 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
return -EINVAL;
ret = cfg80211_get_chans_dfs_required(wiphy,
- chandef->center_freq1,
- width);
+ ieee80211_chandef_to_khz(chandef),
+ width);
if (ret < 0)
return ret;
else if (ret > 0)
@@ -460,8 +467,8 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
return 0;
ret = cfg80211_get_chans_dfs_required(wiphy,
- chandef->center_freq2,
- width);
+ MHZ_TO_KHZ(chandef->center_freq2),
+ width);
if (ret < 0)
return ret;
else if (ret > 0)
@@ -503,8 +510,8 @@ static int cfg80211_get_chans_dfs_usable(struct wiphy *wiphy,
* DFS_AVAILABLE). Return number of usable channels
* (require CAC). Allow DFS and non-DFS channel mix.
*/
- for (freq = start_freq; freq <= end_freq; freq += 20) {
- c = ieee80211_get_channel(wiphy, freq);
+ for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+ c = ieee80211_get_channel_khz(wiphy, freq);
if (!c)
return -EINVAL;
@@ -536,8 +543,9 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
if (width < 0)
return false;
- r1 = cfg80211_get_chans_dfs_usable(wiphy, chandef->center_freq1,
- width);
+ r1 = cfg80211_get_chans_dfs_usable(wiphy,
+ MHZ_TO_KHZ(chandef->center_freq1),
+ width);
if (r1 < 0)
return false;
@@ -546,8 +554,8 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
case NL80211_CHAN_WIDTH_80P80:
WARN_ON(!chandef->center_freq2);
r2 = cfg80211_get_chans_dfs_usable(wiphy,
- chandef->center_freq2,
- width);
+ MHZ_TO_KHZ(chandef->center_freq2),
+ width);
if (r2 < 0)
return false;
break;
@@ -694,8 +702,8 @@ static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy,
* If any channel in between is disabled or has not
* had gone through CAC return false
*/
- for (freq = start_freq; freq <= end_freq; freq += 20) {
- c = ieee80211_get_channel(wiphy, freq);
+ for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+ c = ieee80211_get_channel_khz(wiphy, freq);
if (!c)
return false;
@@ -724,7 +732,8 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy,
if (width < 0)
return false;
- r = cfg80211_get_chans_dfs_available(wiphy, chandef->center_freq1,
+ r = cfg80211_get_chans_dfs_available(wiphy,
+ MHZ_TO_KHZ(chandef->center_freq1),
width);
/* If any of channels unavailable for cf1 just return */
@@ -735,8 +744,8 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy,
case NL80211_CHAN_WIDTH_80P80:
WARN_ON(!chandef->center_freq2);
r = cfg80211_get_chans_dfs_available(wiphy,
- chandef->center_freq2,
- width);
+ MHZ_TO_KHZ(chandef->center_freq2),
+ width);
break;
default:
WARN_ON(chandef->center_freq2);
@@ -757,8 +766,8 @@ static unsigned int cfg80211_get_chans_dfs_cac_time(struct wiphy *wiphy,
start_freq = cfg80211_get_start_freq(center_freq, bandwidth);
end_freq = cfg80211_get_end_freq(center_freq, bandwidth);
- for (freq = start_freq; freq <= end_freq; freq += 20) {
- c = ieee80211_get_channel(wiphy, freq);
+ for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+ c = ieee80211_get_channel_khz(wiphy, freq);
if (!c)
return 0;
@@ -790,14 +799,14 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
return 0;
t1 = cfg80211_get_chans_dfs_cac_time(wiphy,
- chandef->center_freq1,
+ MHZ_TO_KHZ(chandef->center_freq1),
width);
if (!chandef->center_freq2)
return t1;
t2 = cfg80211_get_chans_dfs_cac_time(wiphy,
- chandef->center_freq2,
+ MHZ_TO_KHZ(chandef->center_freq2),
width);
return max(t1, t2);
@@ -813,8 +822,8 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
start_freq = cfg80211_get_start_freq(center_freq, bandwidth);
end_freq = cfg80211_get_end_freq(center_freq, bandwidth);
- for (freq = start_freq; freq <= end_freq; freq += 20) {
- c = ieee80211_get_channel(wiphy, freq);
+ for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+ c = ieee80211_get_channel_khz(wiphy, freq);
if (!c || c->flags & prohibited_flags)
return false;
}
@@ -910,7 +919,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
width = 10;
break;
case NL80211_CHAN_WIDTH_20:
- if (!ht_cap->ht_supported)
+ if (!ht_cap->ht_supported &&
+ chandef->chan->band != NL80211_BAND_6GHZ)
return false;
/* fall through */
case NL80211_CHAN_WIDTH_20_NOHT:
@@ -919,6 +929,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
break;
case NL80211_CHAN_WIDTH_40:
width = 40;
+ if (chandef->chan->band == NL80211_BAND_6GHZ)
+ break;
if (!ht_cap->ht_supported)
return false;
if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) ||
@@ -933,24 +945,29 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
break;
case NL80211_CHAN_WIDTH_80P80:
cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
- if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
+ if (chandef->chan->band != NL80211_BAND_6GHZ &&
+ cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
return false;
/* fall through */
case NL80211_CHAN_WIDTH_80:
- if (!vht_cap->vht_supported)
- return false;
prohibited_flags |= IEEE80211_CHAN_NO_80MHZ;
width = 80;
+ if (chandef->chan->band == NL80211_BAND_6GHZ)
+ break;
+ if (!vht_cap->vht_supported)
+ return false;
break;
case NL80211_CHAN_WIDTH_160:
+ prohibited_flags |= IEEE80211_CHAN_NO_160MHZ;
+ width = 160;
+ if (chandef->chan->band == NL80211_BAND_6GHZ)
+ break;
if (!vht_cap->vht_supported)
return false;
cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
return false;
- prohibited_flags |= IEEE80211_CHAN_NO_160MHZ;
- width = 160;
break;
default:
WARN_ON_ONCE(1);
@@ -976,13 +993,15 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
prohibited_flags |= IEEE80211_CHAN_NO_OFDM;
- if (!cfg80211_secondary_chans_ok(wiphy, chandef->center_freq1,
+ if (!cfg80211_secondary_chans_ok(wiphy,
+ ieee80211_chandef_to_khz(chandef),
width, prohibited_flags))
return false;
if (!chandef->center_freq2)
return true;
- return cfg80211_secondary_chans_ok(wiphy, chandef->center_freq2,
+ return cfg80211_secondary_chans_ok(wiphy,
+ MHZ_TO_KHZ(chandef->center_freq2),
width, prohibited_flags);
}
EXPORT_SYMBOL(cfg80211_chandef_usable);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 341402b4f178..f0226ae9561c 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -142,7 +142,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
if (result)
return result;
- if (rdev->wiphy.debugfsdir)
+ if (!IS_ERR_OR_NULL(rdev->wiphy.debugfsdir))
debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
rdev->wiphy.debugfsdir,
rdev->wiphy.debugfsdir->d_parent, newname);
@@ -480,9 +480,6 @@ use_default_name:
INIT_LIST_HEAD(&rdev->bss_list);
INIT_LIST_HEAD(&rdev->sched_scan_req_list);
INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done);
- INIT_LIST_HEAD(&rdev->mlme_unreg);
- spin_lock_init(&rdev->mlme_unreg_lock);
- INIT_WORK(&rdev->mlme_unreg_wk, cfg80211_mlme_unreg_wk);
INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk,
cfg80211_dfs_channels_update_work);
#ifdef CONFIG_CFG80211_WEXT
@@ -794,6 +791,7 @@ int wiphy_register(struct wiphy *wiphy)
/* sanity check supported bands/channels */
for (band = 0; band < NUM_NL80211_BANDS; band++) {
u16 types = 0;
+ bool have_he = false;
sband = wiphy->bands[band];
if (!sband)
@@ -810,6 +808,11 @@ int wiphy_register(struct wiphy *wiphy)
!sband->n_bitrates))
return -EINVAL;
+ if (WARN_ON(band == NL80211_BAND_6GHZ &&
+ (sband->ht_cap.ht_supported ||
+ sband->vht_cap.vht_supported)))
+ return -EINVAL;
+
/*
* Since cfg80211_disable_40mhz_24ghz is global, we can
* modify the sband's ht data even if the driver uses a
@@ -837,6 +840,9 @@ int wiphy_register(struct wiphy *wiphy)
sband->channels[i].orig_mpwr =
sband->channels[i].max_power;
sband->channels[i].band = band;
+
+ if (WARN_ON(sband->channels[i].freq_offset >= 1000))
+ return -EINVAL;
}
for (i = 0; i < sband->n_iftype_data; i++) {
@@ -854,8 +860,17 @@ int wiphy_register(struct wiphy *wiphy)
return -EINVAL;
types |= iftd->types_mask;
+
+ if (i == 0)
+ have_he = iftd->he_cap.has_he;
+ else
+ have_he = have_he &&
+ iftd->he_cap.has_he;
}
+ if (WARN_ON(!have_he && band == NL80211_BAND_6GHZ))
+ return -EINVAL;
+
have_band = true;
}
@@ -1030,7 +1045,6 @@ void wiphy_unregister(struct wiphy *wiphy)
cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
flush_work(&rdev->destroy_work);
flush_work(&rdev->sched_scan_stop_wk);
- flush_work(&rdev->mlme_unreg_wk);
flush_work(&rdev->propagate_radar_detect_wk);
flush_work(&rdev->propagate_cac_done_wk);
@@ -1094,6 +1108,7 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
rdev->devlist_generation++;
cfg80211_mlme_purge_registrations(wdev);
+ flush_work(&wdev->mgmt_registrations_update_wk);
switch (wdev->iftype) {
case NL80211_IFTYPE_P2P_DEVICE:
@@ -1238,6 +1253,8 @@ void cfg80211_init_wdev(struct cfg80211_registered_device *rdev,
spin_lock_init(&wdev->event_lock);
INIT_LIST_HEAD(&wdev->mgmt_registrations);
spin_lock_init(&wdev->mgmt_registrations_lock);
+ INIT_WORK(&wdev->mgmt_registrations_update_wk,
+ cfg80211_mgmt_registrations_update_wk);
INIT_LIST_HEAD(&wdev->pmsr_list);
spin_lock_init(&wdev->pmsr_lock);
INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index bb897a803ffe..e0e5b3ee9699 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -60,10 +60,6 @@ struct cfg80211_registered_device {
struct list_head beacon_registrations;
spinlock_t beacon_registrations_lock;
- struct list_head mlme_unreg;
- spinlock_t mlme_unreg_lock;
- struct work_struct mlme_unreg_wk;
-
/* protected by RTNL only */
int num_running_ifaces;
int num_running_monitor_ifaces;
@@ -290,7 +286,7 @@ struct cfg80211_cqm_config {
u32 rssi_hyst;
s32 last_rssi_event_value;
int n_rssi_thresholds;
- s32 rssi_thresholds[0];
+ s32 rssi_thresholds[];
};
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
@@ -385,8 +381,9 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
struct net_device *dev);
int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
u16 frame_type, const u8 *match_data,
- int match_len, struct netlink_ext_ack *extack);
-void cfg80211_mlme_unreg_wk(struct work_struct *wk);
+ int match_len, bool multicast_rx,
+ struct netlink_ext_ack *extack);
+void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk);
void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index e4805a3bd310..189334314cba 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -426,58 +426,62 @@ struct cfg80211_mgmt_registration {
__le16 frame_type;
+ bool multicast_rx;
+
u8 match[];
};
-static void
-cfg80211_process_mlme_unregistrations(struct cfg80211_registered_device *rdev)
+static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct wireless_dev *tmp;
struct cfg80211_mgmt_registration *reg;
+ struct mgmt_frame_regs upd = {};
ASSERT_RTNL();
- spin_lock_bh(&rdev->mlme_unreg_lock);
- while ((reg = list_first_entry_or_null(&rdev->mlme_unreg,
- struct cfg80211_mgmt_registration,
- list))) {
- list_del(&reg->list);
- spin_unlock_bh(&rdev->mlme_unreg_lock);
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp, &rdev->wiphy.wdev_list, list) {
+ list_for_each_entry_rcu(reg, &tmp->mgmt_registrations, list) {
+ u32 mask = BIT(le16_to_cpu(reg->frame_type) >> 4);
+ u32 mcast_mask = 0;
- if (rdev->ops->mgmt_frame_register) {
- u16 frame_type = le16_to_cpu(reg->frame_type);
+ if (reg->multicast_rx)
+ mcast_mask = mask;
- rdev_mgmt_frame_register(rdev, reg->wdev,
- frame_type, false);
- }
+ upd.global_stypes |= mask;
+ upd.global_mcast_stypes |= mcast_mask;
- kfree(reg);
-
- spin_lock_bh(&rdev->mlme_unreg_lock);
+ if (tmp == wdev) {
+ upd.interface_stypes |= mask;
+ upd.interface_mcast_stypes |= mcast_mask;
+ }
+ }
}
- spin_unlock_bh(&rdev->mlme_unreg_lock);
+ rcu_read_unlock();
+
+ rdev_update_mgmt_frame_registrations(rdev, wdev, &upd);
}
-void cfg80211_mlme_unreg_wk(struct work_struct *wk)
+void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk)
{
- struct cfg80211_registered_device *rdev;
-
- rdev = container_of(wk, struct cfg80211_registered_device,
- mlme_unreg_wk);
+ struct wireless_dev *wdev = container_of(wk, struct wireless_dev,
+ mgmt_registrations_update_wk);
rtnl_lock();
- cfg80211_process_mlme_unregistrations(rdev);
+ cfg80211_mgmt_registrations_update(wdev);
rtnl_unlock();
}
int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
u16 frame_type, const u8 *match_data,
- int match_len, struct netlink_ext_ack *extack)
+ int match_len, bool multicast_rx,
+ struct netlink_ext_ack *extack)
{
- struct wiphy *wiphy = wdev->wiphy;
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_mgmt_registration *reg, *nreg;
int err = 0;
u16 mgmt_type;
+ bool update_multicast = false;
if (!wdev->wiphy->mgmt_stypes)
return -EOPNOTSUPP;
@@ -528,34 +532,39 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
continue;
if (memcmp(reg->match, match_data, mlen) == 0) {
+ if (reg->multicast_rx != multicast_rx) {
+ update_multicast = true;
+ reg->multicast_rx = multicast_rx;
+ break;
+ }
NL_SET_ERR_MSG(extack, "Match already configured");
err = -EALREADY;
break;
}
}
- if (err) {
- kfree(nreg);
+ if (err)
goto out;
- }
- memcpy(nreg->match, match_data, match_len);
- nreg->match_len = match_len;
- nreg->nlportid = snd_portid;
- nreg->frame_type = cpu_to_le16(frame_type);
- nreg->wdev = wdev;
- list_add(&nreg->list, &wdev->mgmt_registrations);
+ if (update_multicast) {
+ kfree(nreg);
+ } else {
+ memcpy(nreg->match, match_data, match_len);
+ nreg->match_len = match_len;
+ nreg->nlportid = snd_portid;
+ nreg->frame_type = cpu_to_le16(frame_type);
+ nreg->wdev = wdev;
+ nreg->multicast_rx = multicast_rx;
+ list_add(&nreg->list, &wdev->mgmt_registrations);
+ }
spin_unlock_bh(&wdev->mgmt_registrations_lock);
- /* process all unregistrations to avoid driver confusion */
- cfg80211_process_mlme_unregistrations(rdev);
-
- if (rdev->ops->mgmt_frame_register)
- rdev_mgmt_frame_register(rdev, wdev, frame_type, true);
+ cfg80211_mgmt_registrations_update(wdev);
return 0;
out:
+ kfree(nreg);
spin_unlock_bh(&wdev->mgmt_registrations_lock);
return err;
@@ -574,11 +583,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
continue;
list_del(&reg->list);
- spin_lock(&rdev->mlme_unreg_lock);
- list_add_tail(&reg->list, &rdev->mlme_unreg);
- spin_unlock(&rdev->mlme_unreg_lock);
+ kfree(reg);
- schedule_work(&rdev->mlme_unreg_wk);
+ schedule_work(&wdev->mgmt_registrations_update_wk);
}
spin_unlock_bh(&wdev->mgmt_registrations_lock);
@@ -594,15 +601,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
{
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_mgmt_registration *reg, *tmp;
spin_lock_bh(&wdev->mgmt_registrations_lock);
- spin_lock(&rdev->mlme_unreg_lock);
- list_splice_tail_init(&wdev->mgmt_registrations, &rdev->mlme_unreg);
- spin_unlock(&rdev->mlme_unreg_lock);
+ list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
+ list_del(&reg->list);
+ kfree(reg);
+ }
spin_unlock_bh(&wdev->mgmt_registrations_lock);
- cfg80211_process_mlme_unregistrations(rdev);
+ cfg80211_mgmt_registrations_update(wdev);
}
int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
@@ -721,8 +729,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
return rdev_mgmt_tx(rdev, wdev, params, cookie);
}
-bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
- const u8 *buf, size_t len, u32 flags)
+bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
+ const u8 *buf, size_t len, u32 flags)
{
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -777,7 +785,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
trace_cfg80211_return_bool(result);
return result;
}
-EXPORT_SYMBOL(cfg80211_rx_mgmt);
+EXPORT_SYMBOL(cfg80211_rx_mgmt_khz);
void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev)
{
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 519414468b5d..263ae395ad44 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -329,6 +329,15 @@ he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = {
[NL80211_HE_BSS_COLOR_ATTR_PARTIAL] = { .type = NLA_FLAG },
};
+static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
+ [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY,
+ .len = NL80211_MAX_SUPP_RATES },
+ [NL80211_TXRATE_HT] = { .type = NLA_BINARY,
+ .len = NL80211_MAX_SUPP_HT_RATES },
+ [NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)),
+ [NL80211_TXRATE_GI] = { .type = NLA_U8 },
+};
+
static const struct nla_policy
nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = {
[NL80211_TID_CONFIG_ATTR_VIF_SUPP] = { .type = NLA_U64 },
@@ -343,6 +352,12 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = {
NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL] =
NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
+ [NL80211_TID_CONFIG_ATTR_AMSDU_CTRL] =
+ NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
+ [NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE] =
+ NLA_POLICY_MAX(NLA_U8, NL80211_TX_RATE_FIXED),
+ [NL80211_TID_CONFIG_ATTR_TX_RATE] =
+ NLA_POLICY_NESTED(nl80211_txattr_policy),
};
static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
@@ -363,6 +378,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 },
[NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 },
+ [NL80211_ATTR_CENTER_FREQ1_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999),
[NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 },
[NL80211_ATTR_WIPHY_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1),
@@ -635,6 +651,13 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_CONTROL_PORT_NO_PREAUTH] = { .type = NLA_FLAG },
[NL80211_ATTR_PMK_LIFETIME] = NLA_POLICY_MIN(NLA_U32, 1),
[NL80211_ATTR_PMK_REAUTH_THRESHOLD] = NLA_POLICY_RANGE(NLA_U8, 1, 100),
+ [NL80211_ATTR_RECEIVE_MULTICAST] = { .type = NLA_FLAG },
+ [NL80211_ATTR_WIPHY_FREQ_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999),
+ [NL80211_ATTR_SCAN_FREQ_KHZ] = { .type = NLA_NESTED },
+ [NL80211_ATTR_HE_6GHZ_CAPABILITY] = {
+ .type = NLA_EXACT_LEN,
+ .len = sizeof(struct ieee80211_he_6ghz_capa),
+ },
};
/* policy for the key attributes */
@@ -707,9 +730,16 @@ nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
/* policy for GTK rekey offload attributes */
static const struct nla_policy
nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
- [NL80211_REKEY_DATA_KEK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KEK_LEN),
- [NL80211_REKEY_DATA_KCK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KCK_LEN),
+ [NL80211_REKEY_DATA_KEK] = {
+ .type = NLA_BINARY,
+ .len = NL80211_KEK_EXT_LEN
+ },
+ [NL80211_REKEY_DATA_KCK] = {
+ .type = NLA_BINARY,
+ .len = NL80211_KCK_EXT_LEN
+ },
[NL80211_REKEY_DATA_REPLAY_CTR] = NLA_POLICY_EXACT_LEN_WARN(NL80211_REPLAY_CTR_LEN),
+ [NL80211_REKEY_DATA_AKM] = { .type = NLA_U32 },
};
static const struct nla_policy
@@ -901,6 +931,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
chan->center_freq))
goto nla_put_failure;
+ if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_OFFSET, chan->freq_offset))
+ goto nla_put_failure;
+
if ((chan->flags & IEEE80211_CHAN_DISABLED) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED))
goto nla_put_failure;
@@ -1306,13 +1339,11 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
}
static struct ieee80211_channel *nl80211_get_valid_chan(struct wiphy *wiphy,
- struct nlattr *tb)
+ u32 freq)
{
struct ieee80211_channel *chan;
- if (tb == NULL)
- return NULL;
- chan = ieee80211_get_channel(wiphy, nla_get_u32(tb));
+ chan = ieee80211_get_channel_khz(wiphy, freq);
if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
return NULL;
return chan;
@@ -1538,6 +1569,7 @@ static int nl80211_send_coalesce(struct sk_buff *msg,
static int
nl80211_send_iftype_data(struct sk_buff *msg,
+ const struct ieee80211_supported_band *sband,
const struct ieee80211_sband_iftype_data *iftdata)
{
const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap;
@@ -1561,6 +1593,12 @@ nl80211_send_iftype_data(struct sk_buff *msg,
return -ENOBUFS;
}
+ if (sband->band == NL80211_BAND_6GHZ &&
+ nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA,
+ sizeof(iftdata->he_6ghz_capa),
+ &iftdata->he_6ghz_capa))
+ return -ENOBUFS;
+
return 0;
}
@@ -1609,7 +1647,7 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg,
if (!iftdata)
return -ENOBUFS;
- err = nl80211_send_iftype_data(msg,
+ err = nl80211_send_iftype_data(msg, sband,
&sband->iftype_data[i]);
if (err)
return err;
@@ -2767,13 +2805,17 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
if (!attrs[NL80211_ATTR_WIPHY_FREQ])
return -EINVAL;
- control_freq = nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]);
+ control_freq = MHZ_TO_KHZ(
+ nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+ if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
+ control_freq +=
+ nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
memset(chandef, 0, sizeof(*chandef));
-
- chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq);
+ chandef->chan = ieee80211_get_channel_khz(&rdev->wiphy, control_freq);
chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
- chandef->center_freq1 = control_freq;
+ chandef->center_freq1 = KHZ_TO_MHZ(control_freq);
+ chandef->freq1_offset = control_freq % 1000;
chandef->center_freq2 = 0;
/* Primary channel not allowed */
@@ -2821,9 +2863,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
} else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
chandef->width =
nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
- if (attrs[NL80211_ATTR_CENTER_FREQ1])
+ if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
chandef->center_freq1 =
nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
+ if (attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET])
+ chandef->freq1_offset = nla_get_u32(
+ attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]);
+ else
+ chandef->freq1_offset = 0;
+ }
if (attrs[NL80211_ATTR_CENTER_FREQ2])
chandef->center_freq2 =
nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]);
@@ -3256,6 +3304,9 @@ static int nl80211_send_chandef(struct sk_buff *msg,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ,
chandef->chan->center_freq))
return -ENOBUFS;
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET,
+ chandef->chan->freq_offset))
+ return -ENOBUFS;
switch (chandef->width) {
case NL80211_CHAN_WIDTH_20_NOHT:
case NL80211_CHAN_WIDTH_20:
@@ -3860,14 +3911,25 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
};
void *hdr;
struct sk_buff *msg;
+ bool bigtk_support = false;
+
+ if (wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_BEACON_PROTECTION))
+ bigtk_support = true;
+
+ if ((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_STATION ||
+ dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
+ wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT))
+ bigtk_support = true;
if (info->attrs[NL80211_ATTR_KEY_IDX]) {
key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
- if (key_idx > 5 &&
- !wiphy_ext_feature_isset(
- &rdev->wiphy,
- NL80211_EXT_FEATURE_BEACON_PROTECTION))
+
+ if (key_idx >= 6 && key_idx <= 7 && !bigtk_support) {
+ GENL_SET_ERR_MSG(info, "BIGTK not supported");
return -EINVAL;
+ }
}
if (info->attrs[NL80211_ATTR_MAC])
@@ -4357,16 +4419,9 @@ static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband,
return true;
}
-static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
- [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY,
- .len = NL80211_MAX_SUPP_RATES },
- [NL80211_TXRATE_HT] = { .type = NLA_BINARY,
- .len = NL80211_MAX_SUPP_HT_RATES },
- [NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)),
- [NL80211_TXRATE_GI] = { .type = NLA_U8 },
-};
-
static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
+ struct nlattr *attrs[],
+ enum nl80211_attrs attr,
struct cfg80211_bitrate_mask *mask)
{
struct nlattr *tb[NL80211_TXRATE_MAX + 1];
@@ -4397,14 +4452,14 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
}
/* if no rates are given set it back to the defaults */
- if (!info->attrs[NL80211_ATTR_TX_RATES])
+ if (!attrs[attr])
goto out;
/* The nested attribute uses enum nl80211_band as the index. This maps
* directly to the enum nl80211_band values used in cfg80211.
*/
BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8);
- nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) {
+ nla_for_each_nested(tx_rates, attrs[attr], rem) {
enum nl80211_band band = nla_type(tx_rates);
int err;
@@ -4679,6 +4734,8 @@ static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
params->ht_required = true;
if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY)
params->vht_required = true;
+ if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY)
+ params->he_required = true;
}
}
@@ -4907,7 +4964,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
if (info->attrs[NL80211_ATTR_TX_RATES]) {
- err = nl80211_parse_tx_bitrate_mask(info, &params.beacon_rate);
+ err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
+ NL80211_ATTR_TX_RATES,
+ &params.beacon_rate);
if (err)
return err;
@@ -5948,6 +6007,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
}
+ if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
+ params.he_6ghz_capa =
+ nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT])
params.airtime_weight =
nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]);
@@ -6082,6 +6145,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
+ if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
+ params.he_6ghz_capa =
+ nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]);
+
if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
params.opmode_notif_used = true;
params.opmode_notif =
@@ -6126,10 +6193,14 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
params.vht_capa = NULL;
/* HE requires WME */
- if (params.he_capa_len)
+ if (params.he_capa_len || params.he_6ghz_capa)
return -EINVAL;
}
+ /* Ensure that HT/VHT capabilities are not set for 6 GHz HE STA */
+ if (params.he_6ghz_capa && (params.ht_capa || params.vht_capa))
+ return -EINVAL;
+
/* When you run into this, adjust the code below for the new flag */
BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
@@ -7687,6 +7758,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct wireless_dev *wdev = info->user_ptr[1];
struct cfg80211_scan_request *request;
+ struct nlattr *scan_freqs = NULL;
+ bool scan_freqs_khz = false;
struct nlattr *attr;
struct wiphy *wiphy;
int err, tmp, n_ssids = 0, n_channels, i;
@@ -7705,9 +7778,17 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
- n_channels = validate_scan_freqs(
- info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
+ if (info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]) {
+ if (!wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_SCAN_FREQ_KHZ))
+ return -EOPNOTSUPP;
+ scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ];
+ scan_freqs_khz = true;
+ } else if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES])
+ scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQUENCIES];
+
+ if (scan_freqs) {
+ n_channels = validate_scan_freqs(scan_freqs);
if (!n_channels) {
err = -EINVAL;
goto unlock;
@@ -7755,13 +7836,16 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
}
i = 0;
- if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
+ if (scan_freqs) {
/* user specified, bail out if channel not found */
- nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) {
+ nla_for_each_nested(attr, scan_freqs, tmp) {
struct ieee80211_channel *chan;
+ int freq = nla_get_u32(attr);
- chan = ieee80211_get_channel(wiphy, nla_get_u32(attr));
+ if (!scan_freqs_khz)
+ freq = MHZ_TO_KHZ(freq);
+ chan = ieee80211_get_channel_khz(wiphy, freq);
if (!chan) {
err = -EINVAL;
goto out_free;
@@ -8857,6 +8941,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
goto nla_put_failure;
if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) ||
nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) ||
+ nla_put_u32(msg, NL80211_BSS_FREQUENCY_OFFSET,
+ res->channel->freq_offset) ||
nla_put_u32(msg, NL80211_BSS_CHAN_WIDTH, res->scan_width) ||
nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO,
jiffies_to_msecs(jiffies - intbss->ts)))
@@ -9125,6 +9211,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
enum nl80211_auth_type auth_type;
struct key_parse key;
bool local_state_change;
+ u32 freq;
if (!info->attrs[NL80211_ATTR_MAC])
return -EINVAL;
@@ -9181,8 +9268,12 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
- chan = nl80211_get_valid_chan(&rdev->wiphy,
- info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+ freq = MHZ_TO_KHZ(nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+ if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
+ freq +=
+ nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
+
+ chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
if (!chan)
return -EINVAL;
@@ -9372,6 +9463,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_assoc_request req = {};
const u8 *bssid, *ssid;
int err, ssid_len = 0;
+ u32 freq;
if (dev->ieee80211_ptr->conn_owner_nlportid &&
dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
@@ -9391,8 +9483,11 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
- chan = nl80211_get_valid_chan(&rdev->wiphy,
- info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+ freq = MHZ_TO_KHZ(nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+ if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
+ freq +=
+ nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
+ chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
if (!chan)
return -EINVAL;
@@ -10072,6 +10167,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
struct cfg80211_connect_params connect;
struct wiphy *wiphy;
struct cfg80211_cached_keys *connkeys = NULL;
+ u32 freq = 0;
int err;
memset(&connect, 0, sizeof(connect));
@@ -10142,14 +10238,21 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
connect.prev_bssid =
nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
- if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
- connect.channel = nl80211_get_valid_chan(
- wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+ if (info->attrs[NL80211_ATTR_WIPHY_FREQ])
+ freq = MHZ_TO_KHZ(nla_get_u32(
+ info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+ if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
+ freq +=
+ nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
+
+ if (freq) {
+ connect.channel = nl80211_get_valid_chan(wiphy, freq);
if (!connect.channel)
return -EINVAL;
} else if (info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]) {
- connect.channel_hint = nl80211_get_valid_chan(
- wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]);
+ freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]);
+ freq = MHZ_TO_KHZ(freq);
+ connect.channel_hint = nl80211_get_valid_chan(wiphy, freq);
if (!connect.channel_hint)
return -EINVAL;
}
@@ -10688,7 +10791,8 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
if (!rdev->ops->set_bitrate_mask)
return -EOPNOTSUPP;
- err = nl80211_parse_tx_bitrate_mask(info, &mask);
+ err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
+ NL80211_ATTR_TX_RATES, &mask);
if (err)
return err;
@@ -10726,9 +10830,18 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->mgmt_tx)
return -EOPNOTSUPP;
+ if (info->attrs[NL80211_ATTR_RECEIVE_MULTICAST] &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS)) {
+ GENL_SET_ERR_MSG(info,
+ "multicast RX registrations are not supported");
+ return -EOPNOTSUPP;
+ }
+
return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type,
nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]),
+ info->attrs[NL80211_ATTR_RECEIVE_MULTICAST],
info->extack);
}
@@ -11285,7 +11398,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
}
if (info->attrs[NL80211_ATTR_TX_RATES]) {
- err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate);
+ err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
+ NL80211_ATTR_TX_RATES,
+ &setup.beacon_rate);
if (err)
return err;
@@ -12239,14 +12354,22 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
return -ERANGE;
- if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)
+ if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN &&
+ !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
+ nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KEK_EXT_LEN))
return -ERANGE;
- if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN)
+ if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN &&
+ !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
+ nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KCK_EXT_LEN))
return -ERANGE;
rekey_data.kek = nla_data(tb[NL80211_REKEY_DATA_KEK]);
rekey_data.kck = nla_data(tb[NL80211_REKEY_DATA_KCK]);
rekey_data.replay_ctr = nla_data(tb[NL80211_REKEY_DATA_REPLAY_CTR]);
+ rekey_data.kek_len = nla_len(tb[NL80211_REKEY_DATA_KEK]);
+ rekey_data.kck_len = nla_len(tb[NL80211_REKEY_DATA_KCK]);
+ if (tb[NL80211_REKEY_DATA_AKM])
+ rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]);
wdev_lock(wdev);
if (!wdev->current_bss) {
@@ -13792,6 +13915,7 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
{
+ bool dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK];
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -13800,6 +13924,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
u8 *dest;
u16 proto;
bool noencrypt;
+ u64 cookie = 0;
int err;
if (!wiphy_ext_feature_isset(&rdev->wiphy,
@@ -13844,9 +13969,12 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
noencrypt =
nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]);
- return rdev_tx_control_port(rdev, dev, buf, len,
- dest, cpu_to_be16(proto), noencrypt);
-
+ err = rdev_tx_control_port(rdev, dev, buf, len,
+ dest, cpu_to_be16(proto), noencrypt,
+ dont_wait_for_ack ? NULL : &cookie);
+ if (!err && !dont_wait_for_ack)
+ nl_set_extack_cookie_u64(info->extack, cookie);
+ return err;
out:
wdev_unlock(wdev);
return err;
@@ -14011,10 +14139,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
if (rdev->ops->reset_tid_config) {
err = rdev_reset_tid_config(rdev, dev, peer,
tid_conf->tids);
- /* If peer is there no other configuration will be
- * allowed
- */
- if (err || peer)
+ if (err)
return err;
} else {
return -EINVAL;
@@ -14057,6 +14182,29 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]);
}
+ if (attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]) {
+ tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_AMSDU_CTRL);
+ tid_conf->amsdu =
+ nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]);
+ }
+
+ if (attrs[NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE]) {
+ u32 idx = NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE, attr;
+
+ tid_conf->txrate_type = nla_get_u8(attrs[idx]);
+
+ if (tid_conf->txrate_type != NL80211_TX_RATE_AUTOMATIC) {
+ attr = NL80211_TID_CONFIG_ATTR_TX_RATE;
+ err = nl80211_parse_tx_bitrate_mask(info, attrs, attr,
+ &tid_conf->txrate_mask);
+ if (err)
+ return err;
+
+ tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE);
+ }
+ tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE);
+ }
+
if (peer)
mask = rdev->wiphy.tid_config_support.peer;
else
@@ -15168,14 +15316,27 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
}
nla_nest_end(msg, nest);
- nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES);
- if (!nest)
- goto nla_put_failure;
- for (i = 0; i < req->n_channels; i++) {
- if (nla_put_u32(msg, i, req->channels[i]->center_freq))
+ if (req->flags & NL80211_SCAN_FLAG_FREQ_KHZ) {
+ nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQ_KHZ);
+ if (!nest)
goto nla_put_failure;
+ for (i = 0; i < req->n_channels; i++) {
+ if (nla_put_u32(msg, i,
+ ieee80211_channel_to_khz(req->channels[i])))
+ goto nla_put_failure;
+ }
+ nla_nest_end(msg, nest);
+ } else {
+ nest = nla_nest_start_noflag(msg,
+ NL80211_ATTR_SCAN_FREQUENCIES);
+ if (!nest)
+ goto nla_put_failure;
+ for (i = 0; i < req->n_channels; i++) {
+ if (nla_put_u32(msg, i, req->channels[i]->center_freq))
+ goto nla_put_failure;
+ }
+ nla_nest_end(msg, nest);
}
- nla_nest_end(msg, nest);
if (req->ie &&
nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie))
@@ -15495,10 +15656,19 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
if (WARN_ON(len < 2))
return;
- if (ieee80211_is_deauth(mgmt->frame_control))
+ if (ieee80211_is_deauth(mgmt->frame_control)) {
cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE;
- else
+ } else if (ieee80211_is_disassoc(mgmt->frame_control)) {
cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
+ } else if (ieee80211_is_beacon(mgmt->frame_control)) {
+ if (wdev->unprot_beacon_reported &&
+ elapsed_jiffies_msecs(wdev->unprot_beacon_reported) < 10000)
+ return;
+ cmd = NL80211_CMD_UNPROT_BEACON;
+ wdev->unprot_beacon_reported = jiffies;
+ } else {
+ return;
+ }
trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len);
nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1,
@@ -16177,7 +16347,8 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
netdev->ifindex)) ||
nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
NL80211_ATTR_PAD) ||
- nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) ||
+ nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, KHZ_TO_MHZ(freq)) ||
+ nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET, freq % 1000) ||
(sig_dbm &&
nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
@@ -16194,8 +16365,9 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
return -ENOBUFS;
}
-void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
- const u8 *buf, size_t len, bool ack, gfp_t gfp)
+static void nl80211_frame_tx_status(struct wireless_dev *wdev, u64 cookie,
+ const u8 *buf, size_t len, bool ack,
+ gfp_t gfp, enum nl80211_commands command)
{
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -16203,13 +16375,16 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
struct sk_buff *msg;
void *hdr;
- trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
+ if (command == NL80211_CMD_FRAME_TX_STATUS)
+ trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
+ else
+ trace_cfg80211_control_port_tx_status(wdev, cookie, ack);
msg = nlmsg_new(100 + len, gfp);
if (!msg)
return;
- hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS);
+ hdr = nl80211hdr_put(msg, 0, 0, 0, command);
if (!hdr) {
nlmsg_free(msg);
return;
@@ -16232,9 +16407,25 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
NL80211_MCGRP_MLME, gfp);
return;
- nla_put_failure:
+nla_put_failure:
nlmsg_free(msg);
}
+
+void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
+ const u8 *buf, size_t len, bool ack,
+ gfp_t gfp)
+{
+ nl80211_frame_tx_status(wdev, cookie, buf, len, ack, gfp,
+ NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS);
+}
+EXPORT_SYMBOL(cfg80211_control_port_tx_status);
+
+void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
+ const u8 *buf, size_t len, bool ack, gfp_t gfp)
+{
+ nl80211_frame_tx_status(wdev, cookie, buf, len, ack, gfp,
+ NL80211_CMD_FRAME_TX_STATUS);
+}
EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
static int __nl80211_rx_control_port(struct net_device *dev,
@@ -16803,9 +16994,8 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
}
EXPORT_SYMBOL(cfg80211_probe_status);
-void cfg80211_report_obss_beacon(struct wiphy *wiphy,
- const u8 *frame, size_t len,
- int freq, int sig_dbm)
+void cfg80211_report_obss_beacon_khz(struct wiphy *wiphy, const u8 *frame,
+ size_t len, int freq, int sig_dbm)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct sk_buff *msg;
@@ -16828,7 +17018,10 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
(freq &&
- nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq)) ||
+ (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ,
+ KHZ_TO_MHZ(freq)) ||
+ nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET,
+ freq % 1000))) ||
(sig_dbm &&
nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
nla_put(msg, NL80211_ATTR_FRAME, len, frame))
@@ -16845,7 +17038,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
spin_unlock_bh(&rdev->beacon_registrations_lock);
nlmsg_free(msg);
}
-EXPORT_SYMBOL(cfg80211_report_obss_beacon);
+EXPORT_SYMBOL(cfg80211_report_obss_beacon_khz);
#ifdef CONFIG_PM
static int cfg80211_net_detect_results(struct sk_buff *msg,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 99462f0c4e08..950d57494168 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -748,14 +748,17 @@ static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev,
struct net_device *dev,
const void *buf, size_t len,
const u8 *dest, __be16 proto,
- const bool noencrypt)
+ const bool noencrypt, u64 *cookie)
{
int ret;
trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len,
dest, proto, noencrypt);
ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len,
- dest, proto, noencrypt);
- trace_rdev_return_int(&rdev->wiphy, ret);
+ dest, proto, noencrypt, cookie);
+ if (cookie)
+ trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie);
+ else
+ trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -819,13 +822,16 @@ rdev_set_cqm_txe_config(struct cfg80211_registered_device *rdev,
}
static inline void
-rdev_mgmt_frame_register(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev, u16 frame_type, bool reg)
+rdev_update_mgmt_frame_registrations(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ struct mgmt_frame_regs *upd)
{
might_sleep();
- trace_rdev_mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg);
- rdev->ops->mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg);
+ trace_rdev_update_mgmt_frame_registrations(&rdev->wiphy, wdev, upd);
+ if (rdev->ops->update_mgmt_frame_registrations)
+ rdev->ops->update_mgmt_frame_registrations(&rdev->wiphy, wdev,
+ upd);
trace_rdev_return_void(&rdev->wiphy);
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index d476d4da0d09..0d74a31ef0ab 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1658,22 +1658,23 @@ static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd
const struct ieee80211_channel *chan)
{
const struct ieee80211_freq_range *freq_range = NULL;
- u32 max_bandwidth_khz, bw_flags = 0;
+ u32 max_bandwidth_khz, center_freq_khz, bw_flags = 0;
freq_range = &reg_rule->freq_range;
max_bandwidth_khz = freq_range->max_bandwidth_khz;
+ center_freq_khz = ieee80211_channel_to_khz(chan);
/* Check if auto calculation requested */
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
/* If we get a reg_rule we can assume that at least 5Mhz fit */
if (!cfg80211_does_bw_fit_range(freq_range,
- MHZ_TO_KHZ(chan->center_freq),
+ center_freq_khz,
MHZ_TO_KHZ(10)))
bw_flags |= IEEE80211_CHAN_NO_10MHZ;
if (!cfg80211_does_bw_fit_range(freq_range,
- MHZ_TO_KHZ(chan->center_freq),
+ center_freq_khz,
MHZ_TO_KHZ(20)))
bw_flags |= IEEE80211_CHAN_NO_20MHZ;
@@ -1710,7 +1711,7 @@ static void handle_channel(struct wiphy *wiphy,
flags = chan->orig_flags;
- reg_rule = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq));
+ reg_rule = freq_reg_info(wiphy, ieee80211_channel_to_khz(chan));
if (IS_ERR(reg_rule)) {
/*
* We will disable all channels that do not match our
@@ -1729,13 +1730,13 @@ static void handle_channel(struct wiphy *wiphy,
if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
request_wiphy && request_wiphy == wiphy &&
request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
- pr_debug("Disabling freq %d MHz for good\n",
- chan->center_freq);
+ pr_debug("Disabling freq %d.%03d MHz for good\n",
+ chan->center_freq, chan->freq_offset);
chan->orig_flags |= IEEE80211_CHAN_DISABLED;
chan->flags = chan->orig_flags;
} else {
- pr_debug("Disabling freq %d MHz\n",
- chan->center_freq);
+ pr_debug("Disabling freq %d.%03d MHz\n",
+ chan->center_freq, chan->freq_offset);
chan->flags |= IEEE80211_CHAN_DISABLED;
}
return;
@@ -1936,7 +1937,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
sband = wiphy->bands[reg_beacon->chan.band];
chan = &sband->channels[chan_idx];
- if (likely(chan->center_freq != reg_beacon->chan.center_freq))
+ if (likely(!ieee80211_channel_equal(chan, &reg_beacon->chan)))
return;
if (chan->beacon_found)
@@ -2269,18 +2270,18 @@ static void handle_channel_custom(struct wiphy *wiphy,
u32 bw_flags = 0;
const struct ieee80211_reg_rule *reg_rule = NULL;
const struct ieee80211_power_rule *power_rule = NULL;
- u32 bw;
+ u32 bw, center_freq_khz;
+ center_freq_khz = ieee80211_channel_to_khz(chan);
for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) {
- reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq),
- regd, bw);
+ reg_rule = freq_reg_info_regd(center_freq_khz, regd, bw);
if (!IS_ERR(reg_rule))
break;
}
if (IS_ERR_OR_NULL(reg_rule)) {
- pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n",
- chan->center_freq);
+ pr_debug("Disabling freq %d.%03d MHz as custom regd has no rule that fits it\n",
+ chan->center_freq, chan->freq_offset);
if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
chan->flags |= IEEE80211_CHAN_DISABLED;
} else {
@@ -3337,8 +3338,8 @@ static bool pending_reg_beacon(struct ieee80211_channel *beacon_chan)
struct reg_beacon *pending_beacon;
list_for_each_entry(pending_beacon, &reg_pending_beacons, list)
- if (beacon_chan->center_freq ==
- pending_beacon->chan.center_freq)
+ if (ieee80211_channel_equal(beacon_chan,
+ &pending_beacon->chan))
return true;
return false;
}
@@ -3367,9 +3368,10 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
if (!reg_beacon)
return -ENOMEM;
- pr_debug("Found new beacon on frequency: %d MHz (Ch %d) on %s\n",
- beacon_chan->center_freq,
- ieee80211_frequency_to_channel(beacon_chan->center_freq),
+ pr_debug("Found new beacon on frequency: %d.%03d MHz (Ch %d) on %s\n",
+ beacon_chan->center_freq, beacon_chan->freq_offset,
+ ieee80211_freq_khz_to_channel(
+ ieee80211_channel_to_khz(beacon_chan)),
wiphy_name(wiphy));
memcpy(&reg_beacon->chan, beacon_chan,
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 4000382aef48..74ea4cfb39fb 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1322,8 +1322,8 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
return channel;
}
- freq = ieee80211_channel_to_frequency(channel_number, channel->band);
- alt_channel = ieee80211_get_channel(wiphy, freq);
+ freq = ieee80211_channel_to_freq_khz(channel_number, channel->band);
+ alt_channel = ieee80211_get_channel_khz(wiphy, freq);
if (!alt_channel) {
if (channel->band == NL80211_BAND_2GHZ) {
/*
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index ac3e60aa1fc8..15595cf401de 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -5,7 +5,7 @@
* (for nl80211's connect() and wext)
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2009 Intel Corporation. All rights reserved.
+ * Copyright (C) 2009, 2020 Intel Corporation. All rights reserved.
* Copyright 2017 Intel Deutschland GmbH
*/
@@ -694,6 +694,7 @@ void __cfg80211_connect_result(struct net_device *dev,
return;
}
+ wdev->unprot_beacon_reported = 0;
nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, cr,
GFP_KERNEL);
@@ -921,6 +922,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev,
cfg80211_hold_bss(bss_from_pub(info->bss));
wdev->current_bss = bss_from_pub(info->bss);
+ wdev->unprot_beacon_reported = 0;
nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy),
wdev->netdev, info, GFP_KERNEL);
@@ -1116,7 +1118,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
if (wiphy_ext_feature_isset(
wdev->wiphy,
- NL80211_EXT_FEATURE_BEACON_PROTECTION))
+ NL80211_EXT_FEATURE_BEACON_PROTECTION) ||
+ wiphy_ext_feature_isset(
+ wdev->wiphy,
+ NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT))
max_key_idx = 7;
for (i = 0; i <= max_key_idx; i++)
rdev_del_key(rdev, dev, i, false, NULL);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 839df54cee21..b23cab016521 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -112,24 +112,29 @@
} while (0)
#define CHAN_ENTRY __field(enum nl80211_band, band) \
- __field(u32, center_freq)
+ __field(u32, center_freq) \
+ __field(u16, freq_offset)
#define CHAN_ASSIGN(chan) \
do { \
if (chan) { \
__entry->band = chan->band; \
__entry->center_freq = chan->center_freq; \
+ __entry->freq_offset = chan->freq_offset; \
} else { \
__entry->band = 0; \
__entry->center_freq = 0; \
+ __entry->freq_offset = 0; \
} \
} while (0)
-#define CHAN_PR_FMT "band: %d, freq: %u"
-#define CHAN_PR_ARG __entry->band, __entry->center_freq
+#define CHAN_PR_FMT "band: %d, freq: %u.%03u"
+#define CHAN_PR_ARG __entry->band, __entry->center_freq, __entry->freq_offset
#define CHAN_DEF_ENTRY __field(enum nl80211_band, band) \
__field(u32, control_freq) \
+ __field(u32, freq_offset) \
__field(u32, width) \
__field(u32, center_freq1) \
+ __field(u32, freq1_offset) \
__field(u32, center_freq2)
#define CHAN_DEF_ASSIGN(chandef) \
do { \
@@ -137,21 +142,27 @@
__entry->band = (chandef)->chan->band; \
__entry->control_freq = \
(chandef)->chan->center_freq; \
+ __entry->freq_offset = \
+ (chandef)->chan->freq_offset; \
__entry->width = (chandef)->width; \
__entry->center_freq1 = (chandef)->center_freq1;\
+ __entry->freq1_offset = (chandef)->freq1_offset;\
__entry->center_freq2 = (chandef)->center_freq2;\
} else { \
__entry->band = 0; \
__entry->control_freq = 0; \
+ __entry->freq_offset = 0; \
__entry->width = 0; \
__entry->center_freq1 = 0; \
+ __entry->freq1_offset = 0; \
__entry->center_freq2 = 0; \
} \
} while (0)
#define CHAN_DEF_PR_FMT \
- "band: %d, control freq: %u, width: %d, cf1: %u, cf2: %u"
+ "band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u"
#define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq, \
- __entry->width, __entry->center_freq1, \
+ __entry->freq_offset, __entry->width, \
+ __entry->center_freq1, __entry->freq1_offset, \
__entry->center_freq2
#define SINFO_ENTRY __field(int, generation) \
@@ -1582,25 +1593,25 @@ TRACE_EVENT(rdev_set_bitrate_mask,
WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
);
-TRACE_EVENT(rdev_mgmt_frame_register,
+TRACE_EVENT(rdev_update_mgmt_frame_registrations,
TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
- u16 frame_type, bool reg),
- TP_ARGS(wiphy, wdev, frame_type, reg),
+ struct mgmt_frame_regs *upd),
+ TP_ARGS(wiphy, wdev, upd),
TP_STRUCT__entry(
WIPHY_ENTRY
WDEV_ENTRY
- __field(u16, frame_type)
- __field(bool, reg)
+ __field(u16, global_stypes)
+ __field(u16, interface_stypes)
),
TP_fast_assign(
WIPHY_ASSIGN;
WDEV_ASSIGN;
- __entry->frame_type = frame_type;
- __entry->reg = reg;
+ __entry->global_stypes = upd->global_stypes;
+ __entry->interface_stypes = upd->interface_stypes;
),
- TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", frame_type: 0x%.2x, reg: %s ",
- WIPHY_PR_ARG, WDEV_PR_ARG, __entry->frame_type,
- __entry->reg ? "true" : "false")
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", global: 0x%.2x, intf: 0x%.2x",
+ WIPHY_PR_ARG, WDEV_PR_ARG,
+ __entry->global_stypes, __entry->interface_stypes)
);
TRACE_EVENT(rdev_return_int_tx_rx,
@@ -2829,8 +2840,8 @@ TRACE_EVENT(cfg80211_rx_mgmt,
__entry->freq = freq;
__entry->sig_dbm = sig_dbm;
),
- TP_printk(WDEV_PR_FMT ", freq: %d, sig dbm: %d",
- WDEV_PR_ARG, __entry->freq, __entry->sig_dbm)
+ TP_printk(WDEV_PR_FMT ", freq: "KHZ_F", sig dbm: %d",
+ WDEV_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm)
);
TRACE_EVENT(cfg80211_mgmt_tx_status,
@@ -2850,6 +2861,23 @@ TRACE_EVENT(cfg80211_mgmt_tx_status,
WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack))
);
+TRACE_EVENT(cfg80211_control_port_tx_status,
+ TP_PROTO(struct wireless_dev *wdev, u64 cookie, bool ack),
+ TP_ARGS(wdev, cookie, ack),
+ TP_STRUCT__entry(
+ WDEV_ENTRY
+ __field(u64, cookie)
+ __field(bool, ack)
+ ),
+ TP_fast_assign(
+ WDEV_ASSIGN;
+ __entry->cookie = cookie;
+ __entry->ack = ack;
+ ),
+ TP_printk(WDEV_PR_FMT", cookie: %llu, ack: %s",
+ WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack))
+);
+
TRACE_EVENT(cfg80211_rx_control_port,
TP_PROTO(struct net_device *netdev, struct sk_buff *skb,
bool unencrypted),
@@ -3110,8 +3138,8 @@ TRACE_EVENT(cfg80211_report_obss_beacon,
__entry->freq = freq;
__entry->sig_dbm = sig_dbm;
),
- TP_printk(WIPHY_PR_FMT ", freq: %d, sig_dbm: %d",
- WIPHY_PR_ARG, __entry->freq, __entry->sig_dbm)
+ TP_printk(WIPHY_PR_FMT ", freq: "KHZ_F", sig_dbm: %d",
+ WIPHY_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm)
);
TRACE_EVENT(cfg80211_tdls_oper_request,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 6590efbbcbb9..4d3b76f94f55 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,7 +5,7 @@
* Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
*/
#include <linux/export.h>
#include <linux/bitops.h>
@@ -72,7 +72,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
}
EXPORT_SYMBOL(ieee80211_mandatory_rates);
-int ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
+u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band)
{
/* see 802.11 17.3.8.3.2 and Annex J
* there are overlapping channel numbers in 5GHz and 2GHz bands */
@@ -81,34 +81,39 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
switch (band) {
case NL80211_BAND_2GHZ:
if (chan == 14)
- return 2484;
+ return MHZ_TO_KHZ(2484);
else if (chan < 14)
- return 2407 + chan * 5;
+ return MHZ_TO_KHZ(2407 + chan * 5);
break;
case NL80211_BAND_5GHZ:
if (chan >= 182 && chan <= 196)
- return 4000 + chan * 5;
+ return MHZ_TO_KHZ(4000 + chan * 5);
else
- return 5000 + chan * 5;
+ return MHZ_TO_KHZ(5000 + chan * 5);
break;
case NL80211_BAND_6GHZ:
- /* see 802.11ax D4.1 27.3.22.2 */
+ /* see 802.11ax D6.1 27.3.23.2 */
+ if (chan == 2)
+ return MHZ_TO_KHZ(5935);
if (chan <= 253)
- return 5940 + chan * 5;
+ return MHZ_TO_KHZ(5950 + chan * 5);
break;
case NL80211_BAND_60GHZ:
if (chan < 7)
- return 56160 + chan * 2160;
+ return MHZ_TO_KHZ(56160 + chan * 2160);
break;
default:
;
}
return 0; /* not supported */
}
-EXPORT_SYMBOL(ieee80211_channel_to_frequency);
+EXPORT_SYMBOL(ieee80211_channel_to_freq_khz);
-int ieee80211_frequency_to_channel(int freq)
+int ieee80211_freq_khz_to_channel(u32 freq)
{
+ /* TODO: just handle MHz for now */
+ freq = KHZ_TO_MHZ(freq);
+
/* see 802.11 17.3.8.3.2 and Annex J */
if (freq == 2484)
return 14;
@@ -126,9 +131,10 @@ int ieee80211_frequency_to_channel(int freq)
else
return 0;
}
-EXPORT_SYMBOL(ieee80211_frequency_to_channel);
+EXPORT_SYMBOL(ieee80211_freq_khz_to_channel);
-struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq)
+struct ieee80211_channel *ieee80211_get_channel_khz(struct wiphy *wiphy,
+ u32 freq)
{
enum nl80211_band band;
struct ieee80211_supported_band *sband;
@@ -141,14 +147,16 @@ struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq)
continue;
for (i = 0; i < sband->n_channels; i++) {
- if (sband->channels[i].center_freq == freq)
- return &sband->channels[i];
+ struct ieee80211_channel *chan = &sband->channels[i];
+
+ if (ieee80211_channel_to_khz(chan) == freq)
+ return chan;
}
}
return NULL;
}
-EXPORT_SYMBOL(ieee80211_get_channel);
+EXPORT_SYMBOL(ieee80211_get_channel_khz);
static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
{
@@ -234,7 +242,9 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
int max_key_idx = 5;
if (wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_BEACON_PROTECTION))
+ NL80211_EXT_FEATURE_BEACON_PROTECTION) ||
+ wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT))
max_key_idx = 7;
if (key_idx < 0 || key_idx > max_key_idx)
return -EINVAL;
@@ -2030,10 +2040,10 @@ EXPORT_SYMBOL(cfg80211_send_layer2_update);
int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
enum ieee80211_vht_chanwidth bw,
- int mcs, bool ext_nss_bw_capable)
+ int mcs, bool ext_nss_bw_capable,
+ unsigned int max_vht_nss)
{
u16 map = le16_to_cpu(cap->supp_mcs.rx_mcs_map);
- int max_vht_nss = 0;
int ext_nss_bw;
int supp_width;
int i, mcs_encoding;
@@ -2041,7 +2051,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
if (map == 0xffff)
return 0;
- if (WARN_ON(mcs > 9))
+ if (WARN_ON(mcs > 9 || max_vht_nss > 8))
return 0;
if (mcs <= 7)
mcs_encoding = 0;
@@ -2050,16 +2060,18 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
else
mcs_encoding = 2;
- /* find max_vht_nss for the given MCS */
- for (i = 7; i >= 0; i--) {
- int supp = (map >> (2 * i)) & 3;
+ if (!max_vht_nss) {
+ /* find max_vht_nss for the given MCS */
+ for (i = 7; i >= 0; i--) {
+ int supp = (map >> (2 * i)) & 3;
- if (supp == 3)
- continue;
+ if (supp == 3)
+ continue;
- if (supp >= mcs_encoding) {
- max_vht_nss = i + 1;
- break;
+ if (supp >= mcs_encoding) {
+ max_vht_nss = i + 1;
+ break;
+ }
}
}
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 71e2bdafb2ce..30cdc4315f42 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
+obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o xskmap.o
+obj-$(CONFIG_XDP_SOCKETS) += xsk_buff_pool.o
obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 37ace3bc0d48..1bbaf1747e4f 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -179,37 +179,6 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
umem->zc = false;
}
-static void xdp_umem_unmap_pages(struct xdp_umem *umem)
-{
- unsigned int i;
-
- for (i = 0; i < umem->npgs; i++)
- if (PageHighMem(umem->pgs[i]))
- vunmap(umem->pages[i].addr);
-}
-
-static int xdp_umem_map_pages(struct xdp_umem *umem)
-{
- unsigned int i;
- void *addr;
-
- for (i = 0; i < umem->npgs; i++) {
- if (PageHighMem(umem->pgs[i]))
- addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL);
- else
- addr = page_address(umem->pgs[i]);
-
- if (!addr) {
- xdp_umem_unmap_pages(umem);
- return -ENOMEM;
- }
-
- umem->pages[i].addr = addr;
- }
-
- return 0;
-}
-
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
@@ -244,14 +213,9 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL;
}
- xsk_reuseq_destroy(umem);
-
- xdp_umem_unmap_pages(umem);
+ xp_destroy(umem->pool);
xdp_umem_unpin_pages(umem);
- kvfree(umem->pages);
- umem->pages = NULL;
-
xdp_umem_unaccount_pages(umem);
kfree(umem);
}
@@ -341,8 +305,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
+ u64 npgs, addr = mr->addr, size = mr->len;
unsigned int chunks, chunks_per_page;
- u64 addr = mr->addr, size = mr->len;
int err;
if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
@@ -372,6 +336,10 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if ((addr + size) < addr)
return -EINVAL;
+ npgs = div_u64(size, PAGE_SIZE);
+ if (npgs > U32_MAX)
+ return -EINVAL;
+
chunks = (unsigned int)div_u64(size, chunk_size);
if (chunks == 0)
return -EINVAL;
@@ -385,12 +353,10 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
- umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
- : ~((u64)chunk_size - 1);
umem->size = size;
umem->headroom = headroom;
- umem->chunk_size_nohr = chunk_size - headroom;
- umem->npgs = size / PAGE_SIZE;
+ umem->chunk_size = chunk_size;
+ umem->npgs = (u32)npgs;
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
@@ -407,18 +373,13 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (err)
goto out_account;
- umem->pages = kvcalloc(umem->npgs, sizeof(*umem->pages),
- GFP_KERNEL_ACCOUNT);
- if (!umem->pages) {
+ umem->pool = xp_create(umem->pgs, umem->npgs, chunks, chunk_size,
+ headroom, size, unaligned_chunks);
+ if (!umem->pool) {
err = -ENOMEM;
goto out_pin;
}
-
- err = xdp_umem_map_pages(umem);
- if (!err)
- return 0;
-
- kvfree(umem->pages);
+ return 0;
out_pin:
xdp_umem_unpin_pages(umem);
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index a63a9fb251f5..32067fe98f65 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -6,7 +6,7 @@
#ifndef XDP_UMEM_H_
#define XDP_UMEM_H_
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
u16 queue_id, u16 flags);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 45ffd67b367d..b6c0f08bd80d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -22,7 +22,7 @@
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "xsk_queue.h"
@@ -39,24 +39,6 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
READ_ONCE(xs->umem->fq);
}
-bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
-{
- return xskq_cons_has_entries(umem->fq, cnt);
-}
-EXPORT_SYMBOL(xsk_umem_has_addrs);
-
-bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
-{
- return xskq_cons_peek_addr(umem->fq, addr, umem);
-}
-EXPORT_SYMBOL(xsk_umem_peek_addr);
-
-void xsk_umem_release_addr(struct xdp_umem *umem)
-{
- xskq_cons_release(umem->fq);
-}
-EXPORT_SYMBOL(xsk_umem_release_addr);
-
void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
{
if (umem->need_wakeup & XDP_WAKEUP_RX)
@@ -117,76 +99,82 @@ bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
}
EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
-/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
- * each page. This is only required in copy mode.
- */
-static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
- u32 len, u32 metalen)
+void xp_release(struct xdp_buff_xsk *xskb)
{
- void *to_buf = xdp_umem_get_data(umem, addr);
-
- addr = xsk_umem_add_offset_to_addr(addr);
- if (xskq_cons_crosses_non_contig_pg(umem, addr, len + metalen)) {
- void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
- u64 page_start = addr & ~(PAGE_SIZE - 1);
- u64 first_len = PAGE_SIZE - (addr - page_start);
-
- memcpy(to_buf, from_buf, first_len);
- memcpy(next_pg_addr, from_buf + first_len,
- len + metalen - first_len);
+ xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
+}
- return;
- }
+static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
+{
+ u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
- memcpy(to_buf, from_buf, len + metalen);
+ offset += xskb->pool->headroom;
+ if (!xskb->pool->unaligned)
+ return xskb->orig_addr + offset;
+ return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- u64 offset = xs->umem->headroom;
- u64 addr, memcpy_addr;
- void *from_buf;
- u32 metalen;
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+ u64 addr;
int err;
- if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
- len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
+ addr = xp_get_handle(xskb);
+ err = xskq_prod_reserve_desc(xs->rx, addr, len);
+ if (err) {
xs->rx_dropped++;
- return -ENOSPC;
+ return err;
}
- if (unlikely(xdp_data_meta_unsupported(xdp))) {
- from_buf = xdp->data;
- metalen = 0;
- } else {
- from_buf = xdp->data_meta;
- metalen = xdp->data - xdp->data_meta;
- }
+ xp_release(xskb);
+ return 0;
+}
- memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
- __xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen);
+static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
+{
+ void *from_buf, *to_buf;
+ u32 metalen;
- offset += metalen;
- addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
- err = xskq_prod_reserve_desc(xs->rx, addr, len);
- if (!err) {
- xskq_cons_release(xs->umem->fq);
- xdp_return_buff(xdp);
- return 0;
+ if (unlikely(xdp_data_meta_unsupported(from))) {
+ from_buf = from->data;
+ to_buf = to->data;
+ metalen = 0;
+ } else {
+ from_buf = from->data_meta;
+ metalen = from->data - from->data_meta;
+ to_buf = to->data - metalen;
}
- xs->rx_dropped++;
- return err;
+ memcpy(to_buf, from_buf, len + metalen);
}
-static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
+ bool explicit_free)
{
- int err = xskq_prod_reserve_desc(xs->rx, xdp->handle, len);
+ struct xdp_buff *xsk_xdp;
+ int err;
- if (err)
+ if (len > xsk_umem_get_rx_frame_size(xs->umem)) {
xs->rx_dropped++;
+ return -ENOSPC;
+ }
- return err;
+ xsk_xdp = xsk_buff_alloc(xs->umem);
+ if (!xsk_xdp) {
+ xs->rx_dropped++;
+ return -ENOSPC;
+ }
+
+ xsk_copy_xdp(xsk_xdp, xdp, len);
+ err = __xsk_rcv_zc(xs, xsk_xdp, len);
+ if (err) {
+ xsk_buff_free(xsk_xdp);
+ return err;
+ }
+ if (explicit_free)
+ xdp_return_buff(xdp);
+ return 0;
}
static bool xsk_is_bound(struct xdp_sock *xs)
@@ -199,7 +187,8 @@ static bool xsk_is_bound(struct xdp_sock *xs)
return false;
}
-static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
+ bool explicit_free)
{
u32 len;
@@ -211,8 +200,9 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
len = xdp->data_end - xdp->data;
- return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
- __xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+ return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
+ __xsk_rcv_zc(xs, xdp, len) :
+ __xsk_rcv(xs, xdp, len, explicit_free);
}
static void xsk_flush(struct xdp_sock *xs)
@@ -224,46 +214,11 @@ static void xsk_flush(struct xdp_sock *xs)
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- u32 metalen = xdp->data - xdp->data_meta;
- u32 len = xdp->data_end - xdp->data;
- u64 offset = xs->umem->headroom;
- void *buffer;
- u64 addr;
int err;
spin_lock_bh(&xs->rx_lock);
-
- if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) {
- err = -EINVAL;
- goto out_unlock;
- }
-
- if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
- len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
- err = -ENOSPC;
- goto out_drop;
- }
-
- addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
- buffer = xdp_umem_get_data(xs->umem, addr);
- memcpy(buffer, xdp->data_meta, len + metalen);
-
- addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
- err = xskq_prod_reserve_desc(xs->rx, addr, len);
- if (err)
- goto out_drop;
-
- xskq_cons_release(xs->umem->fq);
- xskq_prod_submit(xs->rx);
-
- spin_unlock_bh(&xs->rx_lock);
-
- xs->sk.sk_data_ready(&xs->sk);
- return 0;
-
-out_drop:
- xs->rx_dropped++;
-out_unlock:
+ err = xsk_rcv(xs, xdp, false);
+ xsk_flush(xs);
spin_unlock_bh(&xs->rx_lock);
return err;
}
@@ -273,7 +228,7 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
int err;
- err = xsk_rcv(xs, xdp);
+ err = xsk_rcv(xs, xdp, true);
if (err)
return err;
@@ -404,7 +359,7 @@ static int xsk_generic_xmit(struct sock *sk)
skb_put(skb, len);
addr = desc.addr;
- buffer = xdp_umem_get_data(xs->umem, addr);
+ buffer = xsk_buff_raw_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
@@ -629,24 +584,6 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
return sock;
}
-/* Check if umem pages are contiguous.
- * If zero-copy mode, use the DMA address to do the page contiguity check
- * For all other modes we use addr (kernel virtual address)
- * Store the result in the low bits of addr.
- */
-static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
-{
- struct xdp_umem_page *pgs = umem->pages;
- int i, is_contig;
-
- for (i = 0; i < umem->npgs - 1; i++) {
- is_contig = (flags & XDP_ZEROCOPY) ?
- (pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) :
- (pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr);
- pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT;
- }
-}
-
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -729,23 +666,14 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
} else {
/* This xsk has its own umem. */
- xskq_set_umem(xs->umem->fq, xs->umem->size,
- xs->umem->chunk_mask);
- xskq_set_umem(xs->umem->cq, xs->umem->size,
- xs->umem->chunk_mask);
-
err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
if (err)
goto out_unlock;
-
- xsk_check_page_contiguity(xs->umem, flags);
}
xs->dev = dev;
xs->zc = xs->umem->zc;
xs->queue_id = qid;
- xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask);
- xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask);
xdp_add_sk_umem(xs->umem, xs);
out_unlock:
@@ -860,6 +788,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
&xs->umem->cq;
err = xsk_init_queue(entries, q, true);
+ if (optname == XDP_UMEM_FILL_RING)
+ xp_set_fq(xs->umem->pool, *q);
mutex_unlock(&xs->mutex);
return err;
}
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index 4cfd106bdb53..455ddd480f3d 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -4,6 +4,20 @@
#ifndef XSK_H_
#define XSK_H_
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
+
+/* Flags for the umem flags field.
+ *
+ * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public
+ * flags. See inlude/uapi/include/linux/if_xdp.h.
+ */
+#define XDP_UMEM_USES_NEED_WAKEUP BIT(1)
+
struct xdp_ring_offset_v1 {
__u64 producer;
__u64 consumer;
@@ -17,9 +31,25 @@ struct xdp_mmap_offsets_v1 {
struct xdp_ring_offset_v1 cr;
};
+/* Nodes are linked in the struct xdp_sock map_list field, and used to
+ * track which maps a certain socket reside in.
+ */
+
+struct xsk_map_node {
+ struct list_head node;
+ struct xsk_map *map;
+ struct xdp_sock **map_entry;
+};
+
static inline struct xdp_sock *xdp_sk(struct sock *sk)
{
return (struct xdp_sock *)sk;
}
+bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+ struct xdp_sock **map_entry);
+int xsk_map_inc(struct xsk_map *map);
+void xsk_map_put(struct xsk_map *map);
+
#endif /* XSK_H_ */
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
new file mode 100644
index 000000000000..540ed75e4482
--- /dev/null
+++ b/net/xdp/xsk_buff_pool.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <net/xsk_buff_pool.h>
+#include <net/xdp_sock.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/swiotlb.h>
+
+#include "xsk_queue.h"
+
+static void xp_addr_unmap(struct xsk_buff_pool *pool)
+{
+ vunmap(pool->addrs);
+}
+
+static int xp_addr_map(struct xsk_buff_pool *pool,
+ struct page **pages, u32 nr_pages)
+{
+ pool->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!pool->addrs)
+ return -ENOMEM;
+ return 0;
+}
+
+void xp_destroy(struct xsk_buff_pool *pool)
+{
+ if (!pool)
+ return;
+
+ xp_addr_unmap(pool);
+ kvfree(pool->heads);
+ kvfree(pool);
+}
+
+struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
+ u32 chunk_size, u32 headroom, u64 size,
+ bool unaligned)
+{
+ struct xsk_buff_pool *pool;
+ struct xdp_buff_xsk *xskb;
+ int err;
+ u32 i;
+
+ pool = kvzalloc(struct_size(pool, free_heads, chunks), GFP_KERNEL);
+ if (!pool)
+ goto out;
+
+ pool->heads = kvcalloc(chunks, sizeof(*pool->heads), GFP_KERNEL);
+ if (!pool->heads)
+ goto out;
+
+ pool->chunk_mask = ~((u64)chunk_size - 1);
+ pool->addrs_cnt = size;
+ pool->heads_cnt = chunks;
+ pool->free_heads_cnt = chunks;
+ pool->headroom = headroom;
+ pool->chunk_size = chunk_size;
+ pool->cheap_dma = true;
+ pool->unaligned = unaligned;
+ pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM;
+ INIT_LIST_HEAD(&pool->free_list);
+
+ for (i = 0; i < pool->free_heads_cnt; i++) {
+ xskb = &pool->heads[i];
+ xskb->pool = pool;
+ xskb->xdp.frame_sz = chunk_size - headroom;
+ pool->free_heads[i] = xskb;
+ }
+
+ err = xp_addr_map(pool, pages, nr_pages);
+ if (!err)
+ return pool;
+
+out:
+ xp_destroy(pool);
+ return NULL;
+}
+
+void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq)
+{
+ pool->fq = fq;
+}
+
+void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
+{
+ u32 i;
+
+ for (i = 0; i < pool->heads_cnt; i++)
+ pool->heads[i].xdp.rxq = rxq;
+}
+EXPORT_SYMBOL(xp_set_rxq_info);
+
+void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
+{
+ dma_addr_t *dma;
+ u32 i;
+
+ if (pool->dma_pages_cnt == 0)
+ return;
+
+ for (i = 0; i < pool->dma_pages_cnt; i++) {
+ dma = &pool->dma_pages[i];
+ if (*dma) {
+ dma_unmap_page_attrs(pool->dev, *dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, attrs);
+ *dma = 0;
+ }
+ }
+
+ kvfree(pool->dma_pages);
+ pool->dma_pages_cnt = 0;
+ pool->dev = NULL;
+}
+EXPORT_SYMBOL(xp_dma_unmap);
+
+static void xp_check_dma_contiguity(struct xsk_buff_pool *pool)
+{
+ u32 i;
+
+ for (i = 0; i < pool->dma_pages_cnt - 1; i++) {
+ if (pool->dma_pages[i] + PAGE_SIZE == pool->dma_pages[i + 1])
+ pool->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
+ else
+ pool->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
+ }
+}
+
+static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool)
+{
+#if defined(CONFIG_SWIOTLB)
+ phys_addr_t paddr;
+ u32 i;
+
+ for (i = 0; i < pool->dma_pages_cnt; i++) {
+ paddr = dma_to_phys(pool->dev, pool->dma_pages[i]);
+ if (is_swiotlb_buffer(paddr))
+ return false;
+ }
+#endif
+ return true;
+}
+
+static bool xp_check_cheap_dma(struct xsk_buff_pool *pool)
+{
+#if defined(CONFIG_HAS_DMA)
+ const struct dma_map_ops *ops = get_dma_ops(pool->dev);
+
+ if (ops) {
+ return !ops->sync_single_for_cpu &&
+ !ops->sync_single_for_device;
+ }
+
+ if (!dma_is_direct(ops))
+ return false;
+
+ if (!xp_check_swiotlb_dma(pool))
+ return false;
+
+ if (!dev_is_dma_coherent(pool->dev)) {
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
+ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE)
+ return false;
+#endif
+ }
+#endif
+ return true;
+}
+
+int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
+ unsigned long attrs, struct page **pages, u32 nr_pages)
+{
+ dma_addr_t dma;
+ u32 i;
+
+ pool->dma_pages = kvcalloc(nr_pages, sizeof(*pool->dma_pages),
+ GFP_KERNEL);
+ if (!pool->dma_pages)
+ return -ENOMEM;
+
+ pool->dev = dev;
+ pool->dma_pages_cnt = nr_pages;
+
+ for (i = 0; i < pool->dma_pages_cnt; i++) {
+ dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, attrs);
+ if (dma_mapping_error(dev, dma)) {
+ xp_dma_unmap(pool, attrs);
+ return -ENOMEM;
+ }
+ pool->dma_pages[i] = dma;
+ }
+
+ if (pool->unaligned)
+ xp_check_dma_contiguity(pool);
+
+ pool->dev = dev;
+ pool->cheap_dma = xp_check_cheap_dma(pool);
+ return 0;
+}
+EXPORT_SYMBOL(xp_dma_map);
+
+static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool,
+ u64 addr)
+{
+ return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size);
+}
+
+static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr)
+{
+ *addr = xp_unaligned_extract_addr(*addr);
+ if (*addr >= pool->addrs_cnt ||
+ *addr + pool->chunk_size > pool->addrs_cnt ||
+ xp_addr_crosses_non_contig_pg(pool, *addr))
+ return false;
+ return true;
+}
+
+static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
+{
+ *addr = xp_aligned_extract_addr(pool, *addr);
+ return *addr < pool->addrs_cnt;
+}
+
+static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
+{
+ struct xdp_buff_xsk *xskb;
+ u64 addr;
+ bool ok;
+
+ if (pool->free_heads_cnt == 0)
+ return NULL;
+
+ xskb = pool->free_heads[--pool->free_heads_cnt];
+
+ for (;;) {
+ if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
+ xp_release(xskb);
+ return NULL;
+ }
+
+ ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
+ xp_check_aligned(pool, &addr);
+ if (!ok) {
+ pool->fq->invalid_descs++;
+ xskq_cons_release(pool->fq);
+ continue;
+ }
+ break;
+ }
+ xskq_cons_release(pool->fq);
+
+ xskb->orig_addr = addr;
+ xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
+ if (pool->dma_pages_cnt) {
+ xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] &
+ ~XSK_NEXT_PG_CONTIG_MASK) +
+ (addr & ~PAGE_MASK);
+ xskb->dma = xskb->frame_dma + pool->headroom +
+ XDP_PACKET_HEADROOM;
+ }
+ return xskb;
+}
+
+struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
+{
+ struct xdp_buff_xsk *xskb;
+
+ if (!pool->free_list_cnt) {
+ xskb = __xp_alloc(pool);
+ if (!xskb)
+ return NULL;
+ } else {
+ pool->free_list_cnt--;
+ xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
+ free_list_node);
+ list_del(&xskb->free_list_node);
+ }
+
+ xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
+ xskb->xdp.data_meta = xskb->xdp.data;
+
+ if (!pool->cheap_dma) {
+ dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
+ pool->frame_len,
+ DMA_BIDIRECTIONAL);
+ }
+ return &xskb->xdp;
+}
+EXPORT_SYMBOL(xp_alloc);
+
+bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
+{
+ if (pool->free_list_cnt >= count)
+ return true;
+ return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
+}
+EXPORT_SYMBOL(xp_can_alloc);
+
+void xp_free(struct xdp_buff_xsk *xskb)
+{
+ xskb->pool->free_list_cnt++;
+ list_add(&xskb->free_list_node, &xskb->pool->free_list);
+}
+EXPORT_SYMBOL(xp_free);
+
+void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
+{
+ addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
+ return pool->addrs + addr;
+}
+EXPORT_SYMBOL(xp_raw_get_data);
+
+dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
+{
+ addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
+ return (pool->dma_pages[addr >> PAGE_SHIFT] &
+ ~XSK_NEXT_PG_CONTIG_MASK) +
+ (addr & ~PAGE_MASK);
+}
+EXPORT_SYMBOL(xp_raw_get_dma);
+
+void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
+{
+ dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
+ xskb->pool->frame_len, DMA_BIDIRECTIONAL);
+}
+EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
+
+void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
+ size_t size)
+{
+ dma_sync_single_range_for_device(pool->dev, dma, 0,
+ size, DMA_BIDIRECTIONAL);
+}
+EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index f59791ba43a0..0163b26aaf63 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.id = umem->id;
du.size = umem->size;
du.num_pages = umem->npgs;
- du.chunk_size = umem->chunk_size_nohr + umem->headroom;
+ du.chunk_size = umem->chunk_size;
du.headroom = umem->headroom;
du.ifindex = umem->dev ? umem->dev->ifindex : 0;
du.queue_id = umem->queue_id;
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 57fb81bd593c..6cf9586e5027 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -6,18 +6,10 @@
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/overflow.h>
+#include <net/xdp_sock_drv.h>
#include "xsk_queue.h"
-void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask)
-{
- if (!q)
- return;
-
- q->umem_size = umem_size;
- q->chunk_mask = chunk_mask;
-}
-
static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue)
{
struct xdp_umem_ring *umem_ring;
@@ -63,56 +55,3 @@ void xskq_destroy(struct xsk_queue *q)
page_frag_free(q->ring);
kfree(q);
}
-
-struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
-{
- struct xdp_umem_fq_reuse *newq;
-
- /* Check for overflow */
- if (nentries > (u32)roundup_pow_of_two(nentries))
- return NULL;
- nentries = roundup_pow_of_two(nentries);
-
- newq = kvmalloc(struct_size(newq, handles, nentries), GFP_KERNEL);
- if (!newq)
- return NULL;
- memset(newq, 0, offsetof(typeof(*newq), handles));
-
- newq->nentries = nentries;
- return newq;
-}
-EXPORT_SYMBOL_GPL(xsk_reuseq_prepare);
-
-struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
- struct xdp_umem_fq_reuse *newq)
-{
- struct xdp_umem_fq_reuse *oldq = umem->fq_reuse;
-
- if (!oldq) {
- umem->fq_reuse = newq;
- return NULL;
- }
-
- if (newq->nentries < oldq->length)
- return newq;
-
- memcpy(newq->handles, oldq->handles,
- array_size(oldq->length, sizeof(u64)));
- newq->length = oldq->length;
-
- umem->fq_reuse = newq;
- return oldq;
-}
-EXPORT_SYMBOL_GPL(xsk_reuseq_swap);
-
-void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
-{
- kvfree(rq);
-}
-EXPORT_SYMBOL_GPL(xsk_reuseq_free);
-
-void xsk_reuseq_destroy(struct xdp_umem *umem)
-{
- xsk_reuseq_free(umem->fq_reuse);
- umem->fq_reuse = NULL;
-}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 648733ec24ac..5b5d24d2dd37 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -9,6 +9,9 @@
#include <linux/types.h>
#include <linux/if_xdp.h>
#include <net/xdp_sock.h>
+#include <net/xsk_buff_pool.h>
+
+#include "xsk.h"
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
@@ -29,8 +32,6 @@ struct xdp_umem_ring {
};
struct xsk_queue {
- u64 chunk_mask;
- u64 umem_size;
u32 ring_mask;
u32 nentries;
u32 cached_prod;
@@ -103,98 +104,73 @@ struct xsk_queue {
/* Functions that read and validate content from consumer rings. */
-static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem,
- u64 addr,
- u64 length)
+static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
- bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
- bool next_pg_contig =
- (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr &
- XSK_NEXT_PG_CONTIG_MASK;
-
- return cross_pg && !next_pg_contig;
-}
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
- u64 addr,
- u64 length,
- struct xdp_umem *umem)
-{
- u64 base_addr = xsk_umem_extract_addr(addr);
+ if (q->cached_cons != q->cached_prod) {
+ u32 idx = q->cached_cons & q->ring_mask;
- addr = xsk_umem_add_offset_to_addr(addr);
- if (base_addr >= q->umem_size || addr >= q->umem_size ||
- xskq_cons_crosses_non_contig_pg(umem, addr, length)) {
- q->invalid_descs++;
- return false;
+ *addr = ring->desc[idx];
+ return true;
}
- return true;
+ return false;
}
-static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr)
+static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
+ struct xdp_desc *desc)
{
- if (addr >= q->umem_size) {
- q->invalid_descs++;
+ u64 chunk, chunk_end;
+
+ chunk = xp_aligned_extract_addr(pool, desc->addr);
+ chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len);
+ if (chunk != chunk_end)
+ return false;
+
+ if (chunk >= pool->addrs_cnt)
return false;
- }
+ if (desc->options)
+ return false;
return true;
}
-static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr,
- struct xdp_umem *umem)
+static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
+ struct xdp_desc *desc)
{
- struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-
- while (q->cached_cons != q->cached_prod) {
- u32 idx = q->cached_cons & q->ring_mask;
+ u64 addr, base_addr;
- *addr = ring->desc[idx] & q->chunk_mask;
+ base_addr = xp_unaligned_extract_addr(desc->addr);
+ addr = xp_unaligned_add_offset_to_addr(desc->addr);
- if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
- if (xskq_cons_is_valid_unaligned(q, *addr,
- umem->chunk_size_nohr,
- umem))
- return true;
- goto out;
- }
+ if (desc->len > pool->chunk_size)
+ return false;
- if (xskq_cons_is_valid_addr(q, *addr))
- return true;
+ if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ return false;
-out:
- q->cached_cons++;
- }
+ if (desc->options)
+ return false;
+ return true;
+}
- return false;
+static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
+ struct xdp_desc *desc)
+{
+ return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) :
+ xp_aligned_validate_desc(pool, desc);
}
static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
struct xdp_desc *d,
struct xdp_umem *umem)
{
- if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
- if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem))
- return false;
-
- if (d->len > umem->chunk_size_nohr || d->options) {
- q->invalid_descs++;
- return false;
- }
-
- return true;
- }
-
- if (!xskq_cons_is_valid_addr(q, d->addr))
- return false;
-
- if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
- d->options) {
+ if (!xp_validate_desc(umem->pool, d)) {
q->invalid_descs++;
return false;
}
-
return true;
}
@@ -250,12 +226,11 @@ static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
return entries >= cnt;
}
-static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr,
- struct xdp_umem *umem)
+static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
if (q->cached_prod == q->cached_cons)
xskq_cons_get_entries(q);
- return xskq_cons_read_addr(q, addr, umem);
+ return xskq_cons_read_addr_unchecked(q, addr);
}
static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
@@ -379,11 +354,7 @@ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
return q ? q->invalid_descs : 0;
}
-void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask);
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);
-/* Executed by the core when the entire UMEM gets freed */
-void xsk_reuseq_destroy(struct xdp_umem *umem);
-
#endif /* _LINUX_XSK_QUEUE_H */
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
new file mode 100644
index 000000000000..1dc7208c71ba
--- /dev/null
+++ b/net/xdp/xskmap.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XSKMAP used for AF_XDP sockets
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/capability.h>
+#include <net/xdp_sock.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "xsk.h"
+
+int xsk_map_inc(struct xsk_map *map)
+{
+ bpf_map_inc(&map->map);
+ return 0;
+}
+
+void xsk_map_put(struct xsk_map *map)
+{
+ bpf_map_put(&map->map);
+}
+
+static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
+ struct xdp_sock **map_entry)
+{
+ struct xsk_map_node *node;
+ int err;
+
+ node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ err = xsk_map_inc(map);
+ if (err) {
+ kfree(node);
+ return ERR_PTR(err);
+ }
+
+ node->map = map;
+ node->map_entry = map_entry;
+ return node;
+}
+
+static void xsk_map_node_free(struct xsk_map_node *node)
+{
+ xsk_map_put(node->map);
+ kfree(node);
+}
+
+static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
+{
+ spin_lock_bh(&xs->map_list_lock);
+ list_add_tail(&node->node, &xs->map_list);
+ spin_unlock_bh(&xs->map_list_lock);
+}
+
+static void xsk_map_sock_delete(struct xdp_sock *xs,
+ struct xdp_sock **map_entry)
+{
+ struct xsk_map_node *n, *tmp;
+
+ spin_lock_bh(&xs->map_list_lock);
+ list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
+ if (map_entry == n->map_entry) {
+ list_del(&n->node);
+ xsk_map_node_free(n);
+ }
+ }
+ spin_unlock_bh(&xs->map_list_lock);
+}
+
+static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_map_memory mem;
+ int err, numa_node;
+ struct xsk_map *m;
+ u64 size;
+
+ if (!capable(CAP_NET_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ if (attr->max_entries == 0 || attr->key_size != 4 ||
+ attr->value_size != 4 ||
+ attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
+ return ERR_PTR(-EINVAL);
+
+ numa_node = bpf_map_attr_numa_node(attr);
+ size = struct_size(m, xsk_map, attr->max_entries);
+
+ err = bpf_map_charge_init(&mem, size);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ m = bpf_map_area_alloc(size, numa_node);
+ if (!m) {
+ bpf_map_charge_finish(&mem);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ bpf_map_init_from_attr(&m->map, attr);
+ bpf_map_charge_move(&m->map.memory, &mem);
+ spin_lock_init(&m->lock);
+
+ return &m->map;
+}
+
+static void xsk_map_free(struct bpf_map *map)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+
+ bpf_clear_redirect_map(map);
+ synchronize_net();
+ bpf_map_area_free(m);
+}
+
+static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ u32 index = key ? *(u32 *)key : U32_MAX;
+ u32 *next = next_key;
+
+ if (index >= m->map.max_entries) {
+ *next = 0;
+ return 0;
+ }
+
+ if (index == m->map.max_entries - 1)
+ return -ENOENT;
+ *next = index + 1;
+ return 0;
+}
+
+static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+ const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
+ struct bpf_insn *insn = insn_buf;
+
+ *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+ *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
+ *insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *insn++ = BPF_MOV64_IMM(ret, 0);
+ return insn - insn_buf;
+}
+
+static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return __xsk_map_lookup_elem(map, *(u32 *)key);
+}
+
+static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct xdp_sock *xs, *old_xs, **map_entry;
+ u32 i = *(u32 *)key, fd = *(u32 *)value;
+ struct xsk_map_node *node;
+ struct socket *sock;
+ int err;
+
+ if (unlikely(map_flags > BPF_EXIST))
+ return -EINVAL;
+ if (unlikely(i >= m->map.max_entries))
+ return -E2BIG;
+
+ sock = sockfd_lookup(fd, &err);
+ if (!sock)
+ return err;
+
+ if (sock->sk->sk_family != PF_XDP) {
+ sockfd_put(sock);
+ return -EOPNOTSUPP;
+ }
+
+ xs = (struct xdp_sock *)sock->sk;
+
+ if (!xsk_is_setup_for_bpf_map(xs)) {
+ sockfd_put(sock);
+ return -EOPNOTSUPP;
+ }
+
+ map_entry = &m->xsk_map[i];
+ node = xsk_map_node_alloc(m, map_entry);
+ if (IS_ERR(node)) {
+ sockfd_put(sock);
+ return PTR_ERR(node);
+ }
+
+ spin_lock_bh(&m->lock);
+ old_xs = READ_ONCE(*map_entry);
+ if (old_xs == xs) {
+ err = 0;
+ goto out;
+ } else if (old_xs && map_flags == BPF_NOEXIST) {
+ err = -EEXIST;
+ goto out;
+ } else if (!old_xs && map_flags == BPF_EXIST) {
+ err = -ENOENT;
+ goto out;
+ }
+ xsk_map_sock_add(xs, node);
+ WRITE_ONCE(*map_entry, xs);
+ if (old_xs)
+ xsk_map_sock_delete(old_xs, map_entry);
+ spin_unlock_bh(&m->lock);
+ sockfd_put(sock);
+ return 0;
+
+out:
+ spin_unlock_bh(&m->lock);
+ sockfd_put(sock);
+ xsk_map_node_free(node);
+ return err;
+}
+
+static int xsk_map_delete_elem(struct bpf_map *map, void *key)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct xdp_sock *old_xs, **map_entry;
+ int k = *(u32 *)key;
+
+ if (k >= map->max_entries)
+ return -EINVAL;
+
+ spin_lock_bh(&m->lock);
+ map_entry = &m->xsk_map[k];
+ old_xs = xchg(map_entry, NULL);
+ if (old_xs)
+ xsk_map_sock_delete(old_xs, map_entry);
+ spin_unlock_bh(&m->lock);
+
+ return 0;
+}
+
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+ struct xdp_sock **map_entry)
+{
+ spin_lock_bh(&map->lock);
+ if (READ_ONCE(*map_entry) == xs) {
+ WRITE_ONCE(*map_entry, NULL);
+ xsk_map_sock_delete(xs, map_entry);
+ }
+ spin_unlock_bh(&map->lock);
+}
+
+const struct bpf_map_ops xsk_map_ops = {
+ .map_alloc = xsk_map_alloc,
+ .map_free = xsk_map_free,
+ .map_get_next_key = xsk_map_get_next_key,
+ .map_lookup_elem = xsk_map_lookup_elem,
+ .map_gen_lookup = xsk_map_gen_lookup,
+ .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
+ .map_update_elem = xsk_map_update_elem,
+ .map_delete_elem = xsk_map_delete_elem,
+ .map_check_btf = map_check_no_btf,
+};
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 6921a18201a0..b7fd9c838416 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -99,4 +99,7 @@ config NET_KEY_MIGRATE
If unsure, say N.
+config XFRM_ESPINTCP
+ bool
+
endif # INET
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 212a4fcb4a88..2d4bb4b9f75e 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -11,4 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
obj-$(CONFIG_XFRM_USER) += xfrm_user.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
-obj-$(CONFIG_INET_ESPINTCP) += espintcp.o
+obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 037ea156d2f9..100e29682b48 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -6,6 +6,9 @@
#include <net/espintcp.h>
#include <linux/skmsg.h>
#include <net/inet_common.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6_stubs.h>
+#endif
static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
struct sock *sk)
@@ -31,7 +34,12 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk)
rcu_read_lock();
skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
local_bh_disable();
- xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
+ else
+#endif
+ xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
local_bh_enable();
rcu_read_unlock();
}
@@ -347,6 +355,9 @@ unlock:
static struct proto espintcp_prot __ro_after_init;
static struct proto_ops espintcp_ops __ro_after_init;
+static struct proto espintcp6_prot;
+static struct proto_ops espintcp6_ops;
+static DEFINE_MUTEX(tcpv6_prot_mutex);
static void espintcp_data_ready(struct sock *sk)
{
@@ -379,15 +390,20 @@ static void espintcp_destruct(struct sock *sk)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
+ ctx->saved_destruct(sk);
kfree(ctx);
}
bool tcp_is_ulp_esp(struct sock *sk)
{
- return sk->sk_prot == &espintcp_prot;
+ return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot;
}
EXPORT_SYMBOL_GPL(tcp_is_ulp_esp);
+static void build_protos(struct proto *espintcp_prot,
+ struct proto_ops *espintcp_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops);
static int espintcp_init_sk(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -415,10 +431,22 @@ static int espintcp_init_sk(struct sock *sk)
strp_check_rcv(&ctx->strp);
skb_queue_head_init(&ctx->ike_queue);
skb_queue_head_init(&ctx->out_queue);
- sk->sk_prot = &espintcp_prot;
- sk->sk_socket->ops = &espintcp_ops;
+
+ if (sk->sk_family == AF_INET) {
+ sk->sk_prot = &espintcp_prot;
+ sk->sk_socket->ops = &espintcp_ops;
+ } else {
+ mutex_lock(&tcpv6_prot_mutex);
+ if (!espintcp6_prot.recvmsg)
+ build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops);
+ mutex_unlock(&tcpv6_prot_mutex);
+
+ sk->sk_prot = &espintcp6_prot;
+ sk->sk_socket->ops = &espintcp6_ops;
+ }
ctx->saved_data_ready = sk->sk_data_ready;
ctx->saved_write_space = sk->sk_write_space;
+ ctx->saved_destruct = sk->sk_destruct;
sk->sk_data_ready = espintcp_data_ready;
sk->sk_write_space = espintcp_write_space;
sk->sk_destruct = espintcp_destruct;
@@ -489,6 +517,20 @@ static __poll_t espintcp_poll(struct file *file, struct socket *sock,
return mask;
}
+static void build_protos(struct proto *espintcp_prot,
+ struct proto_ops *espintcp_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops)
+{
+ memcpy(espintcp_prot, orig_prot, sizeof(struct proto));
+ memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops));
+ espintcp_prot->sendmsg = espintcp_sendmsg;
+ espintcp_prot->recvmsg = espintcp_recvmsg;
+ espintcp_prot->close = espintcp_close;
+ espintcp_prot->release_cb = espintcp_release;
+ espintcp_ops->poll = espintcp_poll;
+}
+
static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
.name = "espintcp",
.owner = THIS_MODULE,
@@ -497,13 +539,7 @@ static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
void __init espintcp_init(void)
{
- memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot));
- memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops));
- espintcp_prot.sendmsg = espintcp_sendmsg;
- espintcp_prot.recvmsg = espintcp_recvmsg;
- espintcp_prot.close = espintcp_close;
- espintcp_prot.release_cb = espintcp_release;
- espintcp_ops.poll = espintcp_poll;
+ build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops);
tcp_register_ulp(&espintcp_ulp);
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 6cc7f7f1dd68..f50d1f97cf8e 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -25,12 +25,10 @@ static void __xfrm_transport_prep(struct xfrm_state *x, struct sk_buff *skb,
struct xfrm_offload *xo = xfrm_offload(skb);
skb_reset_mac_len(skb);
- pskb_pull(skb, skb->mac_len + hsize + x->props.header_len);
-
- if (xo->flags & XFRM_GSO_SEGMENT) {
- skb_reset_transport_header(skb);
+ if (xo->flags & XFRM_GSO_SEGMENT)
skb->transport_header -= x->props.header_len;
- }
+
+ pskb_pull(skb, skb_transport_offset(skb) + x->props.header_len);
}
static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb,
diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h
index c7b0318938e2..efc5e6b2e87b 100644
--- a/net/xfrm/xfrm_inout.h
+++ b/net/xfrm/xfrm_inout.h
@@ -6,6 +6,38 @@
#ifndef XFRM_INOUT_H
#define XFRM_INOUT_H 1
+static inline void xfrm4_extract_header(struct sk_buff *skb)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+
+ XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
+ XFRM_MODE_SKB_CB(skb)->id = iph->id;
+ XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
+ XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
+ XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
+ XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph);
+ memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
+ sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+}
+
+static inline void xfrm6_extract_header(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
+ XFRM_MODE_SKB_CB(skb)->id = 0;
+ XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
+ XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
+ XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
+ XFRM_MODE_SKB_CB(skb)->optlen = 0;
+ memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
+ sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+#else
+ WARN_ON_ONCE(1);
+#endif
+}
+
static inline void xfrm6_beet_make_header(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index aa35f23c4912..bd984ff17c2d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -353,17 +353,18 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
{
const struct xfrm_mode *inner_mode = &x->inner_mode;
- const struct xfrm_state_afinfo *afinfo;
- int err = -EAFNOSUPPORT;
-
- rcu_read_lock();
- afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
- if (likely(afinfo))
- err = afinfo->extract_input(x, skb);
- rcu_read_unlock();
- if (err)
- return err;
+ switch (x->outer_mode.family) {
+ case AF_INET:
+ xfrm4_extract_header(skb);
+ break;
+ case AF_INET6:
+ xfrm6_extract_header(skb);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EAFNOSUPPORT;
+ }
if (x->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@ -644,7 +645,7 @@ resume:
dev_put(skb->dev);
spin_lock(&x->lock);
- if (nexthdr <= 0) {
+ if (nexthdr < 0) {
if (nexthdr == -EBADMSG) {
xfrm_audit_state_icvfail(x, skb,
x->type->proto);
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 3361e3ac5714..c407ecbc5d46 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -145,7 +145,6 @@ static int xfrmi_create(struct net_device *dev)
if (err < 0)
goto out;
- dev_hold(dev);
xfrmi_link(xfrmn, xi);
return 0;
@@ -175,7 +174,6 @@ static void xfrmi_dev_uninit(struct net_device *dev)
struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
xfrmi_unlink(xfrmn, xi);
- dev_put(dev);
}
static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
@@ -750,13 +748,35 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
.get_link_net = xfrmi_get_link_net,
};
+static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
+{
+ struct net *net;
+ LIST_HEAD(list);
+
+ rtnl_lock();
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+ struct xfrm_if __rcu **xip;
+ struct xfrm_if *xi;
+
+ for (xip = &xfrmn->xfrmi[0];
+ (xi = rtnl_dereference(*xip)) != NULL;
+ xip = &xi->next)
+ unregister_netdevice_queue(xi->dev, &list);
+ }
+ unregister_netdevice_many(&list);
+ rtnl_unlock();
+}
+
static struct pernet_operations xfrmi_net_ops = {
+ .exit_batch = xfrmi_exit_batch_net,
.id = &xfrmi_net_id,
.size = sizeof(struct xfrmi_net),
};
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
.handler = xfrm6_rcv,
+ .input_handler = xfrm_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -764,6 +784,7 @@ static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
.handler = xfrm6_rcv,
+ .input_handler = xfrm_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -771,6 +792,7 @@ static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
.handler = xfrm6_rcv,
+ .input_handler = xfrm_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 2fd3d990d992..e4c23f69f69f 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -13,9 +13,15 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
+#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_route.h>
+#include <net/ipv6_stubs.h>
+#endif
+
#include "xfrm_inout.h"
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -565,6 +571,22 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
struct xfrm_state *x = skb_dst(skb)->xfrm;
int err;
+ switch (x->outer_mode.family) {
+ case AF_INET:
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+#ifdef CONFIG_NETFILTER
+ IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+#endif
+ break;
+ case AF_INET6:
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+
+#ifdef CONFIG_NETFILTER
+ IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+#endif
+ break;
+ }
+
secpath_reset(skb);
if (xfrm_dev_offload_ok(skb, x)) {
@@ -583,18 +605,20 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
xfrm_state_hold(x);
if (skb_is_gso(skb)) {
- skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
+ if (skb->inner_protocol)
+ return xfrm_output_gso(net, sk, skb);
- return xfrm_output2(net, sk, skb);
+ skb_shinfo(skb)->gso_type |= SKB_GSO_ESP;
+ goto out;
}
if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM)
goto out;
+ } else {
+ if (skb_is_gso(skb))
+ return xfrm_output_gso(net, sk, skb);
}
- if (skb_is_gso(skb))
- return xfrm_output_gso(net, sk, skb);
-
if (skb->ip_summed == CHECKSUM_PARTIAL) {
err = skb_checksum_help(skb);
if (err) {
@@ -609,11 +633,101 @@ out:
}
EXPORT_SYMBOL_GPL(xfrm_output);
+static int xfrm4_tunnel_check_size(struct sk_buff *skb)
+{
+ int mtu, ret = 0;
+
+ if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
+ goto out;
+
+ if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
+ goto out;
+
+ mtu = dst_mtu(skb_dst(skb));
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) &&
+ !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
+ skb->protocol = htons(ETH_P_IP);
+
+ if (skb->sk)
+ xfrm_local_error(skb, mtu);
+ else
+ icmp_send(skb, ICMP_DEST_UNREACH,
+ ICMP_FRAG_NEEDED, htonl(mtu));
+ ret = -EMSGSIZE;
+ }
+out:
+ return ret;
+}
+
+static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+
+ err = xfrm4_tunnel_check_size(skb);
+ if (err)
+ return err;
+
+ XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;
+
+ xfrm4_extract_header(skb);
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int xfrm6_tunnel_check_size(struct sk_buff *skb)
+{
+ int mtu, ret = 0;
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (skb->ignore_df)
+ goto out;
+
+ mtu = dst_mtu(dst);
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
+ if ((!skb_is_gso(skb) && skb->len > mtu) ||
+ (skb_is_gso(skb) &&
+ !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
+ skb->dev = dst->dev;
+ skb->protocol = htons(ETH_P_IPV6);
+
+ if (xfrm6_local_dontfrag(skb->sk))
+ ipv6_stub->xfrm6_local_rxpmtu(skb, mtu);
+ else if (skb->sk)
+ xfrm_local_error(skb, mtu);
+ else
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ ret = -EMSGSIZE;
+ }
+out:
+ return ret;
+}
+#endif
+
+static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ int err;
+
+ err = xfrm6_tunnel_check_size(skb);
+ if (err)
+ return err;
+
+ XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
+
+ xfrm6_extract_header(skb);
+ return 0;
+#else
+ WARN_ON_ONCE(1);
+ return -EAFNOSUPPORT;
+#endif
+}
+
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
- const struct xfrm_state_afinfo *afinfo;
const struct xfrm_mode *inner_mode;
- int err = -EAFNOSUPPORT;
if (x->sel.family == AF_UNSPEC)
inner_mode = xfrm_ip2inner_mode(x,
@@ -624,13 +738,14 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
if (inner_mode == NULL)
return -EAFNOSUPPORT;
- rcu_read_lock();
- afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
- if (likely(afinfo))
- err = afinfo->extract_output(x, skb);
- rcu_read_unlock();
+ switch (inner_mode->family) {
+ case AF_INET:
+ return xfrm4_extract_output(x, skb);
+ case AF_INET6:
+ return xfrm6_extract_output(x, skb);
+ }
- return err;
+ return -EAFNOSUPPORT;
}
void xfrm_local_error(struct sk_buff *skb, int mtu)
@@ -640,7 +755,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
if (skb->protocol == htons(ETH_P_IP))
proto = AF_INET;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ else if (skb->protocol == htons(ETH_P_IPV6) &&
+ skb->sk->sk_family == AF_INET6)
proto = AF_INET6;
else
return;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 297b2fdb3c29..564aa6492e7c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1436,12 +1436,7 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
struct xfrm_policy *pol)
{
- u32 mark = policy->mark.v & policy->mark.m;
-
- if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
- return true;
-
- if ((mark & pol->mark.m) == pol->mark.v &&
+ if (policy->mark.v == pol->mark.v &&
policy->priority == pol->priority)
return true;