From 55c42fa7fa331f98062c32799456420930b8bf8c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Sep 2021 16:33:19 -0700 Subject: mptcp: add MPTCP_INFO getsockopt Its not compatible with multipath-tcp.org kernel one. 1. The out-of-tree implementation defines a different 'struct mptcp_info', with embedded __user addresses for additional data such as endpoint addresses. 2. Mat Martineau points out that embedded __user addresses doesn't work with BPF_CGROUP_RUN_PROG_GETSOCKOPT() which assumes that copying in optsize bytes from optval provides all data that got copied to userspace. This provides mptcp_info data for the given mptcp socket. Userspace sets optlen to the size of the structure it expects. The kernel updates it to contain the number of bytes that it copied. This allows to append more information to the structure later. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/socket.h') diff --git a/include/linux/socket.h b/include/linux/socket.h index 041d6032a348..7612d760b6a9 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -364,6 +364,7 @@ struct ucred { #define SOL_KCM 281 #define SOL_TLS 282 #define SOL_XDP 283 +#define SOL_MPTCP 284 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From 99ce45d5e7dbde399997a630f45ac9f654fa4bcc Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 26 Oct 2021 09:57:28 +0800 Subject: mctp: Implement extended addressing This change allows an extended address struct - struct sockaddr_mctp_ext - to be passed to sendmsg/recvmsg. This allows userspace to specify output ifindex and physical address information (for sendmsg) or receive the input ifindex/physaddr for incoming messages (for recvmsg). This is typically used by userspace for MCTP address discovery and assignment operations. The extended addressing facility is conditional on a new sockopt: MCTP_OPT_ADDR_EXT; userspace must explicitly enable addressing before the kernel will consume/populate the extended address data. Includes a fix for an uninitialised var: Reported-by: kernel test robot Signed-off-by: Jeremy Kerr Signed-off-by: David S. Miller --- include/linux/socket.h | 1 + include/net/mctp.h | 13 +++++-- include/uapi/linux/mctp.h | 11 ++++++ net/mctp/af_mctp.c | 86 ++++++++++++++++++++++++++++++++++++----- net/mctp/route.c | 98 +++++++++++++++++++++++++++++++++++------------ 5 files changed, 170 insertions(+), 39 deletions(-) (limited to 'include/linux/socket.h') diff --git a/include/linux/socket.h b/include/linux/socket.h index 7612d760b6a9..8ef26d89ef49 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -365,6 +365,7 @@ struct ucred { #define SOL_TLS 282 #define SOL_XDP 283 #define SOL_MPTCP 284 +#define SOL_MCTP 285 /* IPX options */ #define IPX_TYPE 1 diff --git a/include/net/mctp.h b/include/net/mctp.h index 2a83443bdfac..23bec708f4c7 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -58,6 +59,9 @@ struct mctp_sock { mctp_eid_t bind_addr; __u8 bind_type; + /* sendmsg()/recvmsg() uses struct sockaddr_mctp_ext */ + bool addr_ext; + /* list of mctp_sk_key, for incoming tag lookup. updates protected * by sk->net->keys_lock */ @@ -153,7 +157,10 @@ struct mctp_sk_key { struct mctp_skb_cb { unsigned int magic; unsigned int net; + int ifindex; /* extended/direct addressing if set */ mctp_eid_t src; + unsigned char halen; + unsigned char haddr[MAX_ADDR_LEN]; }; /* skb control-block accessors with a little extra debugging for initial @@ -177,6 +184,7 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb) { struct mctp_skb_cb *cb = (void *)skb->cb; + BUILD_BUG_ON(sizeof(struct mctp_skb_cb) > sizeof(skb->cb)); WARN_ON(cb->magic != 0x4d435450); return (void *)(skb->cb); } @@ -189,8 +197,7 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb) * * Updates to the route table are performed under rtnl; all reads under RCU, * so routes cannot be referenced over a RCU grace period. Specifically: A - * caller cannot block between mctp_route_lookup and passing the route to - * mctp_do_route. + * caller cannot block between mctp_route_lookup and mctp_route_release() */ struct mctp_route { mctp_eid_t min, max; @@ -210,8 +217,6 @@ struct mctp_route { struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, mctp_eid_t daddr); -int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb); - int mctp_local_output(struct sock *sk, struct mctp_route *rt, struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag); diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 6acd4ccafbf7..07b0318716fc 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -11,6 +11,7 @@ #include #include +#include typedef __u8 mctp_eid_t; @@ -28,6 +29,14 @@ struct sockaddr_mctp { __u8 __smctp_pad1; }; +struct sockaddr_mctp_ext { + struct sockaddr_mctp smctp_base; + int smctp_ifindex; + __u8 smctp_halen; + __u8 __smctp_pad0[3]; + __u8 smctp_haddr[MAX_ADDR_LEN]; +}; + #define MCTP_NET_ANY 0x0 #define MCTP_ADDR_NULL 0x00 @@ -36,4 +45,6 @@ struct sockaddr_mctp { #define MCTP_TAG_MASK 0x07 #define MCTP_TAG_OWNER 0x08 +#define MCTP_OPT_ADDR_EXT 1 + #endif /* __UAPI_MCTP_H */ diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 66a411d60b6c..d344b02a1cde 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -77,6 +77,7 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr); int rc, addrlen = msg->msg_namelen; struct sock *sk = sock->sk; + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb; struct mctp_route *rt; struct sk_buff *skb; @@ -100,11 +101,6 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (addr->smctp_network == MCTP_NET_ANY) addr->smctp_network = mctp_default_net(sock_net(sk)); - rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, - addr->smctp_addr.s_addr); - if (!rt) - return -EHOSTUNREACH; - skb = sock_alloc_send_skb(sk, hlen + 1 + len, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) @@ -116,19 +112,45 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) *(u8 *)skb_put(skb, 1) = addr->smctp_type; rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len); - if (rc < 0) { - kfree_skb(skb); - return rc; - } + if (rc < 0) + goto err_free; /* set up cb */ cb = __mctp_cb(skb); cb->net = addr->smctp_network; + /* direct addressing */ + if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) { + DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, + extaddr, msg->msg_name); + + if (extaddr->smctp_halen > sizeof(cb->haddr)) { + rc = -EINVAL; + goto err_free; + } + + cb->ifindex = extaddr->smctp_ifindex; + cb->halen = extaddr->smctp_halen; + memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen); + + rt = NULL; + } else { + rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, + addr->smctp_addr.s_addr); + if (!rt) { + rc = -EHOSTUNREACH; + goto err_free; + } + } + rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr, addr->smctp_tag); return rc ? : len; + +err_free: + kfree_skb(skb); + return rc; } static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, @@ -136,6 +158,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, { DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); struct sock *sk = sock->sk; + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct sk_buff *skb; size_t msglen; u8 type; @@ -181,6 +204,16 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, addr->smctp_tag = hdr->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); msg->msg_namelen = sizeof(*addr); + + if (msk->addr_ext) { + DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, ae, + msg->msg_name); + msg->msg_namelen = sizeof(*ae); + ae->smctp_ifindex = cb->ifindex; + ae->smctp_halen = cb->halen; + memset(ae->smctp_haddr, 0x0, sizeof(ae->smctp_haddr)); + memcpy(ae->smctp_haddr, cb->haddr, cb->halen); + } } rc = len; @@ -196,12 +229,45 @@ out_free: static int mctp_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { - return -EINVAL; + struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk); + int val; + + if (level != SOL_MCTP) + return -EINVAL; + + if (optname == MCTP_OPT_ADDR_EXT) { + if (optlen != sizeof(int)) + return -EINVAL; + if (copy_from_sockptr(&val, optval, sizeof(int))) + return -EFAULT; + msk->addr_ext = val; + return 0; + } + + return -ENOPROTOOPT; } static int mctp_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { + struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk); + int len, val; + + if (level != SOL_MCTP) + return -EINVAL; + + if (get_user(len, optlen)) + return -EFAULT; + + if (optname == MCTP_OPT_ADDR_EXT) { + if (len != sizeof(int)) + return -EINVAL; + val = !!msk->addr_ext; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; + } + return -EINVAL; } diff --git a/net/mctp/route.c b/net/mctp/route.c index 82fb5ae524f6..c23ab3547ee5 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -434,6 +434,7 @@ static unsigned int mctp_route_mtu(struct mctp_route *rt) static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) { + struct mctp_skb_cb *cb = mctp_cb(skb); struct mctp_hdr *hdr = mctp_hdr(skb); char daddr_buf[MAX_ADDR_LEN]; char *daddr = NULL; @@ -448,9 +449,14 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) return -EMSGSIZE; } - /* If lookup fails let the device handle daddr==NULL */ - if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) - daddr = daddr_buf; + if (cb->ifindex) { + /* direct route; use the hwaddr we stashed in sendmsg */ + daddr = cb->haddr; + } else { + /* If lookup fails let the device handle daddr==NULL */ + if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) + daddr = daddr_buf; + } rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), daddr, skb->dev->dev_addr, skb->len); @@ -649,16 +655,6 @@ static struct mctp_route *mctp_route_lookup_null(struct net *net, return NULL; } -/* sends a skb to rt and releases the route. */ -int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb) -{ - int rc; - - rc = rt->output(rt, skb); - mctp_route_release(rt); - return rc; -} - static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, unsigned int mtu, u8 tag) { @@ -725,7 +721,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, /* copy message payload */ skb_copy_bits(skb, pos, skb_transport_header(skb2), size); - /* do route, but don't drop the rt reference */ + /* do route */ rc = rt->output(rt, skb2); if (rc) break; @@ -734,7 +730,6 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, pos += size; } - mctp_route_release(rt); consume_skb(skb); return rc; } @@ -744,15 +739,51 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, { struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_skb_cb *cb = mctp_cb(skb); + struct mctp_route tmp_rt; + struct net_device *dev; struct mctp_hdr *hdr; unsigned long flags; unsigned int mtu; mctp_eid_t saddr; + bool ext_rt; int rc; u8 tag; - if (WARN_ON(!rt->dev)) + rc = -ENODEV; + + if (rt) { + ext_rt = false; + dev = NULL; + + if (WARN_ON(!rt->dev)) + goto out_release; + + } else if (cb->ifindex) { + ext_rt = true; + rt = &tmp_rt; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); + if (!dev) { + rcu_read_unlock(); + return rc; + } + + rt->dev = __mctp_dev_get(dev); + rcu_read_unlock(); + + if (!rt->dev) + goto out_release; + + /* establish temporary route - we set up enough to keep + * mctp_route_output happy + */ + rt->output = mctp_route_output; + rt->mtu = 0; + + } else { return -EINVAL; + } spin_lock_irqsave(&rt->dev->addrs_lock, flags); if (rt->dev->num_addrs == 0) { @@ -765,18 +796,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, spin_unlock_irqrestore(&rt->dev->addrs_lock, flags); if (rc) - return rc; + goto out_release; if (req_tag & MCTP_HDR_FLAG_TO) { rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag); if (rc) - return rc; + goto out_release; tag |= MCTP_HDR_FLAG_TO; } else { tag = req_tag; } - skb->protocol = htons(ETH_P_MCTP); skb->priority = 0; skb_reset_transport_header(skb); @@ -796,12 +826,22 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, mtu = mctp_route_mtu(rt); if (skb->len + sizeof(struct mctp_hdr) <= mtu) { - hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | - tag; - return mctp_do_route(rt, skb); + hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | + MCTP_HDR_FLAG_EOM | tag; + rc = rt->output(rt, skb); } else { - return mctp_do_fragment_route(rt, skb, mtu, tag); + rc = mctp_do_fragment_route(rt, skb, mtu, tag); } + +out_release: + if (!ext_rt) + mctp_route_release(rt); + + if (dev) + dev_put(dev); + + return rc; + } /* route management */ @@ -942,8 +982,15 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) goto err_drop; - cb = __mctp_cb(skb); + /* MCTP drivers must populate halen/haddr */ + if (dev->type == ARPHRD_MCTP) { + cb = mctp_cb(skb); + } else { + cb = __mctp_cb(skb); + cb->halen = 0; + } cb->net = READ_ONCE(mdev->net); + cb->ifindex = dev->ifindex; rt = mctp_route_lookup(net, cb->net, mh->dest); @@ -954,7 +1001,8 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, if (!rt) goto err_drop; - mctp_do_route(rt, skb); + rt->output(rt, skb); + mctp_route_release(rt); return NET_RX_SUCCESS; -- cgit v1.2.3