summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-07-29 17:06:50 +0300
committerDavid S. Miller <davem@davemloft.net>2021-07-29 17:06:50 +0300
commite5fe3a5fe333b9967eeb99966bcf3ec710318554 (patch)
treea5f61d0f3c74a55101f2c5710d6370df4393026f /include
parent658e6b1612c6cc680381b6827dca9c3ee8862ee6 (diff)
parent6a2d98b18900002b6d24c4c3850c1c2467d13898 (diff)
downloadlinux-e5fe3a5fe333b9967eeb99966bcf3ec710318554.tar.xz
Merge branch 'mctp'
Jeremy Kerr says: ==================== Add Management Component Transport Protocol support This series adds core MCTP support to the kernel. From the Kconfig description: Management Component Transport Protocol (MCTP) is an in-system protocol for communicating between management controllers and their managed devices (peripherals, host processors, etc.). The protocol is defined by DMTF specification DSP0236. This option enables core MCTP support. For communicating with other devices, you'll want to enable a driver for a specific hardware channel. This implementation allows a sockets-based API for sending and receiving MCTP messages via sendmsg/recvmsg on SOCK_DGRAM sockets. Kernel stack control is all via netlink, using existing RTM_* messages. The userspace ABI change is fairly small; just the necessary AF_/ETH_P_/ARPHDR_ constants, a new sockaddr, and a new netlink attribute. For MAINTAINERS, I've just included netdev@ as the list entry. I'm happy to alter this based on preferences here - an alternative would be the OpenBMC list (the main user of the MCTP interface), or we can create a new list entirely. We have a couple of interface drivers almost ready to go at the moment, but those can wait until the core code has some review. This is v4 of the series; v1 and v2 were both RFC. selinux folks: CCing 01/15 due to the new PF_MCTP protocol family. linux-doc folks: CCing 15/15 for the new MCTP overview document. Review, comments, questions etc. are most welcome. Cheers, Jeremy v2: - change to match spec terminology: controller -> component - require specific capabilities for bind() & sendmsg() - add address and tag defintions to uapi - add selinux AF_MCTP table definitions - remove strict cflags; warnings are present in common headers v3: - require caps for MCTP bind() & send() - comment typo fixes - switch to an array for local EIDs - fix addrinfo dump iteration & error path - add RTM_DELADDR - remove GENMASK() and BIT() from uapi v4: - drop tun patch; that can be submitted separately - keep nipa happy: add maintainer CCs, including doc and selinux - net-next rebase - Include AF_MCTP in af_family_slock_keys and pf_family_names - Introduce MODULE_ definitions earlier - upstream change: set_link_af no longer called with RTNL held - add kdoc for net_device.mctp_ptr - don't inline mctp_rt_match_eid - require rtm_type == RTN_UNICAST in route management handlers - remove unused RTAX policy table - fix mctp_sock->keys rcu annotations - fix spurious rcu_read_unlock in route input ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/netdevice.h4
-rw-r--r--include/linux/socket.h6
-rw-r--r--include/net/mctp.h231
-rw-r--r--include/net/mctpdevice.h36
-rw-r--r--include/net/net_namespace.h4
-rw-r--r--include/net/netns/mctp.h36
-rw-r--r--include/uapi/linux/if_arp.h1
-rw-r--r--include/uapi/linux/if_ether.h3
-rw-r--r--include/uapi/linux/if_link.h10
-rw-r--r--include/uapi/linux/mctp.h36
10 files changed, 366 insertions, 1 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 226bbee06730..d63a94ecbf3b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1823,6 +1823,7 @@ enum netdev_ml_priv_type {
* @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network
* device struct
* @mpls_ptr: mpls_dev struct pointer
+ * @mctp_ptr: MCTP specific data
*
* @dev_addr: Hw address (before bcast,
* because most packets are unicast)
@@ -2110,6 +2111,9 @@ struct net_device {
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
struct mpls_dev __rcu *mpls_ptr;
#endif
+#if IS_ENABLED(CONFIG_MCTP)
+ struct mctp_dev __rcu *mctp_ptr;
+#endif
/*
* Cache lines mostly used on receive path (including eth_type_trans())
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 0d8e3dcb7f88..fd9ce51582d8 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -223,8 +223,11 @@ struct ucred {
* reuses AF_INET address family
*/
#define AF_XDP 44 /* XDP sockets */
+#define AF_MCTP 45 /* Management component
+ * transport protocol
+ */
-#define AF_MAX 45 /* For now.. */
+#define AF_MAX 46 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
@@ -274,6 +277,7 @@ struct ucred {
#define PF_QIPCRTR AF_QIPCRTR
#define PF_SMC AF_SMC
#define PF_XDP AF_XDP
+#define PF_MCTP AF_MCTP
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
diff --git a/include/net/mctp.h b/include/net/mctp.h
new file mode 100644
index 000000000000..54bbe042c973
--- /dev/null
+++ b/include/net/mctp.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTP_H
+#define __NET_MCTP_H
+
+#include <linux/bits.h>
+#include <linux/mctp.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/* MCTP packet definitions */
+struct mctp_hdr {
+ u8 ver;
+ u8 dest;
+ u8 src;
+ u8 flags_seq_tag;
+};
+
+#define MCTP_VER_MIN 1
+#define MCTP_VER_MAX 1
+
+/* Definitions for flags_seq_tag field */
+#define MCTP_HDR_FLAG_SOM BIT(7)
+#define MCTP_HDR_FLAG_EOM BIT(6)
+#define MCTP_HDR_FLAG_TO BIT(3)
+#define MCTP_HDR_FLAGS GENMASK(5, 3)
+#define MCTP_HDR_SEQ_SHIFT 4
+#define MCTP_HDR_SEQ_MASK GENMASK(1, 0)
+#define MCTP_HDR_TAG_SHIFT 0
+#define MCTP_HDR_TAG_MASK GENMASK(2, 0)
+
+#define MCTP_HEADER_MAXLEN 4
+
+#define MCTP_INITIAL_DEFAULT_NET 1
+
+static inline bool mctp_address_ok(mctp_eid_t eid)
+{
+ return eid >= 8 && eid < 255;
+}
+
+static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
+{
+ return (struct mctp_hdr *)skb_network_header(skb);
+}
+
+/* socket implementation */
+struct mctp_sock {
+ struct sock sk;
+
+ /* bind() params */
+ int bind_net;
+ mctp_eid_t bind_addr;
+ __u8 bind_type;
+
+ /* list of mctp_sk_key, for incoming tag lookup. updates protected
+ * by sk->net->keys_lock
+ */
+ struct hlist_head keys;
+};
+
+/* Key for matching incoming packets to sockets or reassembly contexts.
+ * Packets are matched on (src,dest,tag).
+ *
+ * Lifetime requirements:
+ *
+ * - keys are free()ed via RCU
+ *
+ * - a mctp_sk_key contains a reference to a struct sock; this is valid
+ * for the life of the key. On sock destruction (through unhash), the key is
+ * removed from lists (see below), and will not be observable after a RCU
+ * grace period.
+ *
+ * any RX occurring within that grace period may still queue to the socket,
+ * but will hit the SOCK_DEAD case before the socket is freed.
+ *
+ * - these mctp_sk_keys appear on two lists:
+ * 1) the struct mctp_sock->keys list
+ * 2) the struct netns_mctp->keys list
+ *
+ * updates to either list are performed under the netns_mctp->keys
+ * lock.
+ *
+ * - a key may have a sk_buff attached as part of an in-progress message
+ * reassembly (->reasm_head). The reassembly context is protected by
+ * reasm_lock, which may be acquired with the keys lock (above) held, if
+ * necessary. Consequently, keys lock *cannot* be acquired with the
+ * reasm_lock held.
+ *
+ * - there are two destruction paths for a mctp_sk_key:
+ *
+ * - through socket unhash (see mctp_sk_unhash). This performs the list
+ * removal under keys_lock.
+ *
+ * - where a key is established to receive a reply message: after receiving
+ * the (complete) reply, or during reassembly errors. Here, we clean up
+ * the reassembly context (marking reasm_dead, to prevent another from
+ * starting), and remove the socket from the netns & socket lists.
+ */
+struct mctp_sk_key {
+ mctp_eid_t peer_addr;
+ mctp_eid_t local_addr;
+ __u8 tag; /* incoming tag match; invert TO for local */
+
+ /* we hold a ref to sk when set */
+ struct sock *sk;
+
+ /* routing lookup list */
+ struct hlist_node hlist;
+
+ /* per-socket list */
+ struct hlist_node sklist;
+
+ /* incoming fragment reassembly context */
+ spinlock_t reasm_lock;
+ struct sk_buff *reasm_head;
+ struct sk_buff **reasm_tailp;
+ bool reasm_dead;
+ u8 last_seq;
+
+ struct rcu_head rcu;
+};
+
+struct mctp_skb_cb {
+ unsigned int magic;
+ unsigned int net;
+ mctp_eid_t src;
+};
+
+/* skb control-block accessors with a little extra debugging for initial
+ * development.
+ *
+ * TODO: remove checks & mctp_skb_cb->magic; replace callers of __mctp_cb
+ * with mctp_cb().
+ *
+ * __mctp_cb() is only for the initial ingress code; we should see ->magic set
+ * at all times after this.
+ */
+static inline struct mctp_skb_cb *__mctp_cb(struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = (void *)skb->cb;
+
+ cb->magic = 0x4d435450;
+ return cb;
+}
+
+static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = (void *)skb->cb;
+
+ WARN_ON(cb->magic != 0x4d435450);
+ return (void *)(skb->cb);
+}
+
+/* Route definition.
+ *
+ * These are held in the pernet->mctp.routes list, with RCU protection for
+ * removed routes. We hold a reference to the netdev; routes need to be
+ * dropped on NETDEV_UNREGISTER events.
+ *
+ * Updates to the route table are performed under rtnl; all reads under RCU,
+ * so routes cannot be referenced over a RCU grace period. Specifically: A
+ * caller cannot block between mctp_route_lookup and passing the route to
+ * mctp_do_route.
+ */
+struct mctp_route {
+ mctp_eid_t min, max;
+
+ struct mctp_dev *dev;
+ unsigned int mtu;
+ int (*output)(struct mctp_route *route,
+ struct sk_buff *skb);
+
+ struct list_head list;
+ refcount_t refs;
+ struct rcu_head rcu;
+};
+
+/* route interfaces */
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr);
+
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb);
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
+
+/* routing <--> device interface */
+unsigned int mctp_default_net(struct net *net);
+int mctp_default_net_set(struct net *net, unsigned int index);
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr);
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr);
+void mctp_route_remove_dev(struct mctp_dev *mdev);
+
+/* neighbour definitions */
+enum mctp_neigh_source {
+ MCTP_NEIGH_STATIC,
+ MCTP_NEIGH_DISCOVER,
+};
+
+struct mctp_neigh {
+ struct mctp_dev *dev;
+ mctp_eid_t eid;
+ enum mctp_neigh_source source;
+
+ unsigned char ha[MAX_ADDR_LEN];
+
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+int mctp_neigh_init(void);
+void mctp_neigh_exit(void);
+
+// ret_hwaddr may be NULL, otherwise must have space for MAX_ADDR_LEN
+int mctp_neigh_lookup(struct mctp_dev *dev, mctp_eid_t eid,
+ void *ret_hwaddr);
+void mctp_neigh_remove_dev(struct mctp_dev *mdev);
+
+int mctp_routes_init(void);
+void mctp_routes_exit(void);
+
+void mctp_device_init(void);
+void mctp_device_exit(void);
+
+#endif /* __NET_MCTP_H */
diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h
new file mode 100644
index 000000000000..57e773ff08bb
--- /dev/null
+++ b/include/net/mctpdevice.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP) - device
+ * definitions.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTPDEVICE_H
+#define __NET_MCTPDEVICE_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/refcount.h>
+
+struct mctp_dev {
+ struct net_device *dev;
+
+ unsigned int net;
+
+ /* Only modified under RTNL. Reads have addrs_lock held */
+ u8 *addrs;
+ size_t num_addrs;
+ spinlock_t addrs_lock;
+
+ struct rcu_head rcu;
+};
+
+#define MCTP_INITIAL_DEFAULT_NET 1
+
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev);
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
+
+#endif /* __NET_MCTPDEVICE_H */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 12cf6d7ea62c..cc54750dd3db 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -34,6 +34,7 @@
#include <net/netns/xdp.h>
#include <net/netns/smc.h>
#include <net/netns/bpf.h>
+#include <net/netns/mctp.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
@@ -167,6 +168,9 @@ struct net {
#ifdef CONFIG_XDP_SOCKETS
struct netns_xdp xdp;
#endif
+#if IS_ENABLED(CONFIG_MCTP)
+ struct netns_mctp mctp;
+#endif
#if IS_ENABLED(CONFIG_CRYPTO_USER)
struct sock *crypto_nlsk;
#endif
diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
new file mode 100644
index 000000000000..acedef12a35e
--- /dev/null
+++ b/include/net/netns/mctp.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MCTP per-net structures
+ */
+
+#ifndef __NETNS_MCTP_H__
+#define __NETNS_MCTP_H__
+
+#include <linux/types.h>
+
+struct netns_mctp {
+ /* Only updated under RTNL, entries freed via RCU */
+ struct list_head routes;
+
+ /* Bound sockets: list of sockets bound by type.
+ * This list is updated from non-atomic contexts (under bind_lock),
+ * and read (under rcu) in packet rx
+ */
+ struct mutex bind_lock;
+ struct hlist_head binds;
+
+ /* tag allocations. This list is read and updated from atomic contexts,
+ * but elements are free()ed after a RCU grace-period
+ */
+ spinlock_t keys_lock;
+ struct hlist_head keys;
+
+ /* MCTP network */
+ unsigned int default_net;
+
+ /* neighbour table */
+ struct mutex neigh_lock;
+ struct list_head neighbours;
+};
+
+#endif /* __NETNS_MCTP_H__ */
diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
index c3cc5a9e5eaf..4783af9fe520 100644
--- a/include/uapi/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
@@ -54,6 +54,7 @@
#define ARPHRD_X25 271 /* CCITT X.25 */
#define ARPHRD_HWX25 272 /* Boards with X.25 in firmware */
#define ARPHRD_CAN 280 /* Controller Area Network */
+#define ARPHRD_MCTP 290
#define ARPHRD_PPP 512
#define ARPHRD_CISCO 513 /* Cisco HDLC */
#define ARPHRD_HDLC ARPHRD_CISCO
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index a0b637911d3c..5f589c7a8382 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -151,6 +151,9 @@
#define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and
* aggregation protocol
*/
+#define ETH_P_MCTP 0x00FA /* Management component transport
+ * protocol packets
+ */
/*
* This is an Ethernet frame header.
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 4882e81514b6..49b22afab78f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1260,4 +1260,14 @@ struct ifla_rmnet_flags {
__u32 mask;
};
+/* MCTP section */
+
+enum {
+ IFLA_MCTP_UNSPEC,
+ IFLA_MCTP_NET,
+ __IFLA_MCTP_MAX,
+};
+
+#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+
#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
new file mode 100644
index 000000000000..52b54d13f385
--- /dev/null
+++ b/include/uapi/linux/mctp.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __UAPI_MCTP_H
+#define __UAPI_MCTP_H
+
+#include <linux/types.h>
+
+typedef __u8 mctp_eid_t;
+
+struct mctp_addr {
+ mctp_eid_t s_addr;
+};
+
+struct sockaddr_mctp {
+ unsigned short int smctp_family;
+ int smctp_network;
+ struct mctp_addr smctp_addr;
+ __u8 smctp_type;
+ __u8 smctp_tag;
+};
+
+#define MCTP_NET_ANY 0x0
+
+#define MCTP_ADDR_NULL 0x00
+#define MCTP_ADDR_ANY 0xff
+
+#define MCTP_TAG_MASK 0x07
+#define MCTP_TAG_OWNER 0x08
+
+#endif /* __UAPI_MCTP_H */