From 3d249d4ca7d0ed6629a135ea1ea21c72286c0d80 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 11 Nov 2011 22:16:48 +0000 Subject: net: introduce ethernet teaming device This patch introduces new network device called team. It supposes to be very fast, simple, userspace-driven alternative to existing bonding driver. Userspace library called libteam with couple of demo apps is available here: https://github.com/jpirko/libteam Note it's still in its dipers atm. team<->libteam use generic netlink for communication. That and rtnl suppose to be the only way to configure team device, no sysfs etc. Python binding of libteam was recently introduced. Daemon providing arpmon/miimon active-backup functionality will be introduced shortly. All what's necessary is already implemented in kernel team driver. v7->v8: - check ndo_ndo_vlan_rx_[add/kill]_vid functions before calling them. - use dev_kfree_skb_any() instead of dev_kfree_skb() v6->v7: - transmit and receive functions are not checked in hot paths. That also resolves memory leak on transmit when no port is present v5->v6: - changed couple of _rcu calls to non _rcu ones in non-readers v4->v5: - team_change_mtu() uses team->lock while travesing though port list - mac address changes are moved completely to jurisdiction of userspace daemon. This way the daemon can do FOM1, FOM2 and possibly other weird things with mac addresses. Only round-robin mode sets up all ports to bond's address then enslaved. - Extended Kconfig text v3->v4: - remove redundant synchronize_rcu from __team_change_mode() - revert "set and clear of mode_ops happens per pointer, not per byte" - extend comment of function __team_change_mode() v2->v3: - team_change_mtu() uses rcu version of list traversal to unwind - set and clear of mode_ops happens per pointer, not per byte - port hashlist changed to be embedded into team structure - error branch in team_port_enter() does cleanup now - fixed rtln->rtnl v1->v2: - modes are made as modules. Makes team more modular and extendable. - several commenters' nitpicks found on v1 were fixed - several other bugs were fixed. - note I ignored Eric's comment about roundrobin port selector as Eric's way may be easily implemented as another mode (mode "random") in future. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + include/linux/if.h | 1 + include/linux/if_team.h | 242 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 include/linux/if_team.h (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 619b5657af77..0b091b32267d 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -185,6 +185,7 @@ header-y += if_pppol2tp.h header-y += if_pppox.h header-y += if_slip.h header-y += if_strip.h +header-y += if_team.h header-y += if_tr.h header-y += if_tun.h header-y += if_tunnel.h diff --git a/include/linux/if.h b/include/linux/if.h index db20bd4fd16b..06b6ef60c821 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -79,6 +79,7 @@ #define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing * skbs on transmit */ #define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ +#define IFF_TEAM_PORT 0x40000 /* device used as team port */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/if_team.h b/include/linux/if_team.h new file mode 100644 index 000000000000..14f6388f5460 --- /dev/null +++ b/include/linux/if_team.h @@ -0,0 +1,242 @@ +/* + * include/linux/if_team.h - Network team device driver header + * Copyright (c) 2011 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _LINUX_IF_TEAM_H_ +#define _LINUX_IF_TEAM_H_ + +#ifdef __KERNEL__ + +struct team_pcpu_stats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_multicast; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; + u32 rx_dropped; + u32 tx_dropped; +}; + +struct team; + +struct team_port { + struct net_device *dev; + struct hlist_node hlist; /* node in hash list */ + struct list_head list; /* node in ordinary list */ + struct team *team; + int index; + + /* + * A place for storing original values of the device before it + * become a port. + */ + struct { + unsigned char dev_addr[MAX_ADDR_LEN]; + unsigned int mtu; + } orig; + + bool linkup; + u32 speed; + u8 duplex; + + struct rcu_head rcu; +}; + +struct team_mode_ops { + int (*init)(struct team *team); + void (*exit)(struct team *team); + rx_handler_result_t (*receive)(struct team *team, + struct team_port *port, + struct sk_buff *skb); + bool (*transmit)(struct team *team, struct sk_buff *skb); + int (*port_enter)(struct team *team, struct team_port *port); + void (*port_leave)(struct team *team, struct team_port *port); + void (*port_change_mac)(struct team *team, struct team_port *port); +}; + +enum team_option_type { + TEAM_OPTION_TYPE_U32, + TEAM_OPTION_TYPE_STRING, +}; + +struct team_option { + struct list_head list; + const char *name; + enum team_option_type type; + int (*getter)(struct team *team, void *arg); + int (*setter)(struct team *team, void *arg); +}; + +struct team_mode { + struct list_head list; + const char *kind; + struct module *owner; + size_t priv_size; + const struct team_mode_ops *ops; +}; + +#define TEAM_PORT_HASHBITS 4 +#define TEAM_PORT_HASHENTRIES (1 << TEAM_PORT_HASHBITS) + +#define TEAM_MODE_PRIV_LONGS 4 +#define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS) + +struct team { + struct net_device *dev; /* associated netdevice */ + struct team_pcpu_stats __percpu *pcpu_stats; + + spinlock_t lock; /* used for overall locking, e.g. port lists write */ + + /* + * port lists with port count + */ + int port_count; + struct hlist_head port_hlist[TEAM_PORT_HASHENTRIES]; + struct list_head port_list; + + struct list_head option_list; + + const struct team_mode *mode; + struct team_mode_ops ops; + long mode_priv[TEAM_MODE_PRIV_LONGS]; +}; + +static inline struct hlist_head *team_port_index_hash(struct team *team, + int port_index) +{ + return &team->port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)]; +} + +static inline struct team_port *team_get_port_by_index(struct team *team, + int port_index) +{ + struct hlist_node *p; + struct team_port *port; + struct hlist_head *head = team_port_index_hash(team, port_index); + + hlist_for_each_entry(port, p, head, hlist) + if (port->index == port_index) + return port; + return NULL; +} +static inline struct team_port *team_get_port_by_index_rcu(struct team *team, + int port_index) +{ + struct hlist_node *p; + struct team_port *port; + struct hlist_head *head = team_port_index_hash(team, port_index); + + hlist_for_each_entry_rcu(port, p, head, hlist) + if (port->index == port_index) + return port; + return NULL; +} + +extern int team_port_set_team_mac(struct team_port *port); +extern void team_options_register(struct team *team, + struct team_option *option, + size_t option_count); +extern void team_options_unregister(struct team *team, + struct team_option *option, + size_t option_count); +extern int team_mode_register(struct team_mode *mode); +extern int team_mode_unregister(struct team_mode *mode); + +#endif /* __KERNEL__ */ + +#define TEAM_STRING_MAX_LEN 32 + +/********************************** + * NETLINK_GENERIC netlink family. + **********************************/ + +enum { + TEAM_CMD_NOOP, + TEAM_CMD_OPTIONS_SET, + TEAM_CMD_OPTIONS_GET, + TEAM_CMD_PORT_LIST_GET, + + __TEAM_CMD_MAX, + TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1), +}; + +enum { + TEAM_ATTR_UNSPEC, + TEAM_ATTR_TEAM_IFINDEX, /* u32 */ + TEAM_ATTR_LIST_OPTION, /* nest */ + TEAM_ATTR_LIST_PORT, /* nest */ + + __TEAM_ATTR_MAX, + TEAM_ATTR_MAX = __TEAM_ATTR_MAX - 1, +}; + +/* Nested layout of get/set msg: + * + * [TEAM_ATTR_LIST_OPTION] + * [TEAM_ATTR_ITEM_OPTION] + * [TEAM_ATTR_OPTION_*], ... + * [TEAM_ATTR_ITEM_OPTION] + * [TEAM_ATTR_OPTION_*], ... + * ... + * [TEAM_ATTR_LIST_PORT] + * [TEAM_ATTR_ITEM_PORT] + * [TEAM_ATTR_PORT_*], ... + * [TEAM_ATTR_ITEM_PORT] + * [TEAM_ATTR_PORT_*], ... + * ... + */ + +enum { + TEAM_ATTR_ITEM_OPTION_UNSPEC, + TEAM_ATTR_ITEM_OPTION, /* nest */ + + __TEAM_ATTR_ITEM_OPTION_MAX, + TEAM_ATTR_ITEM_OPTION_MAX = __TEAM_ATTR_ITEM_OPTION_MAX - 1, +}; + +enum { + TEAM_ATTR_OPTION_UNSPEC, + TEAM_ATTR_OPTION_NAME, /* string */ + TEAM_ATTR_OPTION_CHANGED, /* flag */ + TEAM_ATTR_OPTION_TYPE, /* u8 */ + TEAM_ATTR_OPTION_DATA, /* dynamic */ + + __TEAM_ATTR_OPTION_MAX, + TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1, +}; + +enum { + TEAM_ATTR_ITEM_PORT_UNSPEC, + TEAM_ATTR_ITEM_PORT, /* nest */ + + __TEAM_ATTR_ITEM_PORT_MAX, + TEAM_ATTR_ITEM_PORT_MAX = __TEAM_ATTR_ITEM_PORT_MAX - 1, +}; + +enum { + TEAM_ATTR_PORT_UNSPEC, + TEAM_ATTR_PORT_IFINDEX, /* u32 */ + TEAM_ATTR_PORT_CHANGED, /* flag */ + TEAM_ATTR_PORT_LINKUP, /* flag */ + TEAM_ATTR_PORT_SPEED, /* u32 */ + TEAM_ATTR_PORT_DUPLEX, /* u8 */ + + __TEAM_ATTR_PORT_MAX, + TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1, +}; + +/* + * NETLINK_GENERIC related info + */ +#define TEAM_GENL_NAME "team" +#define TEAM_GENL_VERSION 0x1 +#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event" + +#endif /* _LINUX_IF_TEAM_H_ */ -- cgit v1.2.3