diff options
Diffstat (limited to 'drivers/net/bonding')
-rw-r--r-- | drivers/net/bonding/Makefile | 2 | ||||
-rw-r--r-- | drivers/net/bonding/bond_3ad.c | 199 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 150 | ||||
-rw-r--r-- | drivers/net/bonding/bond_alb.h | 4 | ||||
-rw-r--r-- | drivers/net/bonding/bond_main.c | 715 | ||||
-rw-r--r-- | drivers/net/bonding/bond_netlink.c | 131 | ||||
-rw-r--r-- | drivers/net/bonding/bond_options.c | 142 | ||||
-rw-r--r-- | drivers/net/bonding/bond_procfs.c | 21 | ||||
-rw-r--r-- | drivers/net/bonding/bond_sysfs.c | 264 | ||||
-rw-r--r-- | drivers/net/bonding/bonding.h | 113 |
10 files changed, 932 insertions, 809 deletions
diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile index 4c21bf6b8b2f..5a5d720da929 100644 --- a/drivers/net/bonding/Makefile +++ b/drivers/net/bonding/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_BONDING) += bonding.o -bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_debugfs.o +bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o bond_debugfs.o bond_netlink.o bond_options.o proc-$(CONFIG_PROC_FS) += bond_procfs.o bonding-objs += $(proc-y) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 0d8f427ade93..187b1b7772ef 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -136,41 +136,6 @@ static inline struct bonding *__get_bond_by_port(struct port *port) } /** - * __get_first_port - get the first port in the bond - * @bond: the bond we're looking at - * - * Return the port of the first slave in @bond, or %NULL if it can't be found. - */ -static inline struct port *__get_first_port(struct bonding *bond) -{ - struct slave *first_slave = bond_first_slave(bond); - - return first_slave ? &(SLAVE_AD_INFO(first_slave).port) : NULL; -} - -/** - * __get_next_port - get the next port in the bond - * @port: the port we're looking at - * - * Return the port of the slave that is next in line of @port's slave in the - * bond, or %NULL if it can't be found. - */ -static inline struct port *__get_next_port(struct port *port) -{ - struct bonding *bond = __get_bond_by_port(port); - struct slave *slave = port->slave, *slave_next; - - // If there's no bond for this port, or this is the last slave - if (bond == NULL) - return NULL; - slave_next = bond_next_slave(bond, slave); - if (!slave_next || bond_is_first_slave(bond, slave_next)) - return NULL; - - return &(SLAVE_AD_INFO(slave_next).port); -} - -/** * __get_first_agg - get the first aggregator in the bond * @bond: the bond we're looking at * @@ -190,28 +155,6 @@ static inline struct aggregator *__get_first_agg(struct port *port) return first_slave ? &(SLAVE_AD_INFO(first_slave).aggregator) : NULL; } -/** - * __get_next_agg - get the next aggregator in the bond - * @aggregator: the aggregator we're looking at - * - * Return the aggregator of the slave that is next in line of @aggregator's - * slave in the bond, or %NULL if it can't be found. - */ -static inline struct aggregator *__get_next_agg(struct aggregator *aggregator) -{ - struct slave *slave = aggregator->slave, *slave_next; - struct bonding *bond = bond_get_bond_by_slave(slave); - - // If there's no bond for this aggregator, or this is the last slave - if (bond == NULL) - return NULL; - slave_next = bond_next_slave(bond, slave); - if (!slave_next || bond_is_first_slave(bond, slave_next)) - return NULL; - - return &(SLAVE_AD_INFO(slave_next).aggregator); -} - /* * __agg_has_partner * @@ -755,16 +698,15 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator) */ static struct aggregator *__get_active_agg(struct aggregator *aggregator) { - struct aggregator *retval = NULL; + struct bonding *bond = aggregator->slave->bond; + struct list_head *iter; + struct slave *slave; - for (; aggregator; aggregator = __get_next_agg(aggregator)) { - if (aggregator->is_active) { - retval = aggregator; - break; - } - } + bond_for_each_slave(bond, slave, iter) + if (SLAVE_AD_INFO(slave).aggregator.is_active) + return &(SLAVE_AD_INFO(slave).aggregator); - return retval; + return NULL; } /** @@ -1274,12 +1216,17 @@ static void ad_port_selection_logic(struct port *port) { struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator; struct port *last_port = NULL, *curr_port; + struct list_head *iter; + struct bonding *bond; + struct slave *slave; int found = 0; // if the port is already Selected, do nothing if (port->sm_vars & AD_PORT_SELECTED) return; + bond = __get_bond_by_port(port); + // if the port is connected to other aggregator, detach it if (port->aggregator) { // detach the port from its former aggregator @@ -1320,8 +1267,8 @@ static void ad_port_selection_logic(struct port *port) } } // search on all aggregators for a suitable aggregator for this port - for (aggregator = __get_first_agg(port); aggregator; - aggregator = __get_next_agg(aggregator)) { + bond_for_each_slave(bond, slave, iter) { + aggregator = &(SLAVE_AD_INFO(slave).aggregator); // keep a free aggregator for later use(if needed) if (!aggregator->lag_ports) { @@ -1515,19 +1462,23 @@ static int agg_device_up(const struct aggregator *agg) static void ad_agg_selection_logic(struct aggregator *agg) { struct aggregator *best, *active, *origin; + struct bonding *bond = agg->slave->bond; + struct list_head *iter; + struct slave *slave; struct port *port; origin = agg; active = __get_active_agg(agg); best = (active && agg_device_up(active)) ? active : NULL; - do { + bond_for_each_slave(bond, slave, iter) { + agg = &(SLAVE_AD_INFO(slave).aggregator); + agg->is_active = 0; if (agg->num_of_ports && agg_device_up(agg)) best = ad_agg_selection_test(best, agg); - - } while ((agg = __get_next_agg(agg))); + } if (best && __get_agg_selection_mode(best->lag_ports) == BOND_AD_STABLE) { @@ -1565,8 +1516,8 @@ static void ad_agg_selection_logic(struct aggregator *agg) best->lag_ports, best->slave, best->slave ? best->slave->dev->name : "NULL"); - for (agg = __get_first_agg(best->lag_ports); agg; - agg = __get_next_agg(agg)) { + bond_for_each_slave(bond, slave, iter) { + agg = &(SLAVE_AD_INFO(slave).aggregator); pr_debug("Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", agg->aggregator_identifier, agg->num_of_ports, @@ -1614,13 +1565,7 @@ static void ad_agg_selection_logic(struct aggregator *agg) } } - if (origin->slave) { - struct bonding *bond; - - bond = bond_get_bond_by_slave(origin->slave); - if (bond) - bond_3ad_set_carrier(bond); - } + bond_3ad_set_carrier(bond); } /** @@ -1969,6 +1914,9 @@ void bond_3ad_unbind_slave(struct slave *slave) struct port *port, *prev_port, *temp_port; struct aggregator *aggregator, *new_aggregator, *temp_aggregator; int select_new_active_agg = 0; + struct bonding *bond = slave->bond; + struct slave *slave_iter; + struct list_head *iter; // find the aggregator related to this slave aggregator = &(SLAVE_AD_INFO(slave).aggregator); @@ -1998,14 +1946,16 @@ void bond_3ad_unbind_slave(struct slave *slave) // reason to search for new aggregator, and that we will find one if ((aggregator->lag_ports != port) || (aggregator->lag_ports->next_port_in_aggregator)) { // find new aggregator for the related port(s) - new_aggregator = __get_first_agg(port); - for (; new_aggregator; new_aggregator = __get_next_agg(new_aggregator)) { + bond_for_each_slave(bond, slave_iter, iter) { + new_aggregator = &(SLAVE_AD_INFO(slave_iter).aggregator); // if the new aggregator is empty, or it is connected to our port only if (!new_aggregator->lag_ports || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator)) break; } + if (!slave_iter) + new_aggregator = NULL; // if new aggregator found, copy the aggregator's parameters // and connect the related lag_ports to the new aggregator if ((new_aggregator) && ((!new_aggregator->lag_ports) || ((new_aggregator->lag_ports == port) && !new_aggregator->lag_ports->next_port_in_aggregator))) { @@ -2056,15 +2006,17 @@ void bond_3ad_unbind_slave(struct slave *slave) pr_info("%s: Removing an active aggregator\n", slave->bond->dev->name); // select new active aggregator - ad_agg_selection_logic(__get_first_agg(port)); + temp_aggregator = __get_first_agg(port); + if (temp_aggregator) + ad_agg_selection_logic(temp_aggregator); } } } pr_debug("Unbinding port %d\n", port->actor_port_number); // find the aggregator that this port is connected to - temp_aggregator = __get_first_agg(port); - for (; temp_aggregator; temp_aggregator = __get_next_agg(temp_aggregator)) { + bond_for_each_slave(bond, slave_iter, iter) { + temp_aggregator = &(SLAVE_AD_INFO(slave_iter).aggregator); prev_port = NULL; // search the port in the aggregator's related ports for (temp_port = temp_aggregator->lag_ports; temp_port; @@ -2111,19 +2063,24 @@ void bond_3ad_state_machine_handler(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, ad_work.work); - struct port *port; struct aggregator *aggregator; + struct list_head *iter; + struct slave *slave; + struct port *port; read_lock(&bond->lock); //check if there are any slaves - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto re_arm; // check if agg_select_timer timer after initialize is timed out if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) { + slave = bond_first_slave(bond); + port = slave ? &(SLAVE_AD_INFO(slave).port) : NULL; + // select the active aggregator for the bond - if ((port = __get_first_port(bond))) { + if (port) { if (!port->slave) { pr_warning("%s: Warning: bond's first port is uninitialized\n", bond->dev->name); @@ -2137,7 +2094,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } // for each port run the state machines - for (port = __get_first_port(bond); port; port = __get_next_port(port)) { + bond_for_each_slave(bond, slave, iter) { + port = &(SLAVE_AD_INFO(slave).port); if (!port->slave) { pr_warning("%s: Warning: Found an uninitialized port\n", bond->dev->name); @@ -2382,9 +2340,12 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { struct aggregator *aggregator = NULL; + struct list_head *iter; + struct slave *slave; struct port *port; - for (port = __get_first_port(bond); port; port = __get_next_port(port)) { + bond_for_each_slave_rcu(bond, slave, iter) { + port = &(SLAVE_AD_INFO(slave).port); if (port->aggregator && port->aggregator->is_active) { aggregator = port->aggregator; break; @@ -2408,25 +2369,25 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info) { int ret; - read_lock(&bond->lock); + rcu_read_lock(); ret = __bond_3ad_get_active_agg_info(bond, ad_info); - read_unlock(&bond->lock); + rcu_read_unlock(); return ret; } int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) { - struct slave *slave, *start_at; struct bonding *bond = netdev_priv(dev); - int slave_agg_no; - int slaves_in_agg; - int agg_id; - int i; + struct slave *slave, *first_ok_slave; + struct aggregator *agg; struct ad_info ad_info; + struct list_head *iter; + int slaves_in_agg; + int slave_agg_no; int res = 1; + int agg_id; - read_lock(&bond->lock); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n", dev->name); @@ -2437,20 +2398,28 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) agg_id = ad_info.aggregator_id; if (slaves_in_agg == 0) { - /*the aggregator is empty*/ pr_debug("%s: Error: active aggregator is empty\n", dev->name); goto out; } - slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg); + slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg); + first_ok_slave = NULL; - bond_for_each_slave(bond, slave) { - struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; + bond_for_each_slave_rcu(bond, slave, iter) { + agg = SLAVE_AD_INFO(slave).port.aggregator; + if (!agg || agg->aggregator_identifier != agg_id) + continue; - if (agg && (agg->aggregator_identifier == agg_id)) { + if (slave_agg_no >= 0) { + if (!first_ok_slave && SLAVE_IS_OK(slave)) + first_ok_slave = slave; slave_agg_no--; - if (slave_agg_no < 0) - break; + continue; + } + + if (SLAVE_IS_OK(slave)) { + res = bond_dev_queue_xmit(bond, skb, slave->dev); + goto out; } } @@ -2460,23 +2429,12 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) goto out; } - start_at = slave; - - bond_for_each_slave_from(bond, slave, i, start_at) { - int slave_agg_id = 0; - struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator; - - if (agg) - slave_agg_id = agg->aggregator_identifier; - - if (SLAVE_IS_OK(slave) && agg && (slave_agg_id == agg_id)) { - res = bond_dev_queue_xmit(bond, skb, slave->dev); - break; - } - } + /* we couldn't find any suitable slave after the agg_no, so use the + * first suitable found, if found. */ + if (first_ok_slave) + res = bond_dev_queue_xmit(bond, skb, first_ok_slave->dev); out: - read_unlock(&bond->lock); if (res) { /* no suitable interface, frame not sent */ kfree_skb(skb); @@ -2515,11 +2473,12 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, void bond_3ad_update_lacp_rate(struct bonding *bond) { struct port *port = NULL; + struct list_head *iter; struct slave *slave; int lacp_fast; lacp_fast = bond->params.lacp_fast; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave).port); __get_state_machine_lock(port); if (lacp_fast) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index f428ef574372..02872405d35d 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -223,13 +223,14 @@ static long long compute_gap(struct slave *slave) static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) { struct slave *slave, *least_loaded; + struct list_head *iter; long long max_gap; least_loaded = NULL; max_gap = LLONG_MIN; /* Find the slave with the largest gap */ - bond_for_each_slave(bond, slave) { + bond_for_each_slave_rcu(bond, slave, iter) { if (SLAVE_IS_OK(slave)) { long long gap = compute_gap(slave); @@ -382,30 +383,64 @@ out: static struct slave *rlb_next_rx_slave(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct slave *rx_slave, *slave, *start_at; - int i = 0; + struct slave *before = NULL, *rx_slave = NULL, *slave; + struct list_head *iter; + bool found = false; - if (bond_info->next_rx_slave) - start_at = bond_info->next_rx_slave; - else - start_at = bond_first_slave(bond); + bond_for_each_slave(bond, slave, iter) { + if (!SLAVE_IS_OK(slave)) + continue; + if (!found) { + if (!before || before->speed < slave->speed) + before = slave; + } else { + if (!rx_slave || rx_slave->speed < slave->speed) + rx_slave = slave; + } + if (slave == bond_info->rx_slave) + found = true; + } + /* we didn't find anything after the current or we have something + * better before and up to the current slave + */ + if (!rx_slave || (before && rx_slave->speed < before->speed)) + rx_slave = before; - rx_slave = NULL; + if (rx_slave) + bond_info->rx_slave = rx_slave; - bond_for_each_slave_from(bond, slave, i, start_at) { - if (SLAVE_IS_OK(slave)) { - if (!rx_slave) { - rx_slave = slave; - } else if (slave->speed > rx_slave->speed) { + return rx_slave; +} + +/* Caller must hold rcu_read_lock() for read */ +static struct slave *__rlb_next_rx_slave(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *before = NULL, *rx_slave = NULL, *slave; + struct list_head *iter; + bool found = false; + + bond_for_each_slave_rcu(bond, slave, iter) { + if (!SLAVE_IS_OK(slave)) + continue; + if (!found) { + if (!before || before->speed < slave->speed) + before = slave; + } else { + if (!rx_slave || rx_slave->speed < slave->speed) rx_slave = slave; - } } + if (slave == bond_info->rx_slave) + found = true; } + /* we didn't find anything after the current or we have something + * better before and up to the current slave + */ + if (!rx_slave || (before && rx_slave->speed < before->speed)) + rx_slave = before; - if (rx_slave) { - slave = bond_next_slave(bond, rx_slave); - bond_info->next_rx_slave = slave; - } + if (rx_slave) + bond_info->rx_slave = rx_slave; return rx_slave; } @@ -626,12 +661,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); struct arp_pkt *arp = arp_pkt(skb); - struct slave *assigned_slave; + struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; _lock_rx_hashtbl(bond); + curr_active_slave = rcu_dereference(bond->curr_active_slave); + hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst)); client_info = &(bond_info->rx_hashtbl[hash_index]); @@ -656,14 +693,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon * that the new client can be assigned to this entry. */ if (bond->curr_active_slave && - client_info->slave != bond->curr_active_slave) { - client_info->slave = bond->curr_active_slave; + client_info->slave != curr_active_slave) { + client_info->slave = curr_active_slave; rlb_update_client(client_info); } } } /* assign a new slave */ - assigned_slave = rlb_next_rx_slave(bond); + assigned_slave = __rlb_next_rx_slave(bond); if (assigned_slave) { if (!(client_info->assigned && @@ -726,7 +763,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) /* Don't modify or load balance ARPs that do not originate locally * (e.g.,arrive via a bridge). */ - if (!bond_slave_has_mac(bond, arp->mac_src)) + if (!bond_slave_has_mac_rcu(bond, arp->mac_src)) return NULL; if (arp->op_code == htons(ARPOP_REPLY)) { @@ -1019,7 +1056,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) /* loop through vlans and send one packet for each */ rcu_read_lock(); - netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { + netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { if (upper->priv_flags & IFF_802_1Q_VLAN) alb_send_lp_vid(slave, mac_addr, vlan_dev_vlan_id(upper)); @@ -1172,10 +1209,11 @@ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *sla */ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave) { - struct slave *tmp_slave1, *free_mac_slave = NULL; struct slave *has_bond_addr = bond->curr_active_slave; + struct slave *tmp_slave1, *free_mac_slave = NULL; + struct list_head *iter; - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { /* this is the first slave */ return 0; } @@ -1196,7 +1234,7 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav /* The slave's address is equal to the address of the bond. * Search for a spare address in the bond for this slave. */ - bond_for_each_slave(bond, tmp_slave1) { + bond_for_each_slave(bond, tmp_slave1, iter) { if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) { /* no slave has tmp_slave1's perm addr * as its curr addr @@ -1246,15 +1284,16 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav */ static int alb_set_mac_address(struct bonding *bond, void *addr) { - char tmp_addr[ETH_ALEN]; - struct slave *slave; + struct slave *slave, *rollback_slave; + struct list_head *iter; struct sockaddr sa; + char tmp_addr[ETH_ALEN]; int res; if (bond->alb_info.rlb_enabled) return 0; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { /* save net_device's current hw address */ memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); @@ -1274,10 +1313,12 @@ unwind: sa.sa_family = bond->dev->type; /* unwind from head to the slave that failed */ - bond_for_each_slave_continue_reverse(bond, slave) { - memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN); - dev_set_mac_address(slave->dev, &sa); - memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN); + bond_for_each_slave(bond, rollback_slave, iter) { + if (rollback_slave == slave) + break; + memcpy(tmp_addr, rollback_slave->dev->dev_addr, ETH_ALEN); + dev_set_mac_address(rollback_slave->dev, &sa); + memcpy(rollback_slave->dev->dev_addr, tmp_addr, ETH_ALEN); } return res; @@ -1337,11 +1378,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) skb_reset_mac_header(skb); eth_data = eth_hdr(skb); - /* make sure that the curr_active_slave do not change during tx - */ - read_lock(&bond->lock); - read_lock(&bond->curr_slave_lock); - switch (ntohs(skb->protocol)) { case ETH_P_IP: { const struct iphdr *iph = ip_hdr(skb); @@ -1423,12 +1459,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) if (!tx_slave) { /* unbalanced or unassigned, send through primary */ - tx_slave = bond->curr_active_slave; + tx_slave = rcu_dereference(bond->curr_active_slave); bond_info->unbalanced_load += skb->len; } if (tx_slave && SLAVE_IS_OK(tx_slave)) { - if (tx_slave != bond->curr_active_slave) { + if (tx_slave != rcu_dereference(bond->curr_active_slave)) { memcpy(eth_data->h_source, tx_slave->dev->dev_addr, ETH_ALEN); @@ -1443,8 +1479,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) } } - read_unlock(&bond->curr_slave_lock); - read_unlock(&bond->lock); if (res) { /* no suitable interface, frame not sent */ kfree_skb(skb); @@ -1458,11 +1492,12 @@ void bond_alb_monitor(struct work_struct *work) struct bonding *bond = container_of(work, struct bonding, alb_work.work); struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct list_head *iter; struct slave *slave; read_lock(&bond->lock); - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { bond_info->tx_rebalance_counter = 0; bond_info->lp_counter = 0; goto re_arm; @@ -1480,7 +1515,7 @@ void bond_alb_monitor(struct work_struct *work) */ read_lock(&bond->curr_slave_lock); - bond_for_each_slave(bond, slave) + bond_for_each_slave(bond, slave, iter) alb_send_learning_packets(slave, slave->dev->dev_addr); read_unlock(&bond->curr_slave_lock); @@ -1493,7 +1528,7 @@ void bond_alb_monitor(struct work_struct *work) read_lock(&bond->curr_slave_lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { tlb_clear_slave(bond, slave, 1); if (slave == bond->curr_active_slave) { SLAVE_TLB_INFO(slave).load = @@ -1599,13 +1634,13 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave) */ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave) { - if (!list_empty(&bond->slave_list)) + if (bond_has_slaves(bond)) alb_change_hw_addr_on_detach(bond, slave); tlb_clear_slave(bond, slave, 0); if (bond->alb_info.rlb_enabled) { - bond->alb_info.next_rx_slave = NULL; + bond->alb_info.rx_slave = NULL; rlb_clear_slave(bond, slave); } } @@ -1669,7 +1704,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave swap_slave = bond->curr_active_slave; rcu_assign_pointer(bond->curr_active_slave, new_slave); - if (!new_slave || list_empty(&bond->slave_list)) + if (!new_slave || !bond_has_slaves(bond)) return; /* set the new curr_active_slave to the bonds mac address @@ -1692,6 +1727,23 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave ASSERT_RTNL(); + /* in TLB mode, the slave might flip down/up with the old dev_addr, + * and thus filter bond->dev_addr's packets, so force bond's mac + */ + if (bond->params.mode == BOND_MODE_TLB) { + struct sockaddr sa; + u8 tmp_addr[ETH_ALEN]; + + memcpy(tmp_addr, new_slave->dev->dev_addr, ETH_ALEN); + + memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len); + sa.sa_family = bond->dev->type; + /* we don't care if it can't change its mac, best effort */ + dev_set_mac_address(new_slave->dev, &sa); + + memcpy(new_slave->dev->dev_addr, tmp_addr, ETH_ALEN); + } + /* curr_active_slave must be set before calling alb_swap_mac_addr */ if (swap_slave) { /* swap mac address */ diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h index c5eff5dafdfe..4226044efd08 100644 --- a/drivers/net/bonding/bond_alb.h +++ b/drivers/net/bonding/bond_alb.h @@ -154,9 +154,7 @@ struct alb_bond_info { u8 rx_ntt; /* flag - need to transmit * to all rx clients */ - struct slave *next_rx_slave;/* next slave to be assigned - * to a new rx client for - */ + struct slave *rx_slave;/* last slave to xmit from */ u8 primary_is_promisc; /* boolean */ u32 rlb_promisc_timeout_counter;/* counts primary * promiscuity time diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e883bfe2e727..4dd5ee2a34cc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -78,6 +78,8 @@ #include <net/netns/generic.h> #include <net/pkt_sched.h> #include <linux/rculist.h> +#include <net/flow_keys.h> +#include <linux/reciprocal_div.h> #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -110,6 +112,7 @@ static char *fail_over_mac; static int all_slaves_active; static struct bond_params bonding_defaults; static int resend_igmp = BOND_DEFAULT_RESEND_IGMP; +static int packets_per_slave = 1; module_param(max_bonds, int, 0); MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); @@ -159,7 +162,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on module_param(xmit_hash_policy, charp, 0); MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; " "0 for layer 2 (default), 1 for layer 3+4, " - "2 for layer 2+3"); + "2 for layer 2+3, 3 for encap layer 2+3, " + "4 for encap layer 3+4"); module_param(arp_interval, int, 0); MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); module_param_array(arp_ip_target, charp, NULL, 0); @@ -181,6 +185,10 @@ MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface" module_param(resend_igmp, int, 0); MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on " "link failure"); +module_param(packets_per_slave, int, 0); +MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr " + "mode; 0 for a random slave, 1 packet per " + "slave (default), >1 packets per slave."); /*----------------------------- Global variables ----------------------------*/ @@ -217,6 +225,8 @@ const struct bond_parm_tbl xmit_hashtype_tbl[] = { { "layer2", BOND_XMIT_POLICY_LAYER2}, { "layer3+4", BOND_XMIT_POLICY_LAYER34}, { "layer2+3", BOND_XMIT_POLICY_LAYER23}, +{ "encap2+3", BOND_XMIT_POLICY_ENCAP23}, +{ "encap3+4", BOND_XMIT_POLICY_ENCAP34}, { NULL, -1}, }; @@ -332,10 +342,11 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev, __be16 proto, u16 vid) { struct bonding *bond = netdev_priv(bond_dev); - struct slave *slave; + struct slave *slave, *rollback_slave; + struct list_head *iter; int res; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { res = vlan_vid_add(slave->dev, proto, vid); if (res) goto unwind; @@ -344,9 +355,13 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev, return 0; unwind: - /* unwind from the slave that failed */ - bond_for_each_slave_continue_reverse(bond, slave) - vlan_vid_del(slave->dev, proto, vid); + /* unwind to the slave that failed */ + bond_for_each_slave(bond, rollback_slave, iter) { + if (rollback_slave == slave) + break; + + vlan_vid_del(rollback_slave->dev, proto, vid); + } return res; } @@ -360,9 +375,10 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, __be16 proto, u16 vid) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; struct slave *slave; - bond_for_each_slave(bond, slave) + bond_for_each_slave(bond, slave, iter) vlan_vid_del(slave->dev, proto, vid); if (bond_is_lb(bond)) @@ -382,15 +398,16 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, */ static int bond_set_carrier(struct bonding *bond) { + struct list_head *iter; struct slave *slave; - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto down; if (bond->params.mode == BOND_MODE_8023AD) return bond_3ad_set_carrier(bond); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (slave->link == BOND_LINK_UP) { if (!netif_carrier_ok(bond->dev)) { netif_carrier_on(bond->dev); @@ -522,7 +539,9 @@ static int bond_check_dev_link(struct bonding *bond, */ static int bond_set_promiscuity(struct bonding *bond, int inc) { + struct list_head *iter; int err = 0; + if (USES_PRIMARY(bond->params.mode)) { /* write lock already acquired */ if (bond->curr_active_slave) { @@ -532,7 +551,7 @@ static int bond_set_promiscuity(struct bonding *bond, int inc) } else { struct slave *slave; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { err = dev_set_promiscuity(slave->dev, inc); if (err) return err; @@ -546,7 +565,9 @@ static int bond_set_promiscuity(struct bonding *bond, int inc) */ static int bond_set_allmulti(struct bonding *bond, int inc) { + struct list_head *iter; int err = 0; + if (USES_PRIMARY(bond->params.mode)) { /* write lock already acquired */ if (bond->curr_active_slave) { @@ -556,7 +577,7 @@ static int bond_set_allmulti(struct bonding *bond, int inc) } else { struct slave *slave; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { err = dev_set_allmulti(slave->dev, inc); if (err) return err; @@ -774,43 +795,24 @@ static bool bond_should_change_active(struct bonding *bond) /** * find_best_interface - select the best available slave to be the active one * @bond: our bonding struct - * - * Warning: Caller must hold curr_slave_lock for writing. */ static struct slave *bond_find_best_slave(struct bonding *bond) { - struct slave *new_active, *old_active; - struct slave *bestslave = NULL; + struct slave *slave, *bestslave = NULL; + struct list_head *iter; int mintime = bond->params.updelay; - int i; - new_active = bond->curr_active_slave; - - if (!new_active) { /* there were no active slaves left */ - new_active = bond_first_slave(bond); - if (!new_active) - return NULL; /* still no slave, return NULL */ - } - - if ((bond->primary_slave) && - bond->primary_slave->link == BOND_LINK_UP && - bond_should_change_active(bond)) { - new_active = bond->primary_slave; - } - - /* remember where to stop iterating over the slaves */ - old_active = new_active; - - bond_for_each_slave_from(bond, new_active, i, old_active) { - if (new_active->link == BOND_LINK_UP) { - return new_active; - } else if (new_active->link == BOND_LINK_BACK && - IS_UP(new_active->dev)) { - /* link up, but waiting for stabilization */ - if (new_active->delay < mintime) { - mintime = new_active->delay; - bestslave = new_active; - } + if (bond->primary_slave && bond->primary_slave->link == BOND_LINK_UP && + bond_should_change_active(bond)) + return bond->primary_slave; + + bond_for_each_slave(bond, slave, iter) { + if (slave->link == BOND_LINK_UP) + return slave; + if (slave->link == BOND_LINK_BACK && IS_UP(slave->dev) && + slave->delay < mintime) { + mintime = slave->delay; + bestslave = slave; } } @@ -971,35 +973,6 @@ void bond_select_active_slave(struct bonding *bond) } } -/*--------------------------- slave list handling ---------------------------*/ - -/* - * This function attaches the slave to the end of list. - * - * bond->lock held for writing by caller. - */ -static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) -{ - list_add_tail_rcu(&new_slave->list, &bond->slave_list); - bond->slave_cnt++; -} - -/* - * This function detaches the slave from the list. - * WARNING: no check is made to verify if the slave effectively - * belongs to <bond>. - * Nothing is freed on return, structures are just unchained. - * If any slave pointer in bond was pointing to <slave>, - * it should be changed by the calling function. - * - * bond->lock held for writing by caller. - */ -static void bond_detach_slave(struct bonding *bond, struct slave *slave) -{ - list_del_rcu(&slave->list); - bond->slave_cnt--; -} - #ifdef CONFIG_NET_POLL_CONTROLLER static inline int slave_enable_netpoll(struct slave *slave) { @@ -1046,9 +1019,10 @@ static void bond_poll_controller(struct net_device *bond_dev) static void bond_netpoll_cleanup(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; struct slave *slave; - bond_for_each_slave(bond, slave) + bond_for_each_slave(bond, slave, iter) if (IS_UP(slave->dev)) slave_disable_netpoll(slave); } @@ -1056,10 +1030,11 @@ static void bond_netpoll_cleanup(struct net_device *bond_dev) static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, gfp_t gfp) { struct bonding *bond = netdev_priv(dev); + struct list_head *iter; struct slave *slave; int err = 0; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { err = slave_enable_netpoll(slave); if (err) { bond_netpoll_cleanup(dev); @@ -1087,10 +1062,11 @@ static netdev_features_t bond_fix_features(struct net_device *dev, netdev_features_t features) { struct bonding *bond = netdev_priv(dev); + struct list_head *iter; netdev_features_t mask; struct slave *slave; - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { /* Disable adding VLANs to empty bond. But why? --mq */ features |= NETIF_F_VLAN_CHALLENGED; return features; @@ -1100,7 +1076,7 @@ static netdev_features_t bond_fix_features(struct net_device *dev, features &= ~NETIF_F_ONE_FOR_ALL; features |= NETIF_F_ALL_FOR_ALL; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { features = netdev_increment_features(features, slave->dev->features, mask); @@ -1118,16 +1094,17 @@ static void bond_compute_features(struct bonding *bond) { unsigned int flags, dst_release_flag = IFF_XMIT_DST_RELEASE; netdev_features_t vlan_features = BOND_VLAN_FEATURES; + struct net_device *bond_dev = bond->dev; + struct list_head *iter; + struct slave *slave; unsigned short max_hard_header_len = ETH_HLEN; unsigned int gso_max_size = GSO_MAX_SIZE; - struct net_device *bond_dev = bond->dev; u16 gso_max_segs = GSO_MAX_SEGS; - struct slave *slave; - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto done; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { vlan_features = netdev_increment_features(vlan_features, slave->dev->vlan_features, BOND_VLAN_FEATURES); @@ -1233,15 +1210,16 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) } static int bond_master_upper_dev_link(struct net_device *bond_dev, - struct net_device *slave_dev) + struct net_device *slave_dev, + struct slave *slave) { int err; - err = netdev_master_upper_dev_link(slave_dev, bond_dev); + err = netdev_master_upper_dev_link_private(slave_dev, bond_dev, slave); if (err) return err; slave_dev->flags |= IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE); + rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); return 0; } @@ -1250,7 +1228,7 @@ static void bond_upper_dev_unlink(struct net_device *bond_dev, { netdev_upper_dev_unlink(slave_dev, bond_dev); slave_dev->flags &= ~IFF_SLAVE; - rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE); + rtmsg_ifinfo(RTM_NEWLINK, slave_dev, IFF_SLAVE, GFP_KERNEL); } /* enslave device <slave> to bond device <master> */ @@ -1258,7 +1236,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { struct bonding *bond = netdev_priv(bond_dev); const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - struct slave *new_slave = NULL; + struct slave *new_slave = NULL, *prev_slave; struct sockaddr addr; int link_reporting; int res = 0, i; @@ -1313,7 +1291,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) * bond ether type mutual exclusion - don't allow slaves of dissimilar * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond */ - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { if (bond_dev->type != slave_dev->type) { pr_debug("%s: change device type from %d to %d\n", bond_dev->name, @@ -1352,7 +1330,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } if (slave_ops->ndo_set_mac_address == NULL) { - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { pr_warning("%s: Warning: The first slave device specified does not support setting the MAC address. Setting fail_over_mac to active.", bond_dev->name); bond->params.fail_over_mac = BOND_FOM_ACTIVE; @@ -1368,7 +1346,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) /* If this is the first slave, then we need to set the master's hardware * address to be the same as the slave's. */ - if (list_empty(&bond->slave_list) && + if (!bond_has_slaves(bond) && bond->dev->addr_assign_type == NET_ADDR_RANDOM) bond_set_dev_addr(bond->dev, slave_dev); @@ -1377,7 +1355,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) res = -ENOMEM; goto err_undo_flags; } - INIT_LIST_HEAD(&new_slave->list); /* * Set the new_slave's queue_id to be zero. Queue ID mapping * is set via sysfs or module option if desired. @@ -1413,17 +1390,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } - res = bond_master_upper_dev_link(bond_dev, slave_dev); - if (res) { - pr_debug("Error %d calling bond_master_upper_dev_link\n", res); - goto err_restore_mac; - } - /* open the slave since the application closed it */ res = dev_open(slave_dev); if (res) { pr_debug("Opening slave %s failed\n", slave_dev->name); - goto err_unset_master; + goto err_restore_mac; } new_slave->bond = bond; @@ -1479,21 +1450,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_close; } - write_lock_bh(&bond->lock); - - bond_attach_slave(bond, new_slave); + prev_slave = bond_last_slave(bond); new_slave->delay = 0; new_slave->link_failure_count = 0; - write_unlock_bh(&bond->lock); - - bond_compute_features(bond); - bond_update_speed_duplex(new_slave); - read_lock(&bond->lock); - new_slave->last_arp_rx = jiffies - (msecs_to_jiffies(bond->params.arp_interval) + 1); for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) @@ -1554,12 +1517,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } - write_lock_bh(&bond->curr_slave_lock); - switch (bond->params.mode) { case BOND_MODE_ACTIVEBACKUP: bond_set_slave_inactive_flags(new_slave); - bond_select_active_slave(bond); break; case BOND_MODE_8023AD: /* in 802.3ad mode, the internal mechanism @@ -1568,16 +1528,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) */ bond_set_slave_inactive_flags(new_slave); /* if this is the first slave */ - if (bond_first_slave(bond) == new_slave) { + if (!prev_slave) { SLAVE_AD_INFO(new_slave).id = 1; /* Initialize AD with the number of times that the AD timer is called in 1 second * can be called only after the mac address of the bond is set */ bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL); } else { - struct slave *prev_slave; - - prev_slave = bond_prev_slave(bond, new_slave); SLAVE_AD_INFO(new_slave).id = SLAVE_AD_INFO(prev_slave).id + 1; } @@ -1588,7 +1545,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) case BOND_MODE_ALB: bond_set_active_slave(new_slave); bond_set_slave_inactive_flags(new_slave); - bond_select_active_slave(bond); break; default: pr_debug("This slave is always active in trunk mode\n"); @@ -1606,10 +1562,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) break; } /* switch(bond_mode) */ - write_unlock_bh(&bond->curr_slave_lock); - - bond_set_carrier(bond); - #ifdef CONFIG_NET_POLL_CONTROLLER slave_dev->npinfo = bond->dev->npinfo; if (slave_dev->npinfo) { @@ -1624,17 +1576,29 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } #endif - read_unlock(&bond->lock); - - res = bond_create_slave_symlinks(bond_dev, slave_dev); - if (res) - goto err_detach; - res = netdev_rx_handler_register(slave_dev, bond_handle_frame, new_slave); if (res) { pr_debug("Error %d calling netdev_rx_handler_register\n", res); - goto err_dest_symlinks; + goto err_detach; + } + + res = bond_master_upper_dev_link(bond_dev, slave_dev, new_slave); + if (res) { + pr_debug("Error %d calling bond_master_upper_dev_link\n", res); + goto err_unregister; + } + + bond->slave_cnt++; + bond_compute_features(bond); + bond_set_carrier(bond); + + if (USES_PRIMARY(bond->params.mode)) { + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + bond_select_active_slave(bond); + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); } pr_info("%s: enslaving %s as a%s interface with a%s link.\n", @@ -1646,8 +1610,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) return 0; /* Undo stages on error */ -err_dest_symlinks: - bond_destroy_slave_symlinks(bond_dev, slave_dev); +err_unregister: + netdev_rx_handler_unregister(slave_dev); err_detach: if (!USES_PRIMARY(bond->params.mode)) @@ -1655,7 +1619,6 @@ err_detach: vlan_vids_del_by_dev(slave_dev, bond_dev); write_lock_bh(&bond->lock); - bond_detach_slave(bond, new_slave); if (bond->primary_slave == new_slave) bond->primary_slave = NULL; if (bond->curr_active_slave == new_slave) { @@ -1675,9 +1638,6 @@ err_close: slave_dev->priv_flags &= ~IFF_BONDING; dev_close(slave_dev); -err_unset_master: - bond_upper_dev_unlink(bond_dev, slave_dev); - err_restore_mac: if (!bond->params.fail_over_mac) { /* XXX TODO - fom follow mode needs to change master's @@ -1696,9 +1656,8 @@ err_free: kfree(new_slave); err_undo_flags: - bond_compute_features(bond); /* Enslave of first slave has failed and we need to fix master's mac */ - if (list_empty(&bond->slave_list) && + if (!bond_has_slaves(bond) && ether_addr_equal(bond_dev->dev_addr, slave_dev->dev_addr)) eth_hw_addr_random(bond_dev); @@ -1749,6 +1708,11 @@ static int __bond_release_one(struct net_device *bond_dev, } write_unlock_bh(&bond->lock); + + /* release the slave from its bond */ + bond->slave_cnt--; + + bond_upper_dev_unlink(bond_dev, slave_dev); /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ @@ -1772,12 +1736,9 @@ static int __bond_release_one(struct net_device *bond_dev, bond->current_arp_slave = NULL; - /* release the slave from its bond */ - bond_detach_slave(bond, slave); - if (!all && !bond->params.fail_over_mac) { if (ether_addr_equal(bond_dev->dev_addr, slave->perm_hwaddr) && - !list_empty(&bond->slave_list)) + bond_has_slaves(bond)) pr_warn("%s: Warning: the permanent HWaddr of %s - %pM - is still in use by %s. Set the HWaddr of %s to a different address to avoid conflicts.\n", bond_dev->name, slave_dev->name, slave->perm_hwaddr, @@ -1820,7 +1781,7 @@ static int __bond_release_one(struct net_device *bond_dev, write_lock_bh(&bond->lock); } - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { bond_set_carrier(bond); eth_hw_addr_random(bond_dev); @@ -1836,7 +1797,7 @@ static int __bond_release_one(struct net_device *bond_dev, unblock_netpoll_tx(); synchronize_rcu(); - if (list_empty(&bond->slave_list)) { + if (!bond_has_slaves(bond)) { call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); call_netdevice_notifiers(NETDEV_RELEASE, bond->dev); } @@ -1848,8 +1809,6 @@ static int __bond_release_one(struct net_device *bond_dev, bond_dev->name, slave_dev->name, bond_dev->name); /* must do this from outside any spinlocks */ - bond_destroy_slave_symlinks(bond_dev, slave_dev); - vlan_vids_del_by_dev(slave_dev, bond_dev); /* If the mode USES_PRIMARY, then this cases was handled above by @@ -1873,8 +1832,6 @@ static int __bond_release_one(struct net_device *bond_dev, bond_hw_addr_flush(bond_dev, slave_dev); } - bond_upper_dev_unlink(bond_dev, slave_dev); - slave_disable_netpoll(slave); /* close slave before restoring its mac address */ @@ -1913,7 +1870,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, int ret; ret = bond_release(bond_dev, slave_dev); - if (ret == 0 && list_empty(&bond->slave_list)) { + if (ret == 0 && !bond_has_slaves(bond)) { bond_dev->priv_flags |= IFF_DISABLE_NETPOLL; pr_info("%s: destroying bond %s.\n", bond_dev->name, bond_dev->name); @@ -1922,61 +1879,6 @@ static int bond_release_and_destroy(struct net_device *bond_dev, return ret; } -/* - * This function changes the active slave to slave <slave_dev>. - * It returns -EINVAL in the following cases. - * - <slave_dev> is not found in the list. - * - There is not active slave now. - * - <slave_dev> is already active. - * - The link state of <slave_dev> is not BOND_LINK_UP. - * - <slave_dev> is not running. - * In these cases, this function does nothing. - * In the other cases, current_slave pointer is changed and 0 is returned. - */ -static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) -{ - struct bonding *bond = netdev_priv(bond_dev); - struct slave *old_active = NULL; - struct slave *new_active = NULL; - int res = 0; - - if (!USES_PRIMARY(bond->params.mode)) - return -EINVAL; - - /* Verify that bond_dev is indeed the master of slave_dev */ - if (!(slave_dev->flags & IFF_SLAVE) || - !netdev_has_upper_dev(slave_dev, bond_dev)) - return -EINVAL; - - read_lock(&bond->lock); - - old_active = bond->curr_active_slave; - new_active = bond_get_slave_by_dev(bond, slave_dev); - /* - * Changing to the current active: do nothing; return success. - */ - if (new_active && new_active == old_active) { - read_unlock(&bond->lock); - return 0; - } - - if (new_active && - old_active && - new_active->link == BOND_LINK_UP && - IS_UP(new_active->dev)) { - block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); - bond_change_active_slave(bond, new_active); - write_unlock_bh(&bond->curr_slave_lock); - unblock_netpoll_tx(); - } else - res = -EINVAL; - - read_unlock(&bond->lock); - - return res; -} - static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) { struct bonding *bond = netdev_priv(bond_dev); @@ -1994,11 +1896,12 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; int i = 0, res = -ENODEV; struct slave *slave; read_lock(&bond->lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (i++ == (int)info->slave_id) { res = 0; strcpy(info->slave_name, slave->dev->name); @@ -2019,12 +1922,13 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in static int bond_miimon_inspect(struct bonding *bond) { int link_state, commit = 0; + struct list_head *iter; struct slave *slave; bool ignore_updelay; ignore_updelay = !bond->curr_active_slave ? true : false; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2118,9 +2022,10 @@ static int bond_miimon_inspect(struct bonding *bond) static void bond_miimon_commit(struct bonding *bond) { + struct list_head *iter; struct slave *slave; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { switch (slave->new_link) { case BOND_LINK_NOCHANGE: continue; @@ -2225,7 +2130,7 @@ void bond_mii_monitor(struct work_struct *work) delay = msecs_to_jiffies(bond->params.miimon); - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto re_arm; should_notify_peers = bond_should_notify_peers(bond); @@ -2274,7 +2179,7 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip) return true; rcu_read_lock(); - netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { + netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { if (ip == bond_confirm_addr(upper, 0, ip)) { ret = true; break; @@ -2349,10 +2254,12 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) * * TODO: QinQ? */ - netdev_for_each_upper_dev_rcu(bond->dev, vlan_upper, vlan_iter) { + netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper, + vlan_iter) { if (!is_vlan_dev(vlan_upper)) continue; - netdev_for_each_upper_dev_rcu(vlan_upper, upper, iter) { + netdev_for_each_all_upper_dev_rcu(vlan_upper, upper, + iter) { if (upper == rt->dst.dev) { vlan_id = vlan_dev_vlan_id(vlan_upper); rcu_read_unlock(); @@ -2365,7 +2272,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) * our upper vlans, then just search for any dev that * matches, and in case it's a vlan - save the id */ - netdev_for_each_upper_dev_rcu(bond->dev, upper, iter) { + netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { if (upper == rt->dst.dev) { /* if it's a vlan - get its VID */ if (is_vlan_dev(upper)) @@ -2512,11 +2419,12 @@ void bond_loadbalance_arp_mon(struct work_struct *work) struct bonding *bond = container_of(work, struct bonding, arp_work.work); struct slave *slave, *oldcurrent; + struct list_head *iter; int do_failover = 0; read_lock(&bond->lock); - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto re_arm; oldcurrent = bond->curr_active_slave; @@ -2528,7 +2436,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) * TODO: what about up/down delay in arp mode? it wasn't here before * so it can wait */ - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); if (slave->link != BOND_LINK_UP) { @@ -2619,10 +2527,11 @@ re_arm: static int bond_ab_arp_inspect(struct bonding *bond) { unsigned long trans_start, last_rx; + struct list_head *iter; struct slave *slave; int commit = 0; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; last_rx = slave_last_rx(bond, slave); @@ -2689,9 +2598,10 @@ static int bond_ab_arp_inspect(struct bonding *bond) static void bond_ab_arp_commit(struct bonding *bond) { unsigned long trans_start; + struct list_head *iter; struct slave *slave; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { switch (slave->new_link) { case BOND_LINK_NOCHANGE: continue; @@ -2762,8 +2672,9 @@ do_failover: */ static void bond_ab_arp_probe(struct bonding *bond) { - struct slave *slave, *next_slave; - int i; + struct slave *slave, *before = NULL, *new_slave = NULL; + struct list_head *iter; + bool found = false; read_lock(&bond->curr_slave_lock); @@ -2793,18 +2704,12 @@ static void bond_ab_arp_probe(struct bonding *bond) bond_set_slave_inactive_flags(bond->current_arp_slave); - /* search for next candidate */ - next_slave = bond_next_slave(bond, bond->current_arp_slave); - bond_for_each_slave_from(bond, slave, i, next_slave) { - if (IS_UP(slave->dev)) { - slave->link = BOND_LINK_BACK; - bond_set_slave_active_flags(slave); - bond_arp_send_all(bond, slave); - slave->jiffies = jiffies; - bond->current_arp_slave = slave; - break; - } + bond_for_each_slave(bond, slave, iter) { + if (!found && !before && IS_UP(slave->dev)) + before = slave; + if (found && !new_slave && IS_UP(slave->dev)) + new_slave = slave; /* if the link state is up at this point, we * mark it down - this can happen if we have * simultaneous link failures and @@ -2812,7 +2717,7 @@ static void bond_ab_arp_probe(struct bonding *bond) * one the current slave so it is still marked * up when it is actually down */ - if (slave->link == BOND_LINK_UP) { + if (!IS_UP(slave->dev) && slave->link == BOND_LINK_UP) { slave->link = BOND_LINK_DOWN; if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2822,7 +2727,22 @@ static void bond_ab_arp_probe(struct bonding *bond) pr_info("%s: backup interface %s is now down.\n", bond->dev->name, slave->dev->name); } + if (slave == bond->current_arp_slave) + found = true; } + + if (!new_slave && before) + new_slave = before; + + if (!new_slave) + return; + + new_slave->link = BOND_LINK_BACK; + bond_set_slave_active_flags(new_slave); + bond_arp_send_all(bond, new_slave); + new_slave->jiffies = jiffies; + bond->current_arp_slave = new_slave; + } void bond_activebackup_arp_mon(struct work_struct *work) @@ -2836,7 +2756,7 @@ void bond_activebackup_arp_mon(struct work_struct *work) delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); - if (list_empty(&bond->slave_list)) + if (!bond_has_slaves(bond)) goto re_arm; should_notify_peers = bond_should_notify_peers(bond); @@ -3033,99 +2953,85 @@ static struct notifier_block bond_netdev_notifier = { /*---------------------------- Hashing Policies -----------------------------*/ -/* - * Hash for the output device based upon layer 2 data - */ -static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count) +/* L2 hash helper */ +static inline u32 bond_eth_hash(struct sk_buff *skb) { struct ethhdr *data = (struct ethhdr *)skb->data; if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto)) - return (data->h_dest[5] ^ data->h_source[5]) % count; + return data->h_dest[5] ^ data->h_source[5]; return 0; } -/* - * Hash for the output device based upon layer 2 and layer 3 data. If - * the packet is not IP, fall back on bond_xmit_hash_policy_l2() - */ -static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count) +/* Extract the appropriate headers based on bond's xmit policy */ +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, + struct flow_keys *fk) { - const struct ethhdr *data; + const struct ipv6hdr *iph6; const struct iphdr *iph; - const struct ipv6hdr *ipv6h; - u32 v6hash; - const __be32 *s, *d; + int noff, proto = -1; + + if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) + return skb_flow_dissect(skb, fk); - if (skb->protocol == htons(ETH_P_IP) && - pskb_network_may_pull(skb, sizeof(*iph))) { + fk->ports = 0; + noff = skb_network_offset(skb); + if (skb->protocol == htons(ETH_P_IP)) { + if (!pskb_may_pull(skb, noff + sizeof(*iph))) + return false; iph = ip_hdr(skb); - data = (struct ethhdr *)skb->data; - return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^ - (data->h_dest[5] ^ data->h_source[5])) % count; - } else if (skb->protocol == htons(ETH_P_IPV6) && - pskb_network_may_pull(skb, sizeof(*ipv6h))) { - ipv6h = ipv6_hdr(skb); - data = (struct ethhdr *)skb->data; - s = &ipv6h->saddr.s6_addr32[0]; - d = &ipv6h->daddr.s6_addr32[0]; - v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); - v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8); - return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count; - } - - return bond_xmit_hash_policy_l2(skb, count); + fk->src = iph->saddr; + fk->dst = iph->daddr; + noff += iph->ihl << 2; + if (!ip_is_fragment(iph)) + proto = iph->protocol; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (!pskb_may_pull(skb, noff + sizeof(*iph6))) + return false; + iph6 = ipv6_hdr(skb); + fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr); + fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr); + noff += sizeof(*iph6); + proto = iph6->nexthdr; + } else { + return false; + } + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0) + fk->ports = skb_flow_get_ports(skb, noff, proto); + + return true; } -/* - * Hash for the output device based upon layer 3 and layer 4 data. If - * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is - * altogether not IP, fall back on bond_xmit_hash_policy_l2() +/** + * bond_xmit_hash - generate a hash value based on the xmit policy + * @bond: bonding device + * @skb: buffer to use for headers + * @count: modulo value + * + * This function will extract the necessary headers from the skb buffer and use + * them to generate a hash based on the xmit_policy set in the bonding device + * which will be reduced modulo count before returning. */ -static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count) +int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count) { - u32 layer4_xor = 0; - const struct iphdr *iph; - const struct ipv6hdr *ipv6h; - const __be32 *s, *d; - const __be16 *l4 = NULL; - __be16 _l4[2]; - int noff = skb_network_offset(skb); - int poff; - - if (skb->protocol == htons(ETH_P_IP) && - pskb_may_pull(skb, noff + sizeof(*iph))) { - iph = ip_hdr(skb); - poff = proto_ports_offset(iph->protocol); + struct flow_keys flow; + u32 hash; - if (!ip_is_fragment(iph) && poff >= 0) { - l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff, - sizeof(_l4), &_l4); - if (l4) - layer4_xor = ntohs(l4[0] ^ l4[1]); - } - return (layer4_xor ^ - ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count; - } else if (skb->protocol == htons(ETH_P_IPV6) && - pskb_may_pull(skb, noff + sizeof(*ipv6h))) { - ipv6h = ipv6_hdr(skb); - poff = proto_ports_offset(ipv6h->nexthdr); - if (poff >= 0) { - l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff, - sizeof(_l4), &_l4); - if (l4) - layer4_xor = ntohs(l4[0] ^ l4[1]); - } - s = &ipv6h->saddr.s6_addr32[0]; - d = &ipv6h->daddr.s6_addr32[0]; - layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]); - layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^ - (layer4_xor >> 8); - return layer4_xor % count; - } + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || + !bond_flow_dissect(bond, skb, &flow)) + return bond_eth_hash(skb) % count; + + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || + bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) + hash = bond_eth_hash(skb); + else + hash = (__force u32)flow.ports; + hash ^= (__force u32)flow.dst ^ (__force u32)flow.src; + hash ^= (hash >> 16); + hash ^= (hash >> 8); - return bond_xmit_hash_policy_l2(skb, count); + return hash % count; } /*-------------------------- Device entry points ----------------------------*/ @@ -3155,13 +3061,14 @@ static void bond_work_cancel_all(struct bonding *bond) static int bond_open(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; struct slave *slave; /* reset slave->backup and slave->inactive */ read_lock(&bond->lock); - if (!list_empty(&bond->slave_list)) { + if (bond_has_slaves(bond)) { read_lock(&bond->curr_slave_lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) && (slave != bond->curr_active_slave)) { bond_set_slave_inactive_flags(slave); @@ -3221,12 +3128,13 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, { struct bonding *bond = netdev_priv(bond_dev); struct rtnl_link_stats64 temp; + struct list_head *iter; struct slave *slave; memset(stats, 0, sizeof(*stats)); read_lock_bh(&bond->lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { const struct rtnl_link_stats64 *sstats = dev_get_stats(slave->dev, &temp); @@ -3263,6 +3171,7 @@ static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) { + struct bonding *bond = netdev_priv(bond_dev); struct net_device *slave_dev = NULL; struct ifbond k_binfo; struct ifbond __user *u_binfo = NULL; @@ -3293,7 +3202,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd if (mii->reg_num == 1) { - struct bonding *bond = netdev_priv(bond_dev); mii->val_out = 0; read_lock(&bond->lock); read_lock(&bond->curr_slave_lock); @@ -3365,7 +3273,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd break; case BOND_CHANGE_ACTIVE_OLD: case SIOCBONDCHANGEACTIVE: - res = bond_ioctl_change_active(bond_dev, slave_dev); + res = bond_option_active_slave_set(bond, slave_dev); break; default: res = -EOPNOTSUPP; @@ -3393,22 +3301,24 @@ static void bond_change_rx_flags(struct net_device *bond_dev, int change) static void bond_set_rx_mode(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct list_head *iter; struct slave *slave; - ASSERT_RTNL(); + rcu_read_lock(); if (USES_PRIMARY(bond->params.mode)) { - slave = rtnl_dereference(bond->curr_active_slave); + slave = rcu_dereference(bond->curr_active_slave); if (slave) { dev_uc_sync(slave->dev, bond_dev); dev_mc_sync(slave->dev, bond_dev); } } else { - bond_for_each_slave(bond, slave) { + bond_for_each_slave_rcu(bond, slave, iter) { dev_uc_sync_multiple(slave->dev, bond_dev); dev_mc_sync_multiple(slave->dev, bond_dev); } } + rcu_read_unlock(); } static int bond_neigh_init(struct neighbour *n) @@ -3471,7 +3381,8 @@ static int bond_neigh_setup(struct net_device *dev, static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) { struct bonding *bond = netdev_priv(bond_dev); - struct slave *slave; + struct slave *slave, *rollback_slave; + struct list_head *iter; int res = 0; pr_debug("bond=%p, name=%s, new_mtu=%d\n", bond, @@ -3492,10 +3403,9 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) * call to the base driver. */ - bond_for_each_slave(bond, slave) { - pr_debug("s %p s->p %p c_m %p\n", + bond_for_each_slave(bond, slave, iter) { + pr_debug("s %p c_m %p\n", slave, - bond_prev_slave(bond, slave), slave->dev->netdev_ops->ndo_change_mtu); res = dev_set_mtu(slave->dev, new_mtu); @@ -3520,13 +3430,16 @@ static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) unwind: /* unwind from head to the slave that failed */ - bond_for_each_slave_continue_reverse(bond, slave) { + bond_for_each_slave(bond, rollback_slave, iter) { int tmp_res; - tmp_res = dev_set_mtu(slave->dev, bond_dev->mtu); + if (rollback_slave == slave) + break; + + tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu); if (tmp_res) { pr_debug("unwind err %d dev %s\n", - tmp_res, slave->dev->name); + tmp_res, rollback_slave->dev->name); } } @@ -3543,8 +3456,9 @@ unwind: static int bond_set_mac_address(struct net_device *bond_dev, void *addr) { struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave, *rollback_slave; struct sockaddr *sa = addr, tmp_sa; - struct slave *slave; + struct list_head *iter; int res = 0; if (bond->params.mode == BOND_MODE_ALB) @@ -3578,7 +3492,7 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) * call to the base driver. */ - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { const struct net_device_ops *slave_ops = slave->dev->netdev_ops; pr_debug("slave %p %s\n", slave, slave->dev->name); @@ -3610,13 +3524,16 @@ unwind: tmp_sa.sa_family = bond_dev->type; /* unwind from head to the slave that failed */ - bond_for_each_slave_continue_reverse(bond, slave) { + bond_for_each_slave(bond, rollback_slave, iter) { int tmp_res; - tmp_res = dev_set_mac_address(slave->dev, &tmp_sa); + if (rollback_slave == slave) + break; + + tmp_res = dev_set_mac_address(rollback_slave->dev, &tmp_sa); if (tmp_res) { pr_debug("unwind err %d dev %s\n", - tmp_res, slave->dev->name); + tmp_res, rollback_slave->dev->name); } } @@ -3635,11 +3552,12 @@ unwind: */ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) { + struct list_head *iter; struct slave *slave; int i = slave_id; /* Here we start from the slave with slave_id */ - bond_for_each_slave_rcu(bond, slave) { + bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) { if (slave_can_tx(slave)) { bond_dev_queue_xmit(bond, skb, slave->dev); @@ -3650,7 +3568,7 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) /* Here we start from the first slave up to slave_id */ i = slave_id; - bond_for_each_slave_rcu(bond, slave) { + bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) break; if (slave_can_tx(slave)) { @@ -3662,14 +3580,44 @@ void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) kfree_skb(skb); } +/** + * bond_rr_gen_slave_id - generate slave id based on packets_per_slave + * @bond: bonding device to use + * + * Based on the value of the bonding device's packets_per_slave parameter + * this function generates a slave id, which is usually used as the next + * slave to transmit through. + */ +static u32 bond_rr_gen_slave_id(struct bonding *bond) +{ + int packets_per_slave = bond->params.packets_per_slave; + u32 slave_id; + + switch (packets_per_slave) { + case 0: + slave_id = prandom_u32(); + break; + case 1: + slave_id = bond->rr_tx_counter; + break; + default: + slave_id = reciprocal_divide(bond->rr_tx_counter, + packets_per_slave); + break; + } + bond->rr_tx_counter++; + + return slave_id; +} + static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); struct iphdr *iph = ip_hdr(skb); struct slave *slave; + u32 slave_id; - /* - * Start with the curr_active_slave that joined the bond as the + /* Start with the curr_active_slave that joined the bond as the * default for sending IGMP traffic. For failover purposes one * needs to maintain some consistency for the interface that will * send the join/membership reports. The curr_active_slave found @@ -3682,8 +3630,8 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev else bond_xmit_slave_id(bond, skb, 0); } else { - bond_xmit_slave_id(bond, skb, - bond->rr_tx_counter++ % bond->slave_cnt); + slave_id = bond_rr_gen_slave_id(bond); + bond_xmit_slave_id(bond, skb, slave_id % bond->slave_cnt); } return NETDEV_TX_OK; @@ -3707,8 +3655,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d return NETDEV_TX_OK; } -/* - * In bond_xmit_xor() , we determine the output device by using a pre- +/* In bond_xmit_xor() , we determine the output device by using a pre- * determined xmit_hash_policy(), If the selected device is not enabled, * find the next active slave. */ @@ -3716,8 +3663,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - bond_xmit_slave_id(bond, skb, - bond->xmit_hash_policy(skb, bond->slave_cnt)); + bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt)); return NETDEV_TX_OK; } @@ -3727,8 +3673,9 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); struct slave *slave = NULL; + struct list_head *iter; - bond_for_each_slave_rcu(bond, slave) { + bond_for_each_slave_rcu(bond, slave, iter) { if (bond_is_last_slave(bond, slave)) break; if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) { @@ -3753,22 +3700,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) /*------------------------- Device initialization ---------------------------*/ -static void bond_set_xmit_hash_policy(struct bonding *bond) -{ - switch (bond->params.xmit_policy) { - case BOND_XMIT_POLICY_LAYER23: - bond->xmit_hash_policy = bond_xmit_hash_policy_l23; - break; - case BOND_XMIT_POLICY_LAYER34: - bond->xmit_hash_policy = bond_xmit_hash_policy_l34; - break; - case BOND_XMIT_POLICY_LAYER2: - default: - bond->xmit_hash_policy = bond_xmit_hash_policy_l2; - break; - } -} - /* * Lookup the slave that corresponds to a qid */ @@ -3777,13 +3708,14 @@ static inline int bond_slave_override(struct bonding *bond, { struct slave *slave = NULL; struct slave *check_slave; + struct list_head *iter; int res = 1; if (!skb->queue_mapping) return 1; /* Find out if any slaves have the same mapping as this skb. */ - bond_for_each_slave_rcu(bond, check_slave) { + bond_for_each_slave_rcu(bond, check_slave, iter) { if (check_slave->queue_id == skb->queue_mapping) { slave = check_slave; break; @@ -3869,7 +3801,7 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; rcu_read_lock(); - if (!list_empty(&bond->slave_list)) + if (bond_has_slaves(bond)) ret = __bond_start_xmit(skb, dev); else kfree_skb(skb); @@ -3878,43 +3810,12 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } -/* - * set bond mode specific net device operations - */ -void bond_set_mode_ops(struct bonding *bond, int mode) -{ - struct net_device *bond_dev = bond->dev; - - switch (mode) { - case BOND_MODE_ROUNDROBIN: - break; - case BOND_MODE_ACTIVEBACKUP: - break; - case BOND_MODE_XOR: - bond_set_xmit_hash_policy(bond); - break; - case BOND_MODE_BROADCAST: - break; - case BOND_MODE_8023AD: - bond_set_xmit_hash_policy(bond); - break; - case BOND_MODE_ALB: - /* FALLTHRU */ - case BOND_MODE_TLB: - break; - default: - /* Should never happen, mode already checked */ - pr_err("%s: Error: Unknown bonding mode %d\n", - bond_dev->name, mode); - break; - } -} - static int bond_ethtool_get_settings(struct net_device *bond_dev, struct ethtool_cmd *ecmd) { struct bonding *bond = netdev_priv(bond_dev); unsigned long speed = 0; + struct list_head *iter; struct slave *slave; ecmd->duplex = DUPLEX_UNKNOWN; @@ -3926,7 +3827,7 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev, * this is an accurate maximum. */ read_lock(&bond->lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (SLAVE_IS_OK(slave)) { if (slave->speed != SPEED_UNKNOWN) speed += slave->speed; @@ -3994,14 +3895,13 @@ static void bond_destructor(struct net_device *bond_dev) free_netdev(bond_dev); } -static void bond_setup(struct net_device *bond_dev) +void bond_setup(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); /* initialize rwlocks */ rwlock_init(&bond->lock); rwlock_init(&bond->curr_slave_lock); - INIT_LIST_HEAD(&bond->slave_list); bond->params = bonding_defaults; /* Initialize pointers */ @@ -4011,7 +3911,6 @@ static void bond_setup(struct net_device *bond_dev) ether_setup(bond_dev); bond_dev->netdev_ops = &bond_netdev_ops; bond_dev->ethtool_ops = &bond_ethtool_ops; - bond_set_mode_ops(bond, bond->params.mode); bond_dev->destructor = bond_destructor; @@ -4057,12 +3956,13 @@ static void bond_setup(struct net_device *bond_dev) static void bond_uninit(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - struct slave *slave, *tmp_slave; + struct list_head *iter; + struct slave *slave; bond_netpoll_cleanup(bond_dev); /* Release the bonded slaves */ - list_for_each_entry_safe(slave, tmp_slave, &bond->slave_list, list) + bond_for_each_slave(bond, slave, iter) __bond_release_one(bond_dev, slave->dev, true); pr_info("%s: released all slaves\n", bond_dev->name); @@ -4235,6 +4135,12 @@ static int bond_check_params(struct bond_params *params) resend_igmp = BOND_DEFAULT_RESEND_IGMP; } + if (packets_per_slave < 0 || packets_per_slave > USHRT_MAX) { + pr_warn("Warning: packets_per_slave (%d) should be between 0 and %u resetting to 1\n", + packets_per_slave, USHRT_MAX); + packets_per_slave = 1; + } + /* reset values for TLB/ALB */ if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) { @@ -4424,7 +4330,10 @@ static int bond_check_params(struct bond_params *params) params->resend_igmp = resend_igmp; params->min_links = min_links; params->lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL; - + if (packets_per_slave > 1) + params->packets_per_slave = reciprocal_value(packets_per_slave); + else + params->packets_per_slave = packets_per_slave; if (primary) { strncpy(params->primary, primary, IFNAMSIZ); params->primary[IFNAMSIZ - 1] = 0; @@ -4495,32 +4404,11 @@ static int bond_init(struct net_device *bond_dev) return 0; } -static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) -{ - if (tb[IFLA_ADDRESS]) { - if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) - return -EINVAL; - if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) - return -EADDRNOTAVAIL; - } - return 0; -} - -static unsigned int bond_get_num_tx_queues(void) +unsigned int bond_get_num_tx_queues(void) { return tx_queues; } -static struct rtnl_link_ops bond_link_ops __read_mostly = { - .kind = "bond", - .priv_size = sizeof(struct bonding), - .setup = bond_setup, - .validate = bond_validate, - .get_num_tx_queues = bond_get_num_tx_queues, - .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number - as for TX queues */ -}; - /* Create a new bond based on the specified name and bonding parameters. * If name is NULL, obtain a suitable "bond%d" name for us. * Caller must NOT hold rtnl_lock; we need to release it here before we @@ -4607,7 +4495,7 @@ static int __init bonding_init(void) if (res) goto out; - res = rtnl_link_register(&bond_link_ops); + res = bond_netlink_init(); if (res) goto err_link; @@ -4623,7 +4511,7 @@ static int __init bonding_init(void) out: return res; err: - rtnl_link_unregister(&bond_link_ops); + bond_netlink_fini(); err_link: unregister_pernet_subsys(&bond_net_ops); goto out; @@ -4636,7 +4524,7 @@ static void __exit bonding_exit(void) bond_destroy_debugfs(); - rtnl_link_unregister(&bond_link_ops); + bond_netlink_fini(); unregister_pernet_subsys(&bond_net_ops); #ifdef CONFIG_NET_POLL_CONTROLLER @@ -4653,4 +4541,3 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); -MODULE_ALIAS_RTNL_LINK("bond"); diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c new file mode 100644 index 000000000000..40e7b1cb4aea --- /dev/null +++ b/drivers/net/bonding/bond_netlink.c @@ -0,0 +1,131 @@ +/* + * drivers/net/bond/bond_netlink.c - Netlink interface for bonding + * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_link.h> +#include <linux/if_ether.h> +#include <net/netlink.h> +#include <net/rtnetlink.h> +#include "bonding.h" + +static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { + [IFLA_BOND_MODE] = { .type = NLA_U8 }, + [IFLA_BOND_ACTIVE_SLAVE] = { .type = NLA_U32 }, +}; + +static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + +static int bond_changelink(struct net_device *bond_dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct bonding *bond = netdev_priv(bond_dev); + int err; + + if (data && data[IFLA_BOND_MODE]) { + int mode = nla_get_u8(data[IFLA_BOND_MODE]); + + err = bond_option_mode_set(bond, mode); + if (err) + return err; + } + if (data && data[IFLA_BOND_ACTIVE_SLAVE]) { + int ifindex = nla_get_u32(data[IFLA_BOND_ACTIVE_SLAVE]); + struct net_device *slave_dev; + + if (ifindex == 0) { + slave_dev = NULL; + } else { + slave_dev = __dev_get_by_index(dev_net(bond_dev), + ifindex); + if (!slave_dev) + return -ENODEV; + } + err = bond_option_active_slave_set(bond, slave_dev); + if (err) + return err; + } + return 0; +} + +static int bond_newlink(struct net *src_net, struct net_device *bond_dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + int err; + + err = bond_changelink(bond_dev, tb, data); + if (err < 0) + return err; + + return register_netdevice(bond_dev); +} + +static size_t bond_get_size(const struct net_device *bond_dev) +{ + return nla_total_size(sizeof(u8)) + /* IFLA_BOND_MODE */ + nla_total_size(sizeof(u32)); /* IFLA_BOND_ACTIVE_SLAVE */ +} + +static int bond_fill_info(struct sk_buff *skb, + const struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct net_device *slave_dev = bond_option_active_slave_get(bond); + + if (nla_put_u8(skb, IFLA_BOND_MODE, bond->params.mode) || + (slave_dev && + nla_put_u32(skb, IFLA_BOND_ACTIVE_SLAVE, slave_dev->ifindex))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +struct rtnl_link_ops bond_link_ops __read_mostly = { + .kind = "bond", + .priv_size = sizeof(struct bonding), + .setup = bond_setup, + .maxtype = IFLA_BOND_MAX, + .policy = bond_policy, + .validate = bond_validate, + .newlink = bond_newlink, + .changelink = bond_changelink, + .get_size = bond_get_size, + .fill_info = bond_fill_info, + .get_num_tx_queues = bond_get_num_tx_queues, + .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number + as for TX queues */ +}; + +int __init bond_netlink_init(void) +{ + return rtnl_link_register(&bond_link_ops); +} + +void bond_netlink_fini(void) +{ + rtnl_link_unregister(&bond_link_ops); +} + +MODULE_ALIAS_RTNL_LINK("bond"); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c new file mode 100644 index 000000000000..9a5223c7b4d1 --- /dev/null +++ b/drivers/net/bonding/bond_options.c @@ -0,0 +1,142 @@ +/* + * drivers/net/bond/bond_options.c - bonding options + * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/errno.h> +#include <linux/if.h> +#include <linux/netdevice.h> +#include <linux/rwlock.h> +#include <linux/rcupdate.h> +#include "bonding.h" + +static bool bond_mode_is_valid(int mode) +{ + int i; + + for (i = 0; bond_mode_tbl[i].modename; i++); + + return mode >= 0 && mode < i; +} + +int bond_option_mode_set(struct bonding *bond, int mode) +{ + if (!bond_mode_is_valid(mode)) { + pr_err("invalid mode value %d.\n", mode); + return -EINVAL; + } + + if (bond->dev->flags & IFF_UP) { + pr_err("%s: unable to update mode because interface is up.\n", + bond->dev->name); + return -EPERM; + } + + if (bond_has_slaves(bond)) { + pr_err("%s: unable to update mode because bond has slaves.\n", + bond->dev->name); + return -EPERM; + } + + if (BOND_MODE_IS_LB(mode) && bond->params.arp_interval) { + pr_err("%s: %s mode is incompatible with arp monitoring.\n", + bond->dev->name, bond_mode_tbl[mode].modename); + return -EINVAL; + } + + /* don't cache arp_validate between modes */ + bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; + bond->params.mode = mode; + return 0; +} + +static struct net_device *__bond_option_active_slave_get(struct bonding *bond, + struct slave *slave) +{ + return USES_PRIMARY(bond->params.mode) && slave ? slave->dev : NULL; +} + +struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) +{ + struct slave *slave = rcu_dereference(bond->curr_active_slave); + + return __bond_option_active_slave_get(bond, slave); +} + +struct net_device *bond_option_active_slave_get(struct bonding *bond) +{ + return __bond_option_active_slave_get(bond, bond->curr_active_slave); +} + +int bond_option_active_slave_set(struct bonding *bond, + struct net_device *slave_dev) +{ + int ret = 0; + + if (slave_dev) { + if (!netif_is_bond_slave(slave_dev)) { + pr_err("Device %s is not bonding slave.\n", + slave_dev->name); + return -EINVAL; + } + + if (bond->dev != netdev_master_upper_dev_get(slave_dev)) { + pr_err("%s: Device %s is not our slave.\n", + bond->dev->name, slave_dev->name); + return -EINVAL; + } + } + + if (!USES_PRIMARY(bond->params.mode)) { + pr_err("%s: Unable to change active slave; %s is in mode %d\n", + bond->dev->name, bond->dev->name, bond->params.mode); + return -EINVAL; + } + + block_netpoll_tx(); + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + + /* check to see if we are clearing active */ + if (!slave_dev) { + pr_info("%s: Clearing current active slave.\n", + bond->dev->name); + rcu_assign_pointer(bond->curr_active_slave, NULL); + bond_select_active_slave(bond); + } else { + struct slave *old_active = bond->curr_active_slave; + struct slave *new_active = bond_slave_get_rtnl(slave_dev); + + BUG_ON(!new_active); + + if (new_active == old_active) { + /* do nothing */ + pr_info("%s: %s is already the current active slave.\n", + bond->dev->name, new_active->dev->name); + } else { + if (old_active && (new_active->link == BOND_LINK_UP) && + IS_UP(new_active->dev)) { + pr_info("%s: Setting %s as active slave.\n", + bond->dev->name, new_active->dev->name); + bond_change_active_slave(bond, new_active); + } else { + pr_err("%s: Could not set %s as active slave; either %s is down or the link is down.\n", + bond->dev->name, new_active->dev->name, + new_active->dev->name); + ret = -EINVAL; + } + } + } + + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); + unblock_netpoll_tx(); + return ret; +} diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 20a6ee25bb63..fb868d6c22da 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -10,8 +10,9 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(&bond->lock) { struct bonding *bond = seq->private; - loff_t off = 0; + struct list_head *iter; struct slave *slave; + loff_t off = 0; /* make sure the bond won't be taken away */ rcu_read_lock(); @@ -20,7 +21,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - bond_for_each_slave(bond, slave) + bond_for_each_slave(bond, slave, iter) if (++off == *pos) return slave; @@ -30,17 +31,25 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bonding *bond = seq->private; - struct slave *slave = v; + struct list_head *iter; + struct slave *slave; + bool found = false; ++*pos; if (v == SEQ_START_TOKEN) return bond_first_slave(bond); - if (bond_is_last_slave(bond, slave)) + if (bond_is_last_slave(bond, v)) return NULL; - slave = bond_next_slave(bond, slave); - return slave; + bond_for_each_slave(bond, slave, iter) { + if (found) + return slave; + if (slave == v) + found = true; + } + + return NULL; } static void bond_info_seq_stop(struct seq_file *seq, void *v) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index ec9b6460a38d..bc8fd362a5aa 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -40,6 +40,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/nsproxy.h> +#include <linux/reciprocal_div.h> #include "bonding.h" @@ -159,41 +160,6 @@ static const struct class_attribute class_attr_bonding_masters = { .store = bonding_store_bonds, }; -int bond_create_slave_symlinks(struct net_device *master, - struct net_device *slave) -{ - char linkname[IFNAMSIZ+7]; - int ret = 0; - - /* first, create a link from the slave back to the master */ - ret = sysfs_create_link(&(slave->dev.kobj), &(master->dev.kobj), - "master"); - if (ret) - return ret; - /* next, create a link from the master to the slave */ - sprintf(linkname, "slave_%s", slave->name); - ret = sysfs_create_link(&(master->dev.kobj), &(slave->dev.kobj), - linkname); - - /* free the master link created earlier in case of error */ - if (ret) - sysfs_remove_link(&(slave->dev.kobj), "master"); - - return ret; - -} - -void bond_destroy_slave_symlinks(struct net_device *master, - struct net_device *slave) -{ - char linkname[IFNAMSIZ+7]; - - sysfs_remove_link(&(slave->dev.kobj), "master"); - sprintf(linkname, "slave_%s", slave->name); - sysfs_remove_link(&(master->dev.kobj), linkname); -} - - /* * Show the slaves in the current bond. */ @@ -201,11 +167,14 @@ static ssize_t bonding_show_slaves(struct device *d, struct device_attribute *attr, char *buf) { struct bonding *bond = to_bond(d); + struct list_head *iter; struct slave *slave; int res = 0; - read_lock(&bond->lock); - bond_for_each_slave(bond, slave) { + if (!rtnl_trylock()) + return restart_syscall(); + + bond_for_each_slave(bond, slave, iter) { if (res > (PAGE_SIZE - IFNAMSIZ)) { /* not enough space for another interface name */ if ((PAGE_SIZE - res) > 10) @@ -215,7 +184,9 @@ static ssize_t bonding_show_slaves(struct device *d, } res += sprintf(buf + res, "%s ", slave->dev->name); } - read_unlock(&bond->lock); + + rtnl_unlock(); + if (res) buf[res-1] = '\n'; /* eat the leftover space */ @@ -304,50 +275,26 @@ static ssize_t bonding_store_mode(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { - int new_value, ret = count; + int new_value, ret; struct bonding *bond = to_bond(d); - if (!rtnl_trylock()) - return restart_syscall(); - - if (bond->dev->flags & IFF_UP) { - pr_err("unable to update mode of %s because interface is up.\n", - bond->dev->name); - ret = -EPERM; - goto out; - } - - if (!list_empty(&bond->slave_list)) { - pr_err("unable to update mode of %s because it has slaves.\n", - bond->dev->name); - ret = -EPERM; - goto out; - } - new_value = bond_parse_parm(buf, bond_mode_tbl); if (new_value < 0) { pr_err("%s: Ignoring invalid mode value %.*s.\n", bond->dev->name, (int)strlen(buf) - 1, buf); - ret = -EINVAL; - goto out; + return -EINVAL; } - if ((new_value == BOND_MODE_ALB || - new_value == BOND_MODE_TLB) && - bond->params.arp_interval) { - pr_err("%s: %s mode is incompatible with arp monitoring.\n", - bond->dev->name, bond_mode_tbl[new_value].modename); - ret = -EINVAL; - goto out; + if (!rtnl_trylock()) + return restart_syscall(); + + ret = bond_option_mode_set(bond, new_value); + if (!ret) { + pr_info("%s: setting mode to %s (%d).\n", + bond->dev->name, bond_mode_tbl[new_value].modename, + new_value); + ret = count; } - /* don't cache arp_validate between modes */ - bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; - bond->params.mode = new_value; - bond_set_mode_ops(bond, bond->params.mode); - pr_info("%s: setting mode to %s (%d).\n", - bond->dev->name, bond_mode_tbl[new_value].modename, - new_value); -out: rtnl_unlock(); return ret; } @@ -383,7 +330,6 @@ static ssize_t bonding_store_xmit_hash(struct device *d, ret = -EINVAL; } else { bond->params.xmit_policy = new_value; - bond_set_mode_ops(bond, bond->params.mode); pr_info("%s: setting xmit hash policy to %s (%d).\n", bond->dev->name, xmit_hashtype_tbl[new_value].modename, new_value); @@ -513,7 +459,7 @@ static ssize_t bonding_store_fail_over_mac(struct device *d, if (!rtnl_trylock()) return restart_syscall(); - if (!list_empty(&bond->slave_list)) { + if (bond_has_slaves(bond)) { pr_err("%s: Can't alter fail_over_mac with slaves in bond.\n", bond->dev->name); ret = -EPERM; @@ -647,11 +593,15 @@ static ssize_t bonding_store_arp_targets(struct device *d, const char *buf, size_t count) { struct bonding *bond = to_bond(d); + struct list_head *iter; struct slave *slave; __be32 newtarget, *targets; unsigned long *targets_rx; int ind, i, j, ret = -EINVAL; + if (!rtnl_trylock()) + return restart_syscall(); + targets = bond->params.arp_targets; newtarget = in_aton(buf + 1); /* look for adds */ @@ -679,7 +629,7 @@ static ssize_t bonding_store_arp_targets(struct device *d, &newtarget); /* not to race with bond_arp_rcv */ write_lock_bh(&bond->lock); - bond_for_each_slave(bond, slave) + bond_for_each_slave(bond, slave, iter) slave->target_last_arp_rx[ind] = jiffies; targets[ind] = newtarget; write_unlock_bh(&bond->lock); @@ -705,7 +655,7 @@ static ssize_t bonding_store_arp_targets(struct device *d, &newtarget); write_lock_bh(&bond->lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { targets_rx = slave->target_last_arp_rx; j = ind; for (; (j < BOND_MAX_ARP_TARGETS-1) && targets[j+1]; j++) @@ -725,6 +675,7 @@ static ssize_t bonding_store_arp_targets(struct device *d, ret = count; out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets); @@ -1102,6 +1053,7 @@ static ssize_t bonding_store_primary(struct device *d, const char *buf, size_t count) { struct bonding *bond = to_bond(d); + struct list_head *iter; char ifname[IFNAMSIZ]; struct slave *slave; @@ -1129,7 +1081,7 @@ static ssize_t bonding_store_primary(struct device *d, goto out; } - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { pr_info("%s: Setting %s as primary slave.\n", bond->dev->name, slave->dev->name); @@ -1259,13 +1211,13 @@ static ssize_t bonding_show_active_slave(struct device *d, char *buf) { struct bonding *bond = to_bond(d); - struct slave *curr; + struct net_device *slave_dev; int count = 0; rcu_read_lock(); - curr = rcu_dereference(bond->curr_active_slave); - if (USES_PRIMARY(bond->params.mode) && curr) - count = sprintf(buf, "%s\n", curr->dev->name); + slave_dev = bond_option_active_slave_get_rcu(bond); + if (slave_dev) + count = sprintf(buf, "%s\n", slave_dev->name); rcu_read_unlock(); return count; @@ -1275,80 +1227,33 @@ static ssize_t bonding_store_active_slave(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { - struct slave *slave, *old_active, *new_active; + int ret; struct bonding *bond = to_bond(d); char ifname[IFNAMSIZ]; + struct net_device *dev; if (!rtnl_trylock()) return restart_syscall(); - old_active = new_active = NULL; - block_netpoll_tx(); - read_lock(&bond->lock); - write_lock_bh(&bond->curr_slave_lock); - - if (!USES_PRIMARY(bond->params.mode)) { - pr_info("%s: Unable to change active slave; %s is in mode %d\n", - bond->dev->name, bond->dev->name, bond->params.mode); - goto out; - } - sscanf(buf, "%15s", ifname); /* IFNAMSIZ */ - - /* check to see if we are clearing active */ if (!strlen(ifname) || buf[0] == '\n') { - pr_info("%s: Clearing current active slave.\n", - bond->dev->name); - rcu_assign_pointer(bond->curr_active_slave, NULL); - bond_select_active_slave(bond); - goto out; - } - - bond_for_each_slave(bond, slave) { - if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { - old_active = bond->curr_active_slave; - new_active = slave; - if (new_active == old_active) { - /* do nothing */ - pr_info("%s: %s is already the current" - " active slave.\n", - bond->dev->name, - slave->dev->name); - goto out; - } else { - if ((new_active) && - (old_active) && - (new_active->link == BOND_LINK_UP) && - IS_UP(new_active->dev)) { - pr_info("%s: Setting %s as active" - " slave.\n", - bond->dev->name, - slave->dev->name); - bond_change_active_slave(bond, - new_active); - } else { - pr_info("%s: Could not set %s as" - " active slave; either %s is" - " down or the link is down.\n", - bond->dev->name, - slave->dev->name, - slave->dev->name); - } - goto out; - } + dev = NULL; + } else { + dev = __dev_get_by_name(dev_net(bond->dev), ifname); + if (!dev) { + ret = -ENODEV; + goto out; } } - pr_info("%s: Unable to set %.*s as active slave.\n", - bond->dev->name, (int)strlen(buf) - 1, buf); - out: - write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); - unblock_netpoll_tx(); + ret = bond_option_active_slave_set(bond, dev); + if (!ret) + ret = count; + out: rtnl_unlock(); - return count; + return ret; } static DEVICE_ATTR(active_slave, S_IRUGO | S_IWUSR, @@ -1484,14 +1389,14 @@ static ssize_t bonding_show_queue_id(struct device *d, char *buf) { struct bonding *bond = to_bond(d); + struct list_head *iter; struct slave *slave; int res = 0; if (!rtnl_trylock()) return restart_syscall(); - read_lock(&bond->lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (res > (PAGE_SIZE - IFNAMSIZ - 6)) { /* not enough space for another interface_name:queue_id pair */ if ((PAGE_SIZE - res) > 10) @@ -1502,9 +1407,9 @@ static ssize_t bonding_show_queue_id(struct device *d, res += sprintf(buf + res, "%s:%d ", slave->dev->name, slave->queue_id); } - read_unlock(&bond->lock); if (res) buf[res-1] = '\n'; /* eat the leftover space */ + rtnl_unlock(); return res; @@ -1520,6 +1425,7 @@ static ssize_t bonding_store_queue_id(struct device *d, { struct slave *slave, *update_slave; struct bonding *bond = to_bond(d); + struct list_head *iter; u16 qid; int ret = count; char *delim; @@ -1552,11 +1458,9 @@ static ssize_t bonding_store_queue_id(struct device *d, if (!sdev) goto err_no_cmd; - read_lock(&bond->lock); - /* Search for thes slave and check for duplicate qids */ update_slave = NULL; - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (sdev == slave->dev) /* * We don't need to check the matching @@ -1564,23 +1468,20 @@ static ssize_t bonding_store_queue_id(struct device *d, */ update_slave = slave; else if (qid && qid == slave->queue_id) { - goto err_no_cmd_unlock; + goto err_no_cmd; } } if (!update_slave) - goto err_no_cmd_unlock; + goto err_no_cmd; /* Actually set the qids for the slave */ update_slave->queue_id = qid; - read_unlock(&bond->lock); out: rtnl_unlock(); return ret; -err_no_cmd_unlock: - read_unlock(&bond->lock); err_no_cmd: pr_info("invalid input for queue_id set for %s.\n", bond->dev->name); @@ -1610,8 +1511,12 @@ static ssize_t bonding_store_slaves_active(struct device *d, { struct bonding *bond = to_bond(d); int new_value, ret = count; + struct list_head *iter; struct slave *slave; + if (!rtnl_trylock()) + return restart_syscall(); + if (sscanf(buf, "%d", &new_value) != 1) { pr_err("%s: no all_slaves_active value specified.\n", bond->dev->name); @@ -1631,8 +1536,7 @@ static ssize_t bonding_store_slaves_active(struct device *d, goto out; } - read_lock(&bond->lock); - bond_for_each_slave(bond, slave) { + bond_for_each_slave(bond, slave, iter) { if (!bond_is_active_slave(slave)) { if (new_value) slave->inactive = 0; @@ -1640,8 +1544,8 @@ static ssize_t bonding_store_slaves_active(struct device *d, slave->inactive = 1; } } - read_unlock(&bond->lock); out: + rtnl_unlock(); return ret; } static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR, @@ -1728,6 +1632,53 @@ out: static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR, bonding_show_lp_interval, bonding_store_lp_interval); +static ssize_t bonding_show_packets_per_slave(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + int packets_per_slave = bond->params.packets_per_slave; + + if (packets_per_slave > 1) + packets_per_slave = reciprocal_value(packets_per_slave); + + return sprintf(buf, "%d\n", packets_per_slave); +} + +static ssize_t bonding_store_packets_per_slave(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bonding *bond = to_bond(d); + int new_value, ret = count; + + if (sscanf(buf, "%d", &new_value) != 1) { + pr_err("%s: no packets_per_slave value specified.\n", + bond->dev->name); + ret = -EINVAL; + goto out; + } + if (new_value < 0 || new_value > USHRT_MAX) { + pr_err("%s: packets_per_slave must be between 0 and %u\n", + bond->dev->name, USHRT_MAX); + ret = -EINVAL; + goto out; + } + if (bond->params.mode != BOND_MODE_ROUNDROBIN) + pr_warn("%s: Warning: packets_per_slave has effect only in balance-rr mode\n", + bond->dev->name); + if (new_value > 1) + bond->params.packets_per_slave = reciprocal_value(new_value); + else + bond->params.packets_per_slave = new_value; +out: + return ret; +} + +static DEVICE_ATTR(packets_per_slave, S_IRUGO | S_IWUSR, + bonding_show_packets_per_slave, + bonding_store_packets_per_slave); + static struct attribute *per_bond_attrs[] = { &dev_attr_slaves.attr, &dev_attr_mode.attr, @@ -1759,6 +1710,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_resend_igmp.attr, &dev_attr_min_links.attr, &dev_attr_lp_interval.attr, + &dev_attr_packets_per_slave.attr, NULL, }; diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 03cf3fd14490..77a07a12e77f 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -58,6 +58,11 @@ #define TX_QUEUE_OVERRIDE(mode) \ (((mode) == BOND_MODE_ACTIVEBACKUP) || \ ((mode) == BOND_MODE_ROUNDROBIN)) + +#define BOND_MODE_IS_LB(mode) \ + (((mode) == BOND_MODE_TLB) || \ + ((mode) == BOND_MODE_ALB)) + /* * Less bad way to call ioctl from within the kernel; this needs to be * done some other way to get the call out of interrupt context. @@ -72,63 +77,37 @@ res; }) /* slave list primitives */ -#define bond_to_slave(ptr) list_entry(ptr, struct slave, list) +#define bond_slave_list(bond) (&(bond)->dev->adj_list.lower) + +#define bond_has_slaves(bond) !list_empty(bond_slave_list(bond)) /* IMPORTANT: bond_first/last_slave can return NULL in case of an empty list */ #define bond_first_slave(bond) \ - list_first_entry_or_null(&(bond)->slave_list, struct slave, list) + (bond_has_slaves(bond) ? \ + netdev_adjacent_get_private(bond_slave_list(bond)->next) : \ + NULL) #define bond_last_slave(bond) \ - (list_empty(&(bond)->slave_list) ? NULL : \ - bond_to_slave((bond)->slave_list.prev)) + (bond_has_slaves(bond) ? \ + netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \ + NULL) -#define bond_is_first_slave(bond, pos) ((pos)->list.prev == &(bond)->slave_list) -#define bond_is_last_slave(bond, pos) ((pos)->list.next == &(bond)->slave_list) - -/* Since bond_first/last_slave can return NULL, these can return NULL too */ -#define bond_next_slave(bond, pos) \ - (bond_is_last_slave(bond, pos) ? bond_first_slave(bond) : \ - bond_to_slave((pos)->list.next)) - -#define bond_prev_slave(bond, pos) \ - (bond_is_first_slave(bond, pos) ? bond_last_slave(bond) : \ - bond_to_slave((pos)->list.prev)) - -/** - * bond_for_each_slave_from - iterate the slaves list from a starting point - * @bond: the bond holding this list. - * @pos: current slave. - * @cnt: counter for max number of moves - * @start: starting point. - * - * Caller must hold bond->lock - */ -#define bond_for_each_slave_from(bond, pos, cnt, start) \ - for (cnt = 0, pos = start; pos && cnt < (bond)->slave_cnt; \ - cnt++, pos = bond_next_slave(bond, pos)) +#define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond)) +#define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond)) /** * bond_for_each_slave - iterate over all slaves * @bond: the bond holding this list * @pos: current slave + * @iter: list_head * iterator * * Caller must hold bond->lock */ -#define bond_for_each_slave(bond, pos) \ - list_for_each_entry(pos, &(bond)->slave_list, list) +#define bond_for_each_slave(bond, pos, iter) \ + netdev_for_each_lower_private((bond)->dev, pos, iter) /* Caller must have rcu_read_lock */ -#define bond_for_each_slave_rcu(bond, pos) \ - list_for_each_entry_rcu(pos, &(bond)->slave_list, list) - -/** - * bond_for_each_slave_reverse - iterate in reverse from a given position - * @bond: the bond holding this list - * @pos: slave to continue from - * - * Caller must hold bond->lock - */ -#define bond_for_each_slave_continue_reverse(bond, pos) \ - list_for_each_entry_continue_reverse(pos, &(bond)->slave_list, list) +#define bond_for_each_slave_rcu(bond, pos, iter) \ + netdev_for_each_lower_private_rcu((bond)->dev, pos, iter) #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; @@ -177,6 +156,7 @@ struct bond_params { int all_slaves_active; int resend_igmp; int lp_interval; + int packets_per_slave; }; struct bond_parm_tbl { @@ -188,7 +168,6 @@ struct bond_parm_tbl { struct slave { struct net_device *dev; /* first - useful for panic debug */ - struct list_head list; struct bonding *bond; /* our master */ int delay; unsigned long jiffies; @@ -228,7 +207,6 @@ struct slave { */ struct bonding { struct net_device *dev; /* first - useful for panic debug */ - struct list_head slave_list; struct slave *curr_active_slave; struct slave *current_arp_slave; struct slave *primary_slave; @@ -245,8 +223,7 @@ struct bonding { char proc_file_name[IFNAMSIZ]; #endif /* CONFIG_PROC_FS */ struct list_head bond_list; - int (*xmit_hash_policy)(struct sk_buff *, int); - u16 rr_tx_counter; + u32 rr_tx_counter; struct ad_bond_info ad_info; struct alb_bond_info alb_info; struct bond_params params; @@ -276,13 +253,7 @@ struct bonding { static inline struct slave *bond_get_slave_by_dev(struct bonding *bond, struct net_device *slave_dev) { - struct slave *slave = NULL; - - bond_for_each_slave(bond, slave) - if (slave->dev == slave_dev) - return slave; - - return NULL; + return netdev_lower_dev_get_private(bond->dev, slave_dev); } static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) @@ -294,8 +265,7 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) static inline bool bond_is_lb(const struct bonding *bond) { - return (bond->params.mode == BOND_MODE_TLB || - bond->params.mode == BOND_MODE_ALB); + return BOND_MODE_IS_LB(bond->params.mode); } static inline void bond_set_active_slave(struct slave *slave) @@ -432,21 +402,18 @@ static inline bool slave_can_tx(struct slave *slave) struct bond_net; int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); -struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id); int bond_create(struct net *net, const char *name); int bond_create_sysfs(struct bond_net *net); void bond_destroy_sysfs(struct bond_net *net); void bond_prepare_sysfs_group(struct bonding *bond); -int bond_create_slave_symlinks(struct net_device *master, struct net_device *slave); -void bond_destroy_slave_symlinks(struct net_device *master, struct net_device *slave); int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); void bond_mii_monitor(struct work_struct *); void bond_loadbalance_arp_mon(struct work_struct *); void bond_activebackup_arp_mon(struct work_struct *); -void bond_set_mode_ops(struct bonding *bond, int mode); +int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count); int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl); void bond_select_active_slave(struct bonding *bond); void bond_change_active_slave(struct bonding *bond, struct slave *new_active); @@ -456,6 +423,14 @@ void bond_debug_register(struct bonding *bond); void bond_debug_unregister(struct bonding *bond); void bond_debug_reregister(struct bonding *bond); const char *bond_mode_name(int mode); +void bond_setup(struct net_device *bond_dev); +unsigned int bond_get_num_tx_queues(void); +int bond_netlink_init(void); +void bond_netlink_fini(void); +int bond_option_mode_set(struct bonding *bond, int mode); +int bond_option_active_slave_set(struct bonding *bond, struct net_device *slave_dev); +struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond); +struct net_device *bond_option_active_slave_get(struct bonding *bond); struct bond_net { struct net * net; /* Associated network namespace */ @@ -492,9 +467,24 @@ static inline void bond_destroy_proc_dir(struct bond_net *bn) static inline struct slave *bond_slave_has_mac(struct bonding *bond, const u8 *mac) { + struct list_head *iter; struct slave *tmp; - bond_for_each_slave(bond, tmp) + bond_for_each_slave(bond, tmp, iter) + if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) + return tmp; + + return NULL; +} + +/* Caller must hold rcu_read_lock() for read */ +static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond, + const u8 *mac) +{ + struct list_head *iter; + struct slave *tmp; + + bond_for_each_slave_rcu(bond, tmp, iter) if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) return tmp; @@ -528,4 +518,7 @@ extern const struct bond_parm_tbl fail_over_mac_tbl[]; extern const struct bond_parm_tbl pri_reselect_tbl[]; extern struct bond_parm_tbl ad_select_tbl[]; +/* exported from bond_netlink.c */ +extern struct rtnl_link_ops bond_link_ops; + #endif /* _LINUX_BONDING_H */ |