/*
 * Copyright (C) 2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General Public License
 * Version 2, June 1991 as shown in the file COPYING in the top-level
 * directory of this source tree or the BSD 2-Clause License provided below.
 * You have the option to license this software under the complete terms of
 * either license.
 *
 * The BSD 2-Clause License:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      1. Redistributions of source code must retain the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer.
 *
 *      2. Redistributions in binary form must reproduce the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer in the documentation and/or other materials
 *         provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "main.h"

/* LAG group config flags. */
#define NFP_FL_LAG_LAST			BIT(1)
#define NFP_FL_LAG_FIRST		BIT(2)
#define NFP_FL_LAG_DATA			BIT(3)
#define NFP_FL_LAG_XON			BIT(4)
#define NFP_FL_LAG_SYNC			BIT(5)
#define NFP_FL_LAG_SWITCH		BIT(6)
#define NFP_FL_LAG_RESET		BIT(7)

/* LAG port state flags. */
#define NFP_PORT_LAG_LINK_UP		BIT(0)
#define NFP_PORT_LAG_TX_ENABLED		BIT(1)
#define NFP_PORT_LAG_CHANGED		BIT(2)

enum nfp_fl_lag_batch {
        NFP_FL_LAG_BATCH_FIRST,
        NFP_FL_LAG_BATCH_MEMBER,
        NFP_FL_LAG_BATCH_FINISHED
};

/**
 * struct nfp_flower_cmsg_lag_config - control message payload for LAG config
 * @ctrl_flags:	Configuration flags
 * @reserved:	Reserved for future use
 * @ttl:	Time to live of packet - host always sets to 0xff
 * @pkt_number:	Config message packet number - increment for each message
 * @batch_ver:	Batch version of messages - increment for each batch of messages
 * @group_id:	Group ID applicable
 * @group_inst:	Group instance number - increment when group is reused
 * @members:	Array of 32-bit words listing all active group members
 */
struct nfp_flower_cmsg_lag_config {
        u8 ctrl_flags;
        u8 reserved[2];
        u8 ttl;
        __be32 pkt_number;
        __be32 batch_ver;
        __be32 group_id;
        __be32 group_inst;
        __be32 members[];
};

/**
 * struct nfp_fl_lag_group - list entry for each LAG group
 * @group_id:		Assigned group ID for host/kernel sync
 * @group_inst:		Group instance in case of ID reuse
 * @list:		List entry
 * @master_ndev:	Group master Netdev
 * @dirty:		Marked if the group needs to be synced to HW
 * @offloaded:		Marked if the group is currently offloaded to NIC
 * @to_remove:		Marked if the group should be removed from NIC
 * @to_destroy:		Marked if the group should be removed from driver
 * @slave_cnt:		Number of slaves in group
 */
struct nfp_fl_lag_group {
        unsigned int group_id;
        u8 group_inst;
        struct list_head list;
        struct net_device *master_ndev;
        bool dirty;
        bool offloaded;
        bool to_remove;
        bool to_destroy;
        unsigned int slave_cnt;
};

#define NFP_FL_LAG_PKT_NUMBER_MASK	GENMASK(30, 0)
#define NFP_FL_LAG_VERSION_MASK		GENMASK(22, 0)
#define NFP_FL_LAG_HOST_TTL		0xff

/* Use this ID with zero members to ack a batch config */
#define NFP_FL_LAG_SYNC_ID		0
#define NFP_FL_LAG_GROUP_MIN		1 /* ID 0 reserved */
#define NFP_FL_LAG_GROUP_MAX		32 /* IDs 1 to 31 are valid */

/* wait for more config */
#define NFP_FL_LAG_DELAY		(msecs_to_jiffies(2))

#define NFP_FL_LAG_RETRANS_LIMIT	100 /* max retrans cmsgs to store */

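/* Byte layout of the LAG config cmsg payload, as implied by
 * struct nfp_flower_cmsg_lag_config above (assuming the natural packing of
 * the types used, i.e. no implicit padding between the four leading u8 bytes
 * and the following __be32 fields):
 *
 *	offset 0:	ctrl_flags	(NFP_FL_LAG_* group config flags)
 *	offset 1-2:	reserved	(zeroed by the host)
 *	offset 3:	ttl		(always NFP_FL_LAG_HOST_TTL, 0xff)
 *	offset 4-7:	pkt_number	(31-bit wrapping counter, big endian)
 *	offset 8-11:	batch_ver	(23-bit batch version, big endian)
 *	offset 12-15:	group_id	(NFP_FL_LAG_SYNC_ID when closing a batch)
 *	offset 16-19:	group_inst	(instance counter for reused group IDs)
 *	offset 20+:	members[]	(one __be32 port ID per active member)
 */
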
static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag)
{
        lag->pkt_num++;
        lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK;

        return lag->pkt_num;
}

static void nfp_fl_increment_version(struct nfp_fl_lag *lag)
{
        /* LSB is not considered by firmware so add 2 for each increment. */
        lag->batch_ver += 2;
        lag->batch_ver &= NFP_FL_LAG_VERSION_MASK;

        /* Zero is reserved by firmware. */
        if (!lag->batch_ver)
                lag->batch_ver += 2;
}

static struct nfp_fl_lag_group *
nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master)
{
        struct nfp_fl_lag_group *group;
        struct nfp_flower_priv *priv;
        int id;

        priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

        id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN,
                            NFP_FL_LAG_GROUP_MAX, GFP_KERNEL);
        if (id < 0) {
                nfp_flower_cmsg_warn(priv->app,
                                     "No more bonding groups available\n");
                return ERR_PTR(id);
        }

        group = kmalloc(sizeof(*group), GFP_KERNEL);
        if (!group) {
                ida_simple_remove(&lag->ida_handle, id);
                return ERR_PTR(-ENOMEM);
        }

        group->group_id = id;
        group->master_ndev = master;
        group->dirty = true;
        group->offloaded = false;
        group->to_remove = false;
        group->to_destroy = false;
        group->slave_cnt = 0;
        group->group_inst = ++lag->global_inst;
        list_add_tail(&group->list, &lag->group_list);

        return group;
}

static struct nfp_fl_lag_group *
nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag,
                                          struct net_device *master)
{
        struct nfp_fl_lag_group *entry;

        if (!master)
                return NULL;

        list_for_each_entry(entry, &lag->group_list, list)
                if (entry->master_ndev == master)
                        return entry;

        return NULL;
}

int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
                                       struct net_device *master,
                                       struct nfp_fl_pre_lag *pre_act)
{
        struct nfp_flower_priv *priv = app->priv;
        struct nfp_fl_lag_group *group = NULL;
        __be32 temp_vers;

        mutex_lock(&priv->nfp_lag.lock);
        group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
                                                          master);
        if (!group) {
                mutex_unlock(&priv->nfp_lag.lock);
                return -ENOENT;
        }

        pre_act->group_id = cpu_to_be16(group->group_id);
        temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver <<
                                NFP_FL_PRE_LAG_VER_OFF);
        memcpy(pre_act->lag_version, &temp_vers, 3);
        pre_act->instance = group->group_inst;
        mutex_unlock(&priv->nfp_lag.lock);

        return 0;
}

int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master)
{
        struct nfp_flower_priv *priv = app->priv;
        struct nfp_fl_lag_group *group = NULL;
        int group_id = -ENOENT;

        mutex_lock(&priv->nfp_lag.lock);
        group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
                                                          master);
        if (group)
                group_id = group->group_id;
        mutex_unlock(&priv->nfp_lag.lock);

        return group_id;
}

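/* Overview of the batch framing applied by nfp_fl_lag_config_group() below:
 * the first message of a batch carries NFP_FL_LAG_FIRST and bumps batch_ver,
 * each group update is then a single message, and the batch is closed by a
 * message carrying NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST with the reserved
 * NFP_FL_LAG_SYNC_ID group and zero members. A pending reset (lag->rst_cfg)
 * adds NFP_FL_LAG_RESET and terminates the batch immediately.
 */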
static int
nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group,
                        struct net_device **active_members,
                        unsigned int member_cnt, enum nfp_fl_lag_batch *batch)
{
        struct nfp_flower_cmsg_lag_config *cmsg_payload;
        struct nfp_flower_priv *priv;
        unsigned long int flags;
        unsigned int size, i;
        struct sk_buff *skb;

        priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
        size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt;
        skb = nfp_flower_cmsg_alloc(priv->app, size,
                                    NFP_FLOWER_CMSG_TYPE_LAG_CONFIG,
                                    GFP_KERNEL);
        if (!skb)
                return -ENOMEM;

        cmsg_payload = nfp_flower_cmsg_get_data(skb);
        flags = 0;

        /* Increment batch version for each new batch of config messages. */
        if (*batch == NFP_FL_LAG_BATCH_FIRST) {
                flags |= NFP_FL_LAG_FIRST;
                nfp_fl_increment_version(lag);
                *batch = NFP_FL_LAG_BATCH_MEMBER;
        }

        /* If it is a reset msg then it is also the end of the batch. */
        if (lag->rst_cfg) {
                flags |= NFP_FL_LAG_RESET;
                *batch = NFP_FL_LAG_BATCH_FINISHED;
        }

        /* To signal the end of a batch, both the switch and last flags are set
         * and the reserved SYNC group ID is used.
         */
        if (*batch == NFP_FL_LAG_BATCH_FINISHED) {
                flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST;
                lag->rst_cfg = false;
                cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID);
                cmsg_payload->group_inst = 0;
        } else {
                cmsg_payload->group_id = cpu_to_be32(group->group_id);
                cmsg_payload->group_inst = cpu_to_be32(group->group_inst);
        }

        cmsg_payload->reserved[0] = 0;
        cmsg_payload->reserved[1] = 0;
        cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL;
        cmsg_payload->ctrl_flags = flags;
        cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver);
        cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag));

        for (i = 0; i < member_cnt; i++)
                cmsg_payload->members[i] =
                        cpu_to_be32(nfp_repr_get_port_id(active_members[i]));

        nfp_ctrl_tx(priv->app->ctrl, skb);

        return 0;
}

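/* Delayed worker: all group changes are applied here rather than directly in
 * the netdev notifier callbacks. Under lag->lock it deletes groups marked
 * to_remove (a config message with zero members deletes the group on hw),
 * re-validates the slave set of each remaining group under RCU, and re-sends
 * the active member list for groups marked dirty. A failed config message
 * reschedules the worker after NFP_FL_LAG_DELAY.
 */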
static void nfp_fl_lag_do_work(struct work_struct *work)
{
        enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
        struct nfp_fl_lag_group *entry, *storage;
        struct delayed_work *delayed_work;
        struct nfp_flower_priv *priv;
        struct nfp_fl_lag *lag;
        int err;

        delayed_work = to_delayed_work(work);
        lag = container_of(delayed_work, struct nfp_fl_lag, work);
        priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

        mutex_lock(&lag->lock);
        list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
                struct net_device *iter_netdev, **acti_netdevs;
                struct nfp_flower_repr_priv *repr_priv;
                int active_count = 0, slaves = 0;
                struct nfp_repr *repr;
                unsigned long *flags;

                if (entry->to_remove) {
                        /* Active count of 0 deletes group on hw. */
                        err = nfp_fl_lag_config_group(lag, entry, NULL, 0,
                                                      &batch);
                        if (!err) {
                                entry->to_remove = false;
                                entry->offloaded = false;
                        } else {
                                nfp_flower_cmsg_warn(priv->app,
                                                     "group delete failed\n");
                                schedule_delayed_work(&lag->work,
                                                      NFP_FL_LAG_DELAY);
                                continue;
                        }

                        if (entry->to_destroy) {
                                ida_simple_remove(&lag->ida_handle,
                                                  entry->group_id);
                                list_del(&entry->list);
                                kfree(entry);
                        }
                        continue;
                }

                acti_netdevs = kmalloc_array(entry->slave_cnt,
                                             sizeof(*acti_netdevs), GFP_KERNEL);
                if (!acti_netdevs) {
                        /* Retry the group update once memory is available. */
                        schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
                        continue;
                }

                /* Include sanity check in the loop. It may be that a bond has
                 * changed between processing the last notification and the
                 * work queue triggering. If the number of slaves has changed
                 * or it now contains netdevs that cannot be offloaded, ignore
                 * the group until pending notifications are processed.
                 */
                rcu_read_lock();
                for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) {
                        if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
                                slaves = 0;
                                break;
                        }

                        repr = netdev_priv(iter_netdev);
                        if (repr->app != priv->app) {
                                slaves = 0;
                                break;
                        }

                        slaves++;
                        if (slaves > entry->slave_cnt)
                                break;

                        /* Check the ports for state changes. */
                        repr_priv = repr->app_priv;
                        flags = &repr_priv->lag_port_flags;

                        if (*flags & NFP_PORT_LAG_CHANGED) {
                                *flags &= ~NFP_PORT_LAG_CHANGED;
                                entry->dirty = true;
                        }

                        if ((*flags & NFP_PORT_LAG_TX_ENABLED) &&
                            (*flags & NFP_PORT_LAG_LINK_UP))
                                acti_netdevs[active_count++] = iter_netdev;
                }
                rcu_read_unlock();

                if (slaves != entry->slave_cnt || !entry->dirty) {
                        kfree(acti_netdevs);
                        continue;
                }

                err = nfp_fl_lag_config_group(lag, entry, acti_netdevs,
                                              active_count, &batch);
                if (!err) {
                        entry->offloaded = true;
                        entry->dirty = false;
                } else {
                        nfp_flower_cmsg_warn(priv->app,
                                             "group offload failed\n");
                        schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
                }

                kfree(acti_netdevs);
        }

        /* End the config batch if at least one packet has been batched. */
        if (batch == NFP_FL_LAG_BATCH_MEMBER) {
                batch = NFP_FL_LAG_BATCH_FINISHED;
                err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
                if (err)
                        nfp_flower_cmsg_warn(priv->app,
                                             "group batch end cmsg failed\n");
        }

        mutex_unlock(&lag->lock);
}

static int
nfp_fl_lag_put_unprocessed(struct nfp_fl_lag *lag, struct sk_buff *skb)
{
        struct nfp_flower_cmsg_lag_config *cmsg_payload;

        cmsg_payload = nfp_flower_cmsg_get_data(skb);
        if (be32_to_cpu(cmsg_payload->group_id) >= NFP_FL_LAG_GROUP_MAX)
                return -EINVAL;

        /* Drop cmsg retrans if storage limit is exceeded to prevent
         * overloading. If the fw notices that expected messages have not been
         * received in a given time block, it will request a full resync.
         */
        if (skb_queue_len(&lag->retrans_skbs) >= NFP_FL_LAG_RETRANS_LIMIT)
                return -ENOSPC;

        __skb_queue_tail(&lag->retrans_skbs, skb);

        return 0;
}

static void nfp_fl_send_unprocessed(struct nfp_fl_lag *lag)
{
        struct nfp_flower_priv *priv;
        struct sk_buff *skb;

        priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

        while ((skb = __skb_dequeue(&lag->retrans_skbs)))
                nfp_ctrl_tx(priv->app->ctrl, skb);
}

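/* Firmware flow control for LAG cmsgs, handled by
 * nfp_flower_lag_unprocessed_msg() below: a message echoed back with
 * NFP_FL_LAG_DATA set could not be processed and is queued on retrans_skbs
 * for retransmission, NFP_FL_LAG_XON asks the host to flush that queue, and
 * NFP_FL_LAG_SYNC requests a full resync of all group config.
 */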
bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb)
{
        struct nfp_flower_cmsg_lag_config *cmsg_payload;
        struct nfp_flower_priv *priv = app->priv;
        struct nfp_fl_lag_group *group_entry;
        unsigned long int flags;
        bool store_skb = false;
        int err;

        cmsg_payload = nfp_flower_cmsg_get_data(skb);
        flags = cmsg_payload->ctrl_flags;

        /* Note the intentional fall through below. If DATA and XON are both
         * set, the message will be stored and sent again with the rest of the
         * unprocessed messages list.
         */

        /* Store */
        if (flags & NFP_FL_LAG_DATA)
                if (!nfp_fl_lag_put_unprocessed(&priv->nfp_lag, skb))
                        store_skb = true;

        /* Send stored */
        if (flags & NFP_FL_LAG_XON)
                nfp_fl_send_unprocessed(&priv->nfp_lag);

        /* Resend all */
        if (flags & NFP_FL_LAG_SYNC) {
                /* To resend all config:
                 * 1) Clear all unprocessed messages
                 * 2) Mark all groups dirty
                 * 3) Reset NFP group config
                 * 4) Schedule a LAG config update
                 */
                __skb_queue_purge(&priv->nfp_lag.retrans_skbs);

                mutex_lock(&priv->nfp_lag.lock);
                list_for_each_entry(group_entry, &priv->nfp_lag.group_list,
                                    list)
                        group_entry->dirty = true;

                err = nfp_flower_lag_reset(&priv->nfp_lag);
                if (err)
                        nfp_flower_cmsg_warn(priv->app,
                                             "mem err in group reset msg\n");
                mutex_unlock(&priv->nfp_lag.lock);

                schedule_delayed_work(&priv->nfp_lag.work, 0);
        }

        return store_skb;
}

static void
nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag,
                                 struct nfp_fl_lag_group *group)
{
        group->to_remove = true;

        schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}

static int
nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag,
                                 struct net_device *master)
{
        struct nfp_fl_lag_group *group;

        mutex_lock(&lag->lock);
        group = nfp_fl_lag_find_group_for_master_with_lag(lag, master);
        if (!group) {
                mutex_unlock(&lag->lock);
                return -ENOENT;
        }

        group->to_remove = true;
        group->to_destroy = true;
        mutex_unlock(&lag->lock);

        schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
        return 0;
}

static int
nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag,
                             struct netdev_notifier_changeupper_info *info)
{
        struct net_device *upper = info->upper_dev, *iter_netdev;
        struct netdev_lag_upper_info *lag_upper_info;
        struct nfp_fl_lag_group *group;
        struct nfp_flower_priv *priv;
        unsigned int slave_count = 0;
        bool can_offload = true;
        struct nfp_repr *repr;

        if (!netif_is_lag_master(upper))
                return 0;

        priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

        rcu_read_lock();
        for_each_netdev_in_bond_rcu(upper, iter_netdev) {
                if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
                        can_offload = false;
                        break;
                }
                repr = netdev_priv(iter_netdev);

                /* Ensure all ports are created by the same app/on same card. */
                if (repr->app != priv->app) {
                        can_offload = false;
                        break;
                }

                slave_count++;
        }
        rcu_read_unlock();

        lag_upper_info = info->upper_info;

        /* Firmware supports active/backup and L3/L4 hash bonds. */
        if (lag_upper_info &&
            lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
            (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
             (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
              lag_upper_info->hash_type != NETDEV_LAG_HASH_E34))) {
                can_offload = false;
                nfp_flower_cmsg_warn(priv->app,
                                     "Unable to offload tx_type %u hash %u\n",
                                     lag_upper_info->tx_type,
                                     lag_upper_info->hash_type);
        }

        mutex_lock(&lag->lock);
        group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper);

        if (slave_count == 0 || !can_offload) {
                /* Cannot offload the group - remove if previously offloaded. */
                if (group && group->offloaded)
                        nfp_fl_lag_schedule_group_remove(lag, group);

                mutex_unlock(&lag->lock);
                return 0;
        }

        if (!group) {
                group = nfp_fl_lag_group_create(lag, upper);
                if (IS_ERR(group)) {
                        mutex_unlock(&lag->lock);
                        return PTR_ERR(group);
                }
        }

        group->dirty = true;
        group->slave_cnt = slave_count;

        /* Group may have been on queue for removal but is now offloadable. */
        group->to_remove = false;
        mutex_unlock(&lag->lock);

        schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
        return 0;
}

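/* CHANGELOWERSTATE events only update the per-repr lag_port_flags
 * (link up / tx enabled / changed) under lag->lock; the actual hw update is
 * deferred to nfp_fl_lag_do_work(), which picks up NFP_PORT_LAG_CHANGED and
 * marks the owning group dirty.
 */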
static int
nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev,
                          struct netdev_notifier_changelowerstate_info *info)
{
        struct netdev_lag_lower_state_info *lag_lower_info;
        struct nfp_flower_repr_priv *repr_priv;
        struct nfp_flower_priv *priv;
        struct nfp_repr *repr;
        unsigned long *flags;

        if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev))
                return 0;

        lag_lower_info = info->lower_state_info;
        if (!lag_lower_info)
                return 0;

        priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
        repr = netdev_priv(netdev);

        /* Verify that the repr is associated with this app. */
        if (repr->app != priv->app)
                return 0;

        repr_priv = repr->app_priv;
        flags = &repr_priv->lag_port_flags;

        mutex_lock(&lag->lock);
        if (lag_lower_info->link_up)
                *flags |= NFP_PORT_LAG_LINK_UP;
        else
                *flags &= ~NFP_PORT_LAG_LINK_UP;

        if (lag_lower_info->tx_enabled)
                *flags |= NFP_PORT_LAG_TX_ENABLED;
        else
                *flags &= ~NFP_PORT_LAG_TX_ENABLED;

        *flags |= NFP_PORT_LAG_CHANGED;
        mutex_unlock(&lag->lock);

        schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);

        return 0;
}

static int
nfp_fl_lag_netdev_event(struct notifier_block *nb, unsigned long event,
                        void *ptr)
{
        struct net_device *netdev;
        struct nfp_fl_lag *lag;
        int err;

        netdev = netdev_notifier_info_to_dev(ptr);
        lag = container_of(nb, struct nfp_fl_lag, lag_nb);

        switch (event) {
        case NETDEV_CHANGEUPPER:
                err = nfp_fl_lag_changeupper_event(lag, ptr);
                if (err)
                        return NOTIFY_BAD;
                return NOTIFY_OK;
        case NETDEV_CHANGELOWERSTATE:
                err = nfp_fl_lag_changels_event(lag, netdev, ptr);
                if (err)
                        return NOTIFY_BAD;
                return NOTIFY_OK;
        case NETDEV_UNREGISTER:
                if (netif_is_bond_master(netdev)) {
                        err = nfp_fl_lag_schedule_group_delete(lag, netdev);
                        if (err)
                                return NOTIFY_BAD;
                        return NOTIFY_OK;
                }
        }

        return NOTIFY_DONE;
}

int nfp_flower_lag_reset(struct nfp_fl_lag *lag)
{
        enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;

        lag->rst_cfg = true;
        return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
}

void nfp_flower_lag_init(struct nfp_fl_lag *lag)
{
        INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work);
        INIT_LIST_HEAD(&lag->group_list);
        mutex_init(&lag->lock);
        ida_init(&lag->ida_handle);

        __skb_queue_head_init(&lag->retrans_skbs);

        /* 0 is a reserved batch version so increment to first valid value. */
        nfp_fl_increment_version(lag);

        lag->lag_nb.notifier_call = nfp_fl_lag_netdev_event;
}

void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag)
{
        struct nfp_fl_lag_group *entry, *storage;

        cancel_delayed_work_sync(&lag->work);

        __skb_queue_purge(&lag->retrans_skbs);

        /* Remove all groups. */
        mutex_lock(&lag->lock);
        list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
                list_del(&entry->list);
                kfree(entry);
        }
        mutex_unlock(&lag->lock);
        mutex_destroy(&lag->lock);
        ida_destroy(&lag->ida_handle);
}

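/* Usage sketch (informational, not part of the driver logic in this file):
 * nfp_flower_lag_init() only initialises state and sets lag_nb.notifier_call.
 * The owning flower app is expected to register and unregister the notifier
 * itself, along the lines of:
 *
 *	nfp_flower_lag_init(&priv->nfp_lag);
 *	err = register_netdevice_notifier(&priv->nfp_lag.lag_nb);
 *	if (err)
 *		goto err_lag_clean;
 *	...
 *	unregister_netdevice_notifier(&priv->nfp_lag.lag_nb);
 *	nfp_flower_lag_cleanup(&priv->nfp_lag);
 *
 * where priv is the struct nfp_flower_priv instance that embeds this
 * struct nfp_fl_lag.
 */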