From dd5f03beb4f76ae65d76d8c22a8815e424fc607c Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 12 Dec 2013 18:03:11 +0200 Subject: IB/core: Ethernet L2 attributes in verbs/cm structures This patch add the support for Ethernet L2 attributes in the verbs/cm/cma structures. When dealing with L2 Ethernet, we should use smac, dmac, vlan ID and priority in a similar manner that the IB L2 (and the L4 PKEY) attributes are used. Thus, those attributes were added to the following structures: * ib_ah_attr - added dmac * ib_qp_attr - added smac and vlan_id, (sl remains vlan priority) * ib_wc - added smac, vlan_id * ib_sa_path_rec - added smac, dmac, vlan_id * cm_av - added smac and vlan_id For the path record structure, extra care was taken to avoid the new fields when packing it into wire format, so we don't break the IB CM and SA wire protocol. On the active side, the CM fills. its internal structures from the path provided by the ULP. We add there taking the ETH L2 attributes and placing them into the CM Address Handle (struct cm_av). On the passive side, the CM fills its internal structures from the WC associated with the REQ message. We add there taking the ETH L2 attributes from the WC. When the HW driver provides the required ETH L2 attributes in the WC, they set the IB_WC_WITH_SMAC and IB_WC_WITH_VLAN flags. The IB core code checks for the presence of these flags, and in their absence does address resolution from the ib_init_ah_from_wc() helper function. ib_modify_qp_is_ok is also updated to consider the link layer. Some parameters are mandatory for Ethernet link layer, while they are irrelevant for IB. Vendor drivers are modified to support the new function signature. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 4f10af2905b5..da6f5fa0c328 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1561,13 +1561,18 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx4_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; int err = -EINVAL; - + int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { + if (cur_state == new_state && cur_state == IB_QPS_RESET) + p = IB_LINK_LAYER_UNSPECIFIED; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask, + rdma_port_get_link_layer(&dev->ib_dev, p))) { pr_debug("qpn 0x%x: invalid attribute mask specified " "for transition %d to %d. qp_type %d," " attr_mask 0x%x\n", -- cgit v1.2.3 From d487ee77740ccf79d7dc1935d4daa77887283028 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 12 Dec 2013 18:03:13 +0200 Subject: IB/mlx4: Use IBoE (RoCE) IP based GIDs in the port GID table Currently, the mlx4 driver set IBoE (RoCE) gids to encode related Ethernet netdevice interface MAC address and possibly VLAN id. Change this scheme such that gids encode interface IP addresses (both IP4 and IPv6). This requires learning the IP addresses which are of use by a netdevice associated with the HCA port, formatting them to gids and adding them to the port gid table. Furthermore, events of add and delete address are caught to maintain the gid table accordingly. Associated IP addresses may belong to a master of an Ethernet netdevice on top of that port so this should be considered when building and maintaining the gid table. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 474 ++++++++++++++++++++++++----------- drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 + 2 files changed, 334 insertions(+), 143 deletions(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1958c5ca792a..a776aabab44a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include @@ -787,7 +789,6 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid) { - u8 mac[6]; struct net_device *ndev; int ret = 0; @@ -801,11 +802,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, spin_unlock(&mdev->iboe.lock); if (ndev) { - rdma_get_mcast_mac((struct in6_addr *)gid, mac); - rtnl_lock(); - dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac); ret = 1; - rtnl_unlock(); dev_put(ndev); } @@ -1025,6 +1022,8 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct mlx4_ib_qp *mqp = to_mqp(ibqp); u64 reg_id; struct mlx4_ib_steering *ib_steering = NULL; + enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? + MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { @@ -1036,7 +1035,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), - MLX4_PROT_IB_IPV6, ®_id); + prot, ®_id); if (err) goto err_malloc; @@ -1055,7 +1054,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err_add: mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, - MLX4_PROT_IB_IPV6, reg_id); + prot, reg_id); err_malloc: kfree(ib_steering); @@ -1083,10 +1082,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int err; struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); struct mlx4_ib_qp *mqp = to_mqp(ibqp); - u8 mac[6]; struct net_device *ndev; struct mlx4_ib_gid_entry *ge; u64 reg_id = 0; + enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? + MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { @@ -1109,7 +1109,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, - MLX4_PROT_IB_IPV6, reg_id); + prot, reg_id); if (err) return err; @@ -1121,13 +1121,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (ndev) dev_hold(ndev); spin_unlock(&mdev->iboe.lock); - rdma_get_mcast_mac((struct in6_addr *)gid, mac); - if (ndev) { - rtnl_lock(); - dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac); - rtnl_unlock(); + if (ndev) dev_put(ndev); - } list_del(&ge->list); kfree(ge); } else @@ -1223,20 +1218,6 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; -static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev) -{ - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); - if (vlan_id < 0x1000) { - eui[3] = vlan_id >> 8; - eui[4] = vlan_id & 0xff; - } else { - eui[3] = 0xff; - eui[4] = 0xfe; - } - eui[0] ^= 2; -} - static void update_gids_task(struct work_struct *work) { struct update_gid_work *gw = container_of(work, struct update_gid_work, work); @@ -1259,161 +1240,318 @@ static void update_gids_task(struct work_struct *work) MLX4_CMD_WRAPPED); if (err) pr_warn("set port command failed\n"); - else { - memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); + else mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); - } mlx4_free_cmd_mailbox(dev, mailbox); kfree(gw); } -static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear) +static void reset_gids_task(struct work_struct *work) { - struct net_device *ndev = dev->iboe.netdevs[port - 1]; - struct update_gid_work *work; - struct net_device *tmp; + struct update_gid_work *gw = + container_of(work, struct update_gid_work, work); + struct mlx4_cmd_mailbox *mailbox; + union ib_gid *gids; + int err; int i; - u8 *hits; - int ret; - union ib_gid gid; - int free; - int found; - int need_update = 0; - u16 vid; + struct mlx4_dev *dev = gw->dev->dev; - work = kzalloc(sizeof *work, GFP_ATOMIC); - if (!work) - return -ENOMEM; + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + pr_warn("reset gid table failed\n"); + goto free; + } - hits = kzalloc(128, GFP_ATOMIC); - if (!hits) { - ret = -ENOMEM; - goto out; + gids = mailbox->buf; + memcpy(gids, gw->gids, sizeof(gw->gids)); + + for (i = 1; i < gw->dev->num_ports + 1; i++) { + if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, i) == + IB_LINK_LAYER_ETHERNET) { + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_GID_TABLE << 8 | i, + 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + pr_warn(KERN_WARNING + "set port %d command failed\n", i); + } } - rcu_read_lock(); - for_each_netdev_rcu(&init_net, tmp) { - if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) { - gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); - vid = rdma_vlan_dev_vlan_id(tmp); - mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev); - found = 0; - free = -1; - for (i = 0; i < 128; ++i) { - if (free < 0 && - !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) - free = i; - if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) { - hits[i] = 1; - found = 1; - break; - } - } + mlx4_free_cmd_mailbox(dev, mailbox); +free: + kfree(gw); +} - if (!found) { - if (tmp == ndev && - (memcmp(&dev->iboe.gid_table[port - 1][0], - &gid, sizeof gid) || - !memcmp(&dev->iboe.gid_table[port - 1][0], - &zgid, sizeof gid))) { - dev->iboe.gid_table[port - 1][0] = gid; - ++need_update; - hits[0] = 1; - } else if (free >= 0) { - dev->iboe.gid_table[port - 1][free] = gid; - hits[free] = 1; - ++need_update; - } +static int update_gid_table(struct mlx4_ib_dev *dev, int port, + union ib_gid *gid, int clear) +{ + struct update_gid_work *work; + int i; + int need_update = 0; + int free = -1; + int found = -1; + int max_gids; + + max_gids = dev->dev->caps.gid_table_len[port]; + for (i = 0; i < max_gids; ++i) { + if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, + sizeof(*gid))) + found = i; + + if (clear) { + if (found >= 0) { + need_update = 1; + dev->iboe.gid_table[port - 1][found] = zgid; + break; } + } else { + if (found >= 0) + break; + + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, + sizeof(*gid))) + free = i; } } - rcu_read_unlock(); - for (i = 0; i < 128; ++i) - if (!hits[i]) { - if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid)) - ++need_update; - dev->iboe.gid_table[port - 1][i] = zgid; - } + if (found == -1 && !clear && free >= 0) { + dev->iboe.gid_table[port - 1][free] = *gid; + need_update = 1; + } - if (need_update) { - memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids); - INIT_WORK(&work->work, update_gids_task); - work->port = port; - work->dev = dev; - queue_work(wq, &work->work); - } else - kfree(work); + if (!need_update) + return 0; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + + memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids)); + INIT_WORK(&work->work, update_gids_task); + work->port = port; + work->dev = dev; + queue_work(wq, &work->work); - kfree(hits); return 0; +} -out: - kfree(work); - return ret; +static int reset_gid_table(struct mlx4_ib_dev *dev) +{ + struct update_gid_work *work; + + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table)); + memset(work->gids, 0, sizeof(work->gids)); + INIT_WORK(&work->work, reset_gids_task); + work->dev = dev; + queue_work(wq, &work->work); + return 0; } -static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event) +static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, + struct mlx4_ib_dev *ibdev, union ib_gid *gid) { - switch (event) { - case NETDEV_UP: - case NETDEV_CHANGEADDR: - update_ipv6_gids(dev, port, 0); - break; + struct mlx4_ib_iboe *iboe; + int port = 0; + struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? + rdma_vlan_dev_real_dev(event_netdev) : + event_netdev; + + if (event != NETDEV_DOWN && event != NETDEV_UP) + return 0; + + if ((real_dev != event_netdev) && + (event == NETDEV_DOWN) && + rdma_link_local_addr((struct in6_addr *)gid)) + return 0; + + iboe = &ibdev->iboe; + spin_lock(&iboe->lock); + + for (port = 1; port <= MLX4_MAX_PORTS; ++port) + if ((netif_is_bond_master(real_dev) && + (real_dev == iboe->masters[port - 1])) || + (!netif_is_bond_master(real_dev) && + (real_dev == iboe->netdevs[port - 1]))) + update_gid_table(ibdev, port, gid, + event == NETDEV_DOWN); + + spin_unlock(&iboe->lock); + return 0; - case NETDEV_DOWN: - update_ipv6_gids(dev, port, 1); - dev->iboe.netdevs[port - 1] = NULL; - } } -static void netdev_added(struct mlx4_ib_dev *dev, int port) +static u8 mlx4_ib_get_dev_port(struct net_device *dev, + struct mlx4_ib_dev *ibdev) { - update_ipv6_gids(dev, port, 0); + u8 port = 0; + struct mlx4_ib_iboe *iboe; + struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ? + rdma_vlan_dev_real_dev(dev) : dev; + + iboe = &ibdev->iboe; + spin_lock(&iboe->lock); + + for (port = 1; port <= MLX4_MAX_PORTS; ++port) + if ((netif_is_bond_master(real_dev) && + (real_dev == iboe->masters[port - 1])) || + (!netif_is_bond_master(real_dev) && + (real_dev == iboe->netdevs[port - 1]))) + break; + + spin_unlock(&iboe->lock); + + if ((port == 0) || (port > MLX4_MAX_PORTS)) + return 0; + else + return port; } -static void netdev_removed(struct mlx4_ib_dev *dev, int port) +static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event, + void *ptr) { - update_ipv6_gids(dev, port, 1); + struct mlx4_ib_dev *ibdev; + struct in_ifaddr *ifa = ptr; + union ib_gid gid; + struct net_device *event_netdev = ifa->ifa_dev->dev; + + ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet); + + mlx4_ib_addr_event(event, event_netdev, ibdev, &gid); + return NOTIFY_DONE; } -static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mlx4_ib_dev *ibdev; - struct net_device *oldnd; + struct inet6_ifaddr *ifa = ptr; + union ib_gid *gid = (union ib_gid *)&ifa->addr; + struct net_device *event_netdev = ifa->idev->dev; + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6); + + mlx4_ib_addr_event(event, event_netdev, ibdev, gid); + return NOTIFY_DONE; +} +#endif + +static void mlx4_ib_get_dev_addr(struct net_device *dev, + struct mlx4_ib_dev *ibdev, u8 port) +{ + struct in_device *in_dev; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct inet6_dev *in6_dev; + union ib_gid *pgid; + struct inet6_ifaddr *ifp; +#endif + union ib_gid gid; + + + if ((port == 0) || (port > MLX4_MAX_PORTS)) + return; + + /* IPv4 gids */ + in_dev = in_dev_get(dev); + if (in_dev) { + for_ifa(in_dev) { + /*ifa->ifa_address;*/ + ipv6_addr_set_v4mapped(ifa->ifa_address, + (struct in6_addr *)&gid); + update_gid_table(ibdev, port, &gid, 0); + } + endfor_ifa(in_dev); + in_dev_put(in_dev); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + /* IPv6 gids */ + in6_dev = in6_dev_get(dev); + if (in6_dev) { + read_lock_bh(&in6_dev->lock); + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { + pgid = (union ib_gid *)&ifp->addr; + update_gid_table(ibdev, port, pgid, 0); + } + read_unlock_bh(&in6_dev->lock); + in6_dev_put(in6_dev); + } +#endif +} + +static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) +{ + struct net_device *dev; + + if (reset_gid_table(ibdev)) + return -1; + + read_lock(&dev_base_lock); + + for_each_netdev(&init_net, dev) { + u8 port = mlx4_ib_get_dev_port(dev, ibdev); + if (port) + mlx4_ib_get_dev_addr(dev, ibdev, port); + } + + read_unlock(&dev_base_lock); + + return 0; +} + +static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) +{ struct mlx4_ib_iboe *iboe; int port; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); iboe = &ibdev->iboe; spin_lock(&iboe->lock); mlx4_foreach_ib_transport_port(port, ibdev->dev) { - oldnd = iboe->netdevs[port - 1]; + struct net_device *old_master = iboe->masters[port - 1]; + struct net_device *curr_master; iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); - if (oldnd != iboe->netdevs[port - 1]) { - if (iboe->netdevs[port - 1]) - netdev_added(ibdev, port); - else - netdev_removed(ibdev, port); + + if (iboe->netdevs[port - 1] && + netif_is_bond_slave(iboe->netdevs[port - 1])) { + rtnl_lock(); + iboe->masters[port - 1] = netdev_master_upper_dev_get( + iboe->netdevs[port - 1]); + rtnl_unlock(); } - } + curr_master = iboe->masters[port - 1]; - if (dev == iboe->netdevs[0] || - (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0])) - handle_en_event(ibdev, 1, event); - else if (dev == iboe->netdevs[1] - || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1])) - handle_en_event(ibdev, 2, event); + /* if bonding is used it is possible that we add it to masters + only after IP address is assigned to the net bonding + interface */ + if (curr_master && (old_master != curr_master)) + mlx4_ib_get_dev_addr(curr_master, ibdev, port); + } spin_unlock(&iboe->lock); +} + +static int mlx4_ib_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlx4_ib_dev *ibdev; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); + mlx4_ib_scan_netdevs(ibdev); return NOTIFY_DONE; } @@ -1719,11 +1857,35 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_init_sriov(ibdev)) goto err_mad; - if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { - iboe->nb.notifier_call = mlx4_ib_netdev_event; - err = register_netdevice_notifier(&iboe->nb); - if (err) - goto err_sriov; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) { + if (!iboe->nb.notifier_call) { + iboe->nb.notifier_call = mlx4_ib_netdev_event; + err = register_netdevice_notifier(&iboe->nb); + if (err) { + iboe->nb.notifier_call = NULL; + goto err_notif; + } + } + if (!iboe->nb_inet.notifier_call) { + iboe->nb_inet.notifier_call = mlx4_ib_inet_event; + err = register_inetaddr_notifier(&iboe->nb_inet); + if (err) { + iboe->nb_inet.notifier_call = NULL; + goto err_notif; + } + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (!iboe->nb_inet6.notifier_call) { + iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event; + err = register_inet6addr_notifier(&iboe->nb_inet6); + if (err) { + iboe->nb_inet6.notifier_call = NULL; + goto err_notif; + } + } +#endif + mlx4_ib_scan_netdevs(ibdev); + mlx4_ib_init_gid_table(ibdev); } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { @@ -1749,11 +1911,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) return ibdev; err_notif: - if (unregister_netdevice_notifier(&ibdev->iboe.nb)) - pr_warn("failure unregistering notifier\n"); + if (ibdev->iboe.nb.notifier_call) { + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb.notifier_call = NULL; + } + if (ibdev->iboe.nb_inet.notifier_call) { + if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet.notifier_call = NULL; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (ibdev->iboe.nb_inet6.notifier_call) { + if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet6.notifier_call = NULL; + } +#endif flush_workqueue(wq); -err_sriov: mlx4_ib_close_sriov(ibdev); err_mad: @@ -1795,6 +1971,18 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } + if (ibdev->iboe.nb_inet.notifier_call) { + if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet.notifier_call = NULL; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + if (ibdev->iboe.nb_inet6.notifier_call) { + if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet6.notifier_call = NULL; + } +#endif iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) if (ibdev->counters[p] != -1) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 036b663dd26e..133f41f42194 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -428,7 +428,10 @@ struct mlx4_ib_sriov { struct mlx4_ib_iboe { spinlock_t lock; struct net_device *netdevs[MLX4_MAX_PORTS]; + struct net_device *masters[MLX4_MAX_PORTS]; struct notifier_block nb; + struct notifier_block nb_inet; + struct notifier_block nb_inet6; union ib_gid gid_table[MLX4_MAX_PORTS][128]; }; -- cgit v1.2.3 From 297e0dad720664dad44baa2cdd13f871979fb58c Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 12 Dec 2013 18:03:14 +0200 Subject: IB/mlx4: Handle Ethernet L2 parameters for IP based GID addressing IP based RoCE gids don't store Ethernet L2 parameters, MAC and VLAN. Therefore, we need to extract them from the CQE and place them in struct ib_wc (to be used for cases were they were taken from the gid). Also, when modifying a QP or building address handle, instead of parsing the dgid to get the MAC and VLAN, take them from the address handle attributes. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/ah.c | 40 +++--------- drivers/infiniband/hw/mlx4/cq.c | 9 +++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 - drivers/infiniband/hw/mlx4/qp.c | 105 +++++++++++++++++++++++------- drivers/net/ethernet/mellanox/mlx4/port.c | 20 ++++++ include/linux/mlx4/cq.h | 15 +++-- 6 files changed, 130 insertions(+), 62 deletions(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index a251becdaa98..170dca608042 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -39,25 +39,6 @@ #include "mlx4_ib.h" -int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, - u8 *mac, int *is_mcast, u8 port) -{ - struct in6_addr in6; - - *is_mcast = 0; - - memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6); - if (rdma_link_local_addr(&in6)) - rdma_get_ll_mac(&in6, mac); - else if (rdma_is_multicast_addr(&in6)) { - rdma_get_mcast_mac(&in6, mac); - *is_mcast = 1; - } else - return -EINVAL; - - return 0; -} - static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, struct mlx4_ib_ah *ah) { @@ -92,21 +73,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); struct mlx4_dev *dev = ibdev->dev; - union ib_gid sgid; - u8 mac[6]; - int err; int is_mcast; + struct in6_addr in6; u16 vlan_tag; - err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num); - if (err) - return ERR_PTR(err); - - memcpy(ah->av.eth.mac, mac, 6); - err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid); - if (err) - return ERR_PTR(err); - vlan_tag = rdma_get_vlan_id(&sgid); + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); + if (rdma_is_multicast_addr(&in6)) { + is_mcast = 1; + rdma_get_mcast_mac(&in6, ah->av.eth.mac); + } else { + memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN); + } + vlan_tag = ah_attr->vlan_id; if (vlan_tag < 0x1000) vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 66dbf8062374..cc40f08ca8f1 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -798,6 +798,15 @@ repoll: wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; else wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; + if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { + wc->vlan_id = be16_to_cpu(cqe->sl_vid) & + MLX4_CQE_VID_MASK; + } else { + wc->vlan_id = 0xffff; + } + wc->wc_flags |= IB_WC_WITH_VLAN; + memcpy(wc->smac, cqe->smac, ETH_ALEN); + wc->wc_flags |= IB_WC_WITH_SMAC; } return 0; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 133f41f42194..c06f571619df 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -678,9 +678,6 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid, int netw_view); -int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, - u8 *mac, int *is_mcast, u8 port); - static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index da6f5fa0c328..e0c2186529ff 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -90,6 +90,21 @@ enum { MLX4_RAW_QP_MSGMAX = 31, }; +#ifndef ETH_ALEN +#define ETH_ALEN 6 +#endif +static inline u64 mlx4_mac_to_u64(u8 *addr) +{ + u64 mac = 0; + int i; + + for (i = 0; i < ETH_ALEN; i++) { + mac <<= 8; + mac |= addr[i]; + } + return mac; +} + static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), @@ -1144,16 +1159,15 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); } -static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, - struct mlx4_qp_path *path, u8 port) +static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, + u64 smac, u16 vlan_tag, struct mlx4_qp_path *path, + u8 port) { - int err; int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_ETHERNET; - u8 mac[6]; - int is_mcast; - u16 vlan_tag; int vidx; + int smac_index; + path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); @@ -1188,22 +1202,27 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, if (!(ah->ah_flags & IB_AH_GRH)) return -1; - err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port); - if (err) - return err; - - memcpy(path->dmac, mac, 6); + memcpy(path->dmac, ah->dmac, ETH_ALEN); path->ackto = MLX4_IB_LINK_TYPE_ETH; - /* use index 0 into MAC table for IBoE */ - path->grh_mylmc &= 0x80; + /* find the index into MAC table for IBoE */ + if (!is_zero_ether_addr((const u8 *)&smac)) { + if (mlx4_find_cached_mac(dev->dev, port, smac, + &smac_index)) + return -ENOENT; + } else { + smac_index = 0; + } + + path->grh_mylmc &= 0x80 | smac_index; - vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]); + path->feup |= MLX4_FEUP_FORCE_ETH_UP; if (vlan_tag < 0x1000) { if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) return -ENOENT; path->vlan_index = vidx; path->fl = 1 << 6; + path->feup |= MLX4_FVL_FORCE_ETH_VLAN; } } else path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | @@ -1212,6 +1231,28 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, return 0; } +static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, + enum ib_qp_attr_mask qp_attr_mask, + struct mlx4_qp_path *path, u8 port) +{ + return _mlx4_set_path(dev, &qp->ah_attr, + mlx4_mac_to_u64((u8 *)qp->smac), + (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff, + path, port); +} + +static int mlx4_set_alt_path(struct mlx4_ib_dev *dev, + const struct ib_qp_attr *qp, + enum ib_qp_attr_mask qp_attr_mask, + struct mlx4_qp_path *path, u8 port) +{ + return _mlx4_set_path(dev, &qp->alt_ah_attr, + mlx4_mac_to_u64((u8 *)qp->alt_smac), + (qp_attr_mask & IB_QP_ALT_VID) ? + qp->alt_vlan_id : 0xffff, + path, port); +} + static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { struct mlx4_ib_gid_entry *ge, *tmp; @@ -1329,7 +1370,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_AV) { - if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, + if (mlx4_set_path(dev, attr, attr_mask, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) goto out; @@ -1352,8 +1393,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, dev->dev->caps.pkey_table_len[attr->alt_port_num]) goto out; - if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path, - attr->alt_port_num)) + if (mlx4_set_alt_path(dev, attr, attr_mask, &context->alt_path, + attr->alt_port_num)) goto out; context->alt_path.pkey_index = attr->alt_pkey_index; @@ -1464,6 +1505,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | MLX4_IB_LINK_TYPE_ETH; + if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { + int is_eth = rdma_port_get_link_layer( + &dev->ib_dev, qp->port) == + IB_LINK_LAYER_ETHERNET; + if (is_eth) { + context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH; + optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH; + } + } + + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; @@ -1561,18 +1613,21 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx4_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; int err = -EINVAL; - int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + int ll; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (cur_state == new_state && cur_state == IB_QPS_RESET) - p = IB_LINK_LAYER_UNSPECIFIED; + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + ll = IB_LINK_LAYER_UNSPECIFIED; + } else { + int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + ll = rdma_port_get_link_layer(&dev->ib_dev, port); + } if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask, - rdma_port_get_link_layer(&dev->ib_dev, p))) { + attr_mask, ll)) { pr_debug("qpn 0x%x: invalid attribute mask specified " "for transition %d to %d. qp_type %d," " attr_mask 0x%x\n", @@ -1789,8 +1844,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return err; } - vlan = rdma_get_vlan_id(&sgid); - is_vlan = vlan < 0x1000; + if (ah->av.eth.vlan != 0xffff) { + vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; + is_vlan = 1; + } } ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 97d342fa5032..f50ef6a5ee5e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -123,6 +123,26 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port, return err; } +int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) +{ + struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; + struct mlx4_mac_table *table = &info->mac_table; + int i; + + for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { + if (!table->refs[i]) + continue; + + if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { + *idx = i; + return 0; + } + } + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); + int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 98fa492cf406..e1862997f933 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -34,6 +34,7 @@ #define MLX4_CQ_H #include +#include #include #include @@ -43,10 +44,15 @@ struct mlx4_cqe { __be32 immed_rss_invalid; __be32 g_mlpath_rqpn; __be16 sl_vid; - __be16 rlid; - __be16 status; - u8 ipv6_ext_mask; - u8 badfcs_enc; + union { + struct { + __be16 rlid; + __be16 status; + u8 ipv6_ext_mask; + u8 badfcs_enc; + }; + u8 smac[ETH_ALEN]; + }; __be32 byte_cnt; __be16 wqe_index; __be16 checksum; @@ -83,6 +89,7 @@ struct mlx4_ts_cqe { enum { MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29, MLX4_CQE_QPN_MASK = 0xffffff, + MLX4_CQE_VID_MASK = 0xfff, }; enum { -- cgit v1.2.3 From f282651de676d10e395bc7923f0087fbbba12ed7 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 16 Jan 2014 17:16:48 +0200 Subject: IB/mlx4: Add dependency INET Since mlx4_ib supports IP based addressing, a dependency on INET needs to be added, since mlx4_ib registers itself for net device events. Signed-off-by: Matan Barak Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig index 24ab11a9ad1e..fc01deac1d3c 100644 --- a/drivers/infiniband/hw/mlx4/Kconfig +++ b/drivers/infiniband/hw/mlx4/Kconfig @@ -1,6 +1,6 @@ config MLX4_INFINIBAND tristate "Mellanox ConnectX HCA support" - depends on NETDEVICES && ETHERNET && PCI + depends on NETDEVICES && ETHERNET && PCI && INET select NET_VENDOR_MELLANOX select MLX4_CORE ---help--- -- cgit v1.2.3 From 27cdef637c25705b433d5c4deeef4cf8dcb75d6a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 19 Jan 2014 15:18:49 -0800 Subject: IB/mlx4: Use IS_ENABLED(CONFIG_IPV6) ...instead of testing defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/mlx4') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index a776aabab44a..bf218d9ec089 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1430,7 +1430,7 @@ static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -1450,7 +1450,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, struct mlx4_ib_dev *ibdev, u8 port) { struct in_device *in_dev; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct inet6_dev *in6_dev; union ib_gid *pgid; struct inet6_ifaddr *ifp; @@ -1473,7 +1473,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, endfor_ifa(in_dev); in_dev_put(in_dev); } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) /* IPv6 gids */ in6_dev = in6_dev_get(dev); if (in6_dev) { @@ -1874,7 +1874,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_notif; } } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (!iboe->nb_inet6.notifier_call) { iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event; err = register_inet6addr_notifier(&iboe->nb_inet6); @@ -1921,7 +1921,7 @@ err_notif: pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb_inet.notifier_call = NULL; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (ibdev->iboe.nb_inet6.notifier_call) { if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) pr_warn("failure unregistering notifier\n"); @@ -1976,7 +1976,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb_inet.notifier_call = NULL; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (ibdev->iboe.nb_inet6.notifier_call) { if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) pr_warn("failure unregistering notifier\n"); -- cgit v1.2.3