Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4')
25 files changed, 847 insertions, 149 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index eb520ab64014..563495d8975a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -6,6 +6,7 @@ config MLX4_EN
     tristate "Mellanox Technologies 10Gbit Ethernet support"
     depends on PCI
     select MLX4_CORE
+    select PTP_1588_CLOCK
     ---help---
       This driver supports Mellanox Technologies ConnectX Ethernet
       devices.
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 06fef5b44f77..c3ad464d0627 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -71,9 +71,9 @@ u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap)
     return obj;
 }
 
-void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj)
+void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr)
 {
-    mlx4_bitmap_free_range(bitmap, obj, 1);
+    mlx4_bitmap_free_range(bitmap, obj, 1, use_rr);
 }
 
 u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align)
@@ -118,11 +118,17 @@ u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap)
     return bitmap->avail;
 }
 
-void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
+                int use_rr)
 {
     obj &= bitmap->max + bitmap->reserved_top - 1;
 
     spin_lock(&bitmap->lock);
+    if (!use_rr) {
+        bitmap->last = min(bitmap->last, obj);
+        bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
+                & bitmap->mask;
+    }
     bitmap_clear(bitmap->table, obj, cnt);
     bitmap->avail += cnt;
     spin_unlock(&bitmap->lock);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 1e9970d2f0f3..0d02fba94536 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1371,6 +1371,15 @@ static struct mlx4_cmd_info cmd_info[] = {
         .verify = NULL,
         .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper
     },
+    {
+        .opcode = MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
+        .has_inbox = false,
+        .has_outbox = false,
+        .out_is_imm = false,
+        .encode_slave_id = false,
+        .verify = NULL,
+        .wrapper = mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper
+    },
 };
 
 static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
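The use_rr flag added above selects between two reuse policies for freed resource IDs: with MLX4_USE_RR the allocator keeps scanning forward from its cursor (round-robin), while MLX4_NO_RR rewinds the cursor so the lowest freed ID is handed out again first. A toy user-space sketch of that cursor behaviour (hypothetical code, not the driver's mlx4_bitmap, which additionally maintains top/mask for reserved ranges):

#include <stdio.h>

#define NBITS 8
static unsigned char table[NBITS];  /* 1 = in use */
static unsigned int last;           /* search cursor, as in mlx4_bitmap */

static int toy_alloc(void)
{
    for (unsigned int i = 0; i < NBITS; i++) {
        unsigned int obj = (last + i) % NBITS;
        if (!table[obj]) {
            table[obj] = 1;
            last = (obj + 1) % NBITS;
            return obj;
        }
    }
    return -1;
}

static void toy_free(int obj, int use_rr)
{
    table[obj] = 0;
    if (!use_rr && (unsigned int)obj < last)
        last = obj; /* rewind: reuse the lowest free ID next */
}

int main(void)
{
    int a = toy_alloc(), b = toy_alloc();      /* returns 0, then 1 */
    toy_free(a, 1);                            /* round-robin style free */
    printf("RR: next = %d\n", toy_alloc());    /* 2, not 0 */
    toy_free(b, 0);                            /* MLX4_NO_RR style free */
    printf("no-RR: next = %d\n", toy_alloc()); /* 1 again */
    return 0;
}

In the diff itself, CQs, SRQs and memory regions keep the old lowest-first behaviour (MLX4_NO_RR), while most other bitmaps switch to round-robin (MLX4_USE_RR).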
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 22fcbe78311c..0487121e4a0f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -34,7 +34,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/hardirq.h>
 #include <linux/export.h>
 
@@ -187,7 +186,7 @@ err_put:
     mlx4_table_put(dev, &cq_table->table, *cqn);
 
 err_out:
-    mlx4_bitmap_free(&cq_table->bitmap, *cqn);
+    mlx4_bitmap_free(&cq_table->bitmap, *cqn, MLX4_NO_RR);
     return err;
 }
 
@@ -217,7 +216,7 @@ void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
     mlx4_table_put(dev, &cq_table->cmpt_table, cqn);
     mlx4_table_put(dev, &cq_table->table, cqn);
-    mlx4_bitmap_free(&cq_table->bitmap, cqn);
+    mlx4_bitmap_free(&cq_table->bitmap, cqn, MLX4_NO_RR);
 }
 
 static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index fd6441071319..abaf6bb22416 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -42,6 +42,10 @@ int mlx4_en_timestamp_config(struct net_device *dev, int tx_type, int rx_filter)
     int port_up = 0;
     int err = 0;
 
+    if (priv->hwtstamp_config.tx_type == tx_type &&
+        priv->hwtstamp_config.rx_filter == rx_filter)
+        return 0;
+
     mutex_lock(&mdev->state_lock);
     if (priv->port_up) {
         port_up = 1;
@@ -103,19 +107,191 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
                 struct skb_shared_hwtstamps *hwts,
                 u64 timestamp)
 {
+    unsigned long flags;
     u64 nsec;
 
+    read_lock_irqsave(&mdev->clock_lock, flags);
     nsec = timecounter_cyc2time(&mdev->clock, timestamp);
+    read_unlock_irqrestore(&mdev->clock_lock, flags);
 
     memset(hwts, 0, sizeof(struct skb_shared_hwtstamps));
     hwts->hwtstamp = ns_to_ktime(nsec);
 }
 
+/**
+ * mlx4_en_remove_timestamp - disable PTP device
+ * @mdev: board private structure
+ *
+ * Stop the PTP support.
+ **/
+void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev)
+{
+    if (mdev->ptp_clock) {
+        ptp_clock_unregister(mdev->ptp_clock);
+        mdev->ptp_clock = NULL;
+        mlx4_info(mdev, "removed PHC\n");
+    }
+}
+
+void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev)
+{
+    bool timeout = time_is_before_jiffies(mdev->last_overflow_check +
+                          mdev->overflow_period);
+    unsigned long flags;
+
+    if (timeout) {
+        write_lock_irqsave(&mdev->clock_lock, flags);
+        timecounter_read(&mdev->clock);
+        write_unlock_irqrestore(&mdev->clock_lock, flags);
+        mdev->last_overflow_check = jiffies;
+    }
+}
+
+/**
+ * mlx4_en_phc_adjfreq - adjust the frequency of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired frequency change in parts per billion
+ *
+ * Adjust the frequency of the PHC cycle counter by the indicated delta from
+ * the base frequency.
+ **/
+static int mlx4_en_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+    u64 adj;
+    u32 diff, mult;
+    int neg_adj = 0;
+    unsigned long flags;
+    struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+                        ptp_clock_info);
+
+    if (delta < 0) {
+        neg_adj = 1;
+        delta = -delta;
+    }
+    mult = mdev->nominal_c_mult;
+    adj = mult;
+    adj *= delta;
+    diff = div_u64(adj, 1000000000ULL);
+
+    write_lock_irqsave(&mdev->clock_lock, flags);
+    timecounter_read(&mdev->clock);
+    mdev->cycles.mult = neg_adj ? mult - diff : mult + diff;
+    write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+    return 0;
+}
+
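As the kernel-doc above says, adjfreq scales the cycle-counter multiplier by a parts-per-billion offset: diff = nominal_c_mult * |delta| / 10^9, added to or subtracted from the nominal value. A standalone sketch of the same arithmetic, with an illustrative (made-up) nominal multiplier rather than a real device value:

#include <stdint.h>
#include <stdio.h>

/* Mirror of the arithmetic in mlx4_en_phc_adjfreq(); the nominal
 * multiplier below is illustrative only. */
static uint32_t adjusted_mult(uint32_t nominal_mult, int32_t ppb)
{
    int neg = ppb < 0;
    uint64_t adj = (uint64_t)nominal_mult * (uint64_t)(neg ? -ppb : ppb);
    uint32_t diff = (uint32_t)(adj / 1000000000ULL);

    return neg ? nominal_mult - diff : nominal_mult + diff;
}

int main(void)
{
    uint32_t nominal = 39062500; /* hypothetical nominal_c_mult */

    /* +100 ppb speeds the synthesized clock up by 1 part in 10^7 */
    printf("+100 ppb: %u -> %u\n", nominal, adjusted_mult(nominal, 100));
    printf("-100 ppb: %u -> %u\n", nominal, adjusted_mult(nominal, -100));
    return 0;
}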
+/**
+ * mlx4_en_phc_adjtime - Shift the time of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired change in nanoseconds
+ *
+ * Adjust the timer by resetting the timecounter structure.
+ **/
+static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+    struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+                        ptp_clock_info);
+    unsigned long flags;
+    s64 now;
+
+    write_lock_irqsave(&mdev->clock_lock, flags);
+    now = timecounter_read(&mdev->clock);
+    now += delta;
+    timecounter_init(&mdev->clock, &mdev->cycles, now);
+    write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+    return 0;
+}
+
+/**
+ * mlx4_en_phc_gettime - Reads the current time from the hardware clock
+ * @ptp: ptp clock structure
+ * @ts: timespec structure to hold the current time value
+ *
+ * Read the timecounter and return the correct value in ns after converting
+ * it into a struct timespec.
+ **/
+static int mlx4_en_phc_gettime(struct ptp_clock_info *ptp, struct timespec *ts)
+{
+    struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+                        ptp_clock_info);
+    unsigned long flags;
+    u32 remainder;
+    u64 ns;
+
+    write_lock_irqsave(&mdev->clock_lock, flags);
+    ns = timecounter_read(&mdev->clock);
+    write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+    ts->tv_sec = div_u64_rem(ns, NSEC_PER_SEC, &remainder);
+    ts->tv_nsec = remainder;
+
+    return 0;
+}
+
+/**
+ * mlx4_en_phc_settime - Set the current time on the hardware clock
+ * @ptp: ptp clock structure
+ * @ts: timespec containing the new time for the cycle counter
+ *
+ * Reset the timecounter to use a new base value instead of the kernel
+ * wall timer value.
+ **/
+static int mlx4_en_phc_settime(struct ptp_clock_info *ptp,
+                   const struct timespec *ts)
+{
+    struct mlx4_en_dev *mdev = container_of(ptp, struct mlx4_en_dev,
+                        ptp_clock_info);
+    u64 ns = timespec_to_ns(ts);
+    unsigned long flags;
+
+    /* reset the timecounter */
+    write_lock_irqsave(&mdev->clock_lock, flags);
+    timecounter_init(&mdev->clock, &mdev->cycles, ns);
+    write_unlock_irqrestore(&mdev->clock_lock, flags);
+
+    return 0;
+}
+
+/**
+ * mlx4_en_phc_enable - enable or disable an ancillary feature
+ * @ptp: ptp clock structure
+ * @request: Desired resource to enable or disable
+ * @on: Caller passes one to enable or zero to disable
+ *
+ * Enable (or disable) ancillary features of the PHC subsystem.
+ * Currently, no ancillary features are supported.
+ **/
+static int mlx4_en_phc_enable(struct ptp_clock_info __always_unused *ptp,
+                  struct ptp_clock_request __always_unused *request,
+                  int __always_unused on)
+{
+    return -EOPNOTSUPP;
+}
+
+static const struct ptp_clock_info mlx4_en_ptp_clock_info = {
+    .owner      = THIS_MODULE,
+    .max_adj    = 100000000,
+    .n_alarm    = 0,
+    .n_ext_ts   = 0,
+    .n_per_out  = 0,
+    .pps        = 0,
+    .adjfreq    = mlx4_en_phc_adjfreq,
+    .adjtime    = mlx4_en_phc_adjtime,
+    .gettime    = mlx4_en_phc_gettime,
+    .settime    = mlx4_en_phc_settime,
+    .enable     = mlx4_en_phc_enable,
+};
+
 void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
 {
     struct mlx4_dev *dev = mdev->dev;
+    unsigned long flags;
     u64 ns;
 
+    rwlock_init(&mdev->clock_lock);
+
     memset(&mdev->cycles, 0, sizeof(mdev->cycles));
     mdev->cycles.read = mlx4_en_read_clock;
     mdev->cycles.mask = CLOCKSOURCE_MASK(48);
@@ -127,9 +303,12 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
     mdev->cycles.shift = 14;
     mdev->cycles.mult =
         clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
+    mdev->nominal_c_mult = mdev->cycles.mult;
 
+    write_lock_irqsave(&mdev->clock_lock, flags);
     timecounter_init(&mdev->clock, &mdev->cycles,
              ktime_to_ns(ktime_get_real()));
+    write_unlock_irqrestore(&mdev->clock_lock, flags);
 
     /* Calculate period in seconds to call the overflow watchdog - to make
      * sure counter is checked at least once every wrap around.
@@ -137,15 +316,18 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev)
     ns = cyclecounter_cyc2ns(&mdev->cycles, mdev->cycles.mask);
     do_div(ns, NSEC_PER_SEC / 2 / HZ);
     mdev->overflow_period = ns;
-}
 
-void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev)
-{
-    bool timeout = time_is_before_jiffies(mdev->last_overflow_check +
-                          mdev->overflow_period);
+    /* Configure the PHC */
+    mdev->ptp_clock_info = mlx4_en_ptp_clock_info;
+    snprintf(mdev->ptp_clock_info.name, 16, "mlx4 ptp");
 
-    if (timeout) {
-        timecounter_read(&mdev->clock);
-        mdev->last_overflow_check = jiffies;
+    mdev->ptp_clock = ptp_clock_register(&mdev->ptp_clock_info,
+                         &mdev->pdev->dev);
+    if (IS_ERR(mdev->ptp_clock)) {
+        mdev->ptp_clock = NULL;
+        mlx4_err(mdev, "ptp_clock_register failed\n");
+    } else {
+        mlx4_info(mdev, "registered PHC clock\n");
     }
+
 }
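For context on the overflow watchdog retained above: the hardware cycle counter is only 48 bits wide, so the timecounter must be read before the counter wraps or deltas become ambiguous. A back-of-the-envelope sketch, assuming an illustrative 427 MHz hca_core_clock (the real rate is reported by firmware):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Illustrative core-clock frequency; the driver derives the real
     * value from dev->caps.hca_core_clock. */
    const uint64_t hz = 427000000ULL;
    const uint64_t mask = (1ULL << 48) - 1; /* CLOCKSOURCE_MASK(48) */

    uint64_t wrap_sec = mask / hz;

    printf("counter wraps every %llu s (~%.1f days)\n",
           (unsigned long long)wrap_sec, wrap_sec / 86400.0);
    return 0;
}

At that assumed rate the counter wraps roughly every 7.6 days, so scheduling the watchdog for half the wrap period is cheap insurance.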
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 3a098cc4d349..70e95324a97d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -161,12 +161,16 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
     cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
     cq->mcq.event = mlx4_en_cq_event;
 
-    if (!cq->is_tx) {
+    if (cq->is_tx) {
+        netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
+                   NAPI_POLL_WEIGHT);
+    } else {
         netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);
         napi_hash_add(&cq->napi);
-        napi_enable(&cq->napi);
     }
 
+    napi_enable(&cq->napi);
+
     return 0;
 }
 
@@ -188,12 +192,12 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
 
 void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 {
+    napi_disable(&cq->napi);
     if (!cq->is_tx) {
-        napi_disable(&cq->napi);
         napi_hash_del(&cq->napi);
         synchronize_rcu();
-        netif_napi_del(&cq->napi);
     }
+    netif_napi_del(&cq->napi);
 
     mlx4_cq_free(priv->mdev->dev, &cq->mcq);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 0596f9f85a0e..3e8d33605fe7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1193,6 +1193,9 @@ static int mlx4_en_get_ts_info(struct net_device *dev,
         info->rx_filters =
             (1 << HWTSTAMP_FILTER_NONE) |
             (1 << HWTSTAMP_FILTER_ALL);
+
+        if (mdev->ptp_clock)
+            info->phc_index = ptp_clock_index(mdev->ptp_clock);
     }
 
     return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 0d087b03a7b0..d357bf5a4686 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -174,6 +174,9 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
         mlx4_err(mdev, "Internal error detected, restarting device\n");
         break;
 
+    case MLX4_DEV_EVENT_SLAVE_INIT:
+    case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+        break;
     default:
         if (port < 1 || port > dev->caps.num_ports ||
             !mdev->pndev[port])
@@ -196,6 +199,9 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
         if (mdev->pndev[i])
             mlx4_en_destroy_netdev(mdev->pndev[i]);
 
+    if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS)
+        mlx4_en_remove_timestamp(mdev);
+
     flush_workqueue(mdev->workqueue);
     destroy_workqueue(mdev->workqueue);
     (void) mlx4_mr_free(dev, &mdev->mr);
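With the PHC registered in en_clock.c and its index exposed through get_ts_info above, the clock becomes readable from user space via the dynamic POSIX clock interface. A hypothetical sketch (the /dev/ptp0 path assumes this is the first PHC on the system):

#include <fcntl.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Dynamic POSIX clock: convert an open chardev fd to a clockid_t,
 * per Documentation/ptp/ptp.txt. */
#define FD_TO_CLOCKID(fd) ((~(clockid_t)(fd) << 3) | 3)

int main(void)
{
    struct timespec ts;
    int fd = open("/dev/ptp0", O_RDONLY); /* assumed device node */

    if (fd < 0)
        return 1;
    if (clock_gettime(FD_TO_CLOCKID(fd), &ts) == 0)
        printf("PHC time: %lld.%09ld\n",
               (long long)ts.tv_sec, ts.tv_nsec);
    close(fd);
    return 0;
}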
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index e72d8a112a6b..fad45316200a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -468,6 +468,53 @@ static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac)
     memset(&dst_mac[ETH_ALEN], 0, 2);
 }
 
+
+static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *addr,
+                    int qpn, u64 *reg_id)
+{
+    int err;
+    struct mlx4_spec_list spec_eth_outer = { {NULL} };
+    struct mlx4_spec_list spec_vxlan = { {NULL} };
+    struct mlx4_spec_list spec_eth_inner = { {NULL} };
+
+    struct mlx4_net_trans_rule rule = {
+        .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+        .exclusive = 0,
+        .allow_loopback = 1,
+        .promisc_mode = MLX4_FS_REGULAR,
+        .priority = MLX4_DOMAIN_NIC,
+    };
+
+    __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+    if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+        return 0; /* do nothing */
+
+    rule.port = priv->port;
+    rule.qpn = qpn;
+    INIT_LIST_HEAD(&rule.list);
+
+    spec_eth_outer.id = MLX4_NET_TRANS_RULE_ID_ETH;
+    memcpy(spec_eth_outer.eth.dst_mac, addr, ETH_ALEN);
+    memcpy(spec_eth_outer.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+
+    spec_vxlan.id = MLX4_NET_TRANS_RULE_ID_VXLAN;   /* any vxlan header */
+    spec_eth_inner.id = MLX4_NET_TRANS_RULE_ID_ETH; /* any inner eth header */
+
+    list_add_tail(&spec_eth_outer.list, &rule.list);
+    list_add_tail(&spec_vxlan.list, &rule.list);
+    list_add_tail(&spec_eth_inner.list, &rule.list);
+
+    err = mlx4_flow_attach(priv->mdev->dev, &rule, reg_id);
+    if (err) {
+        en_err(priv, "failed to add vxlan steering rule, err %d\n", err);
+        return err;
+    }
+    en_dbg(DRV, priv, "added vxlan steering rule, mac %pM reg_id %llx\n", addr, *reg_id);
+    return 0;
+}
+
+
 static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv,
                 unsigned char *mac, int *qpn, u64 *reg_id)
 {
@@ -585,6 +632,11 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
     if (err)
         goto steer_err;
 
+    err = mlx4_en_tunnel_steer_add(priv, priv->dev->dev_addr, *qpn,
+                       &priv->tunnel_reg_id);
+    if (err)
+        goto tunnel_err;
+
     entry = kmalloc(sizeof(*entry), GFP_KERNEL);
     if (!entry) {
         err = -ENOMEM;
@@ -599,6 +651,9 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
     return 0;
 
 alloc_err:
+    if (priv->tunnel_reg_id)
+        mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id);
+tunnel_err:
     mlx4_en_uc_steer_release(priv, priv->dev->dev_addr, *qpn, reg_id);
 
 steer_err:
@@ -642,6 +697,11 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
         }
     }
 
+    if (priv->tunnel_reg_id) {
+        mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id);
+        priv->tunnel_reg_id = 0;
+    }
+
     en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n",
            priv->port, qpn);
     mlx4_qp_release_range(dev, qpn, 1);
@@ -782,7 +842,7 @@ static void update_mclist_flags(struct mlx4_en_priv *priv,
     list_for_each_entry(dst_tmp, dst, list) {
         found = false;
         list_for_each_entry(src_tmp, src, list) {
-            if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
+            if (ether_addr_equal(dst_tmp->addr, src_tmp->addr)) {
                 found = true;
                 break;
             }
@@ -797,7 +857,7 @@ static void update_mclist_flags(struct mlx4_en_priv *priv,
     list_for_each_entry(src_tmp, src, list) {
         found = false;
         list_for_each_entry(dst_tmp, dst, list) {
-            if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
+            if (ether_addr_equal(dst_tmp->addr, src_tmp->addr)) {
                 dst_tmp->action = MCLIST_NONE;
                 found = true;
                 break;
@@ -1044,6 +1104,12 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
             if (err)
                 en_err(priv, "Fail to detach multicast address\n");
 
+            if (mclist->tunnel_reg_id) {
+                err = mlx4_flow_detach(priv->mdev->dev, mclist->tunnel_reg_id);
+                if (err)
+                    en_err(priv, "Failed to detach multicast address\n");
+            }
+
             /* remove from list */
             list_del(&mclist->list);
             kfree(mclist);
@@ -1061,6 +1127,10 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
             if (err)
                 en_err(priv, "Fail to attach multicast address\n");
 
+            err = mlx4_en_tunnel_steer_add(priv, &mc_list[10], priv->base_qpn,
+                               &mclist->tunnel_reg_id);
+            if (err)
+                en_err(priv, "Failed to attach multicast address\n");
         }
     }
 }
@@ -1598,6 +1668,15 @@ int mlx4_en_start_port(struct net_device *dev)
         goto tx_err;
     }
 
+    if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+        err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC);
+        if (err) {
+            en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n",
+                   err);
+            goto tx_err;
+        }
+    }
+
     /* Init port */
     en_dbg(HW, priv, "Initializing port\n");
     err = mlx4_INIT_PORT(mdev->dev, priv->port);
@@ -1910,8 +1989,10 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
                       prof->tx_ring_size, i, TX, node))
             goto err;
 
-        if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], priv->base_tx_qpn + i,
-                       prof->tx_ring_size, TXBB_SIZE, node))
+        if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i],
+                       priv->base_tx_qpn + i,
+                       prof->tx_ring_size, TXBB_SIZE,
+                       node, i))
             goto err;
     }
 
@@ -2025,7 +2106,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
     return 0;
 }
 
-static int mlx4_en_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 {
     struct mlx4_en_priv *priv = netdev_priv(dev);
     struct mlx4_en_dev *mdev = priv->mdev;
@@ -2084,11 +2165,21 @@ static int mlx4_en_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
                 sizeof(config)) ? -EFAULT : 0;
 }
 
+static int mlx4_en_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+    struct mlx4_en_priv *priv = netdev_priv(dev);
+
+    return copy_to_user(ifr->ifr_data, &priv->hwtstamp_config,
+                sizeof(priv->hwtstamp_config)) ? -EFAULT : 0;
+}
+
 static int mlx4_en_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
     switch (cmd) {
     case SIOCSHWTSTAMP:
-        return mlx4_en_hwtstamp_ioctl(dev, ifr);
+        return mlx4_en_hwtstamp_set(dev, ifr);
+    case SIOCGHWTSTAMP:
+        return mlx4_en_hwtstamp_get(dev, ifr);
     default:
         return -EOPNOTSUPP;
     }
@@ -2154,6 +2245,27 @@ static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_st
     return mlx4_set_vf_link_state(mdev->dev, en_priv->port, vf, link_state);
 }
 
+
+#define PORT_ID_BYTE_LEN 8
+static int mlx4_en_get_phys_port_id(struct net_device *dev,
+                    struct netdev_phys_port_id *ppid)
+{
+    struct mlx4_en_priv *priv = netdev_priv(dev);
+    struct mlx4_dev *mdev = priv->mdev->dev;
+    int i;
+    u64 phys_port_id = mdev->caps.phys_port_id[priv->port];
+
+    if (!phys_port_id)
+        return -EOPNOTSUPP;
+
+    ppid->id_len = sizeof(phys_port_id);
+    for (i = PORT_ID_BYTE_LEN - 1; i >= 0; --i) {
+        ppid->id[i] = phys_port_id & 0xff;
+        phys_port_id >>= 8;
+    }
+    return 0;
+}
+
 static const struct net_device_ops mlx4_netdev_ops = {
     .ndo_open = mlx4_en_open,
     .ndo_stop = mlx4_en_close,
@@ -2179,6 +2291,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
 #ifdef CONFIG_NET_RX_BUSY_POLL
     .ndo_busy_poll = mlx4_en_low_latency_recv,
 #endif
+    .ndo_get_phys_port_id = mlx4_en_get_phys_port_id,
 };
 
 static const struct net_device_ops mlx4_netdev_ops_master = {
@@ -2207,6 +2320,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
 #ifdef CONFIG_RFS_ACCEL
     .ndo_rx_flow_steer = mlx4_en_filter_rfs,
 #endif
+    .ndo_get_phys_port_id = mlx4_en_get_phys_port_id,
 };
 
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -2365,6 +2479,13 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
     if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
         dev->priv_flags |= IFF_UNICAST_FLT;
 
+    if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+        dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
+                    NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL;
+        dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+        dev->features |= NETIF_F_GSO_UDP_TUNNEL;
+    }
+
     mdev->pndev[port] = dev;
 
     netif_carrier_off(dev);
@@ -2394,6 +2515,15 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
         goto out;
     }
 
+    if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+        err = mlx4_SET_PORT_VXLAN(mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC);
+        if (err) {
+            en_err(priv, "Failed setting port L2 tunnel configuration, err %d\n",
+                   err);
+            goto out;
+        }
+    }
+
     /* Init port */
     en_warn(priv, "Initializing port\n");
     err = mlx4_INIT_PORT(mdev->dev, priv->port);
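The SIOCGHWTSTAMP branch added above makes the configuration previously set with SIOCSHWTSTAMP readable back from user space (an ioctl introduced in 3.13). A minimal hypothetical reader:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    struct hwtstamp_config cfg = {};
    struct ifreq ifr = {};
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    if (fd < 0 || argc < 2)
        return 1;
    strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
    ifr.ifr_data = (void *)&cfg;

    /* Read back what an earlier SIOCSHWTSTAMP configured */
    if (ioctl(fd, SIOCGHWTSTAMP, &ifr) == 0)
        printf("%s: tx_type=%d rx_filter=%d\n",
               argv[1], cfg.tx_type, cfg.rx_filter);
    close(fd);
    return 0;
}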
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index d3f508697a3d..f1a5500ff72d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -68,6 +68,12 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
     context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2);
     if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX))
         context->param3 |= cpu_to_be32(1 << 30);
+
+    if (!is_tx && !rss &&
+        (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)) {
+        en_dbg(HW, priv, "Setting RX qp %x tunnel mode to RX tunneled & non-tunneled\n", qpn);
+        context->srqn = cpu_to_be32(7 << 28); /* this fills bits 30:28 */
+    }
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 07a1d0fbae47..890922c1c8ee 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -631,6 +631,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
     int ip_summed;
     int factor = priv->cqe_factor;
     u64 timestamp;
+    bool l2_tunnel;
 
     if (!priv->port_up)
         return 0;
@@ -709,6 +710,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
             length -= ring->fcs_del;
         ring->bytes += length;
         ring->packets++;
+        l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
+            (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
 
         if (likely(dev->features & NETIF_F_RXCSUM)) {
             if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
@@ -721,7 +724,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                  * - not an IP fragment
                  * - no LLS polling in progress
                  */
-                if (!mlx4_en_cq_ll_polling(cq) &&
+                if (!mlx4_en_cq_busy_polling(cq) &&
                     (dev->features & NETIF_F_GRO)) {
                     struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
                     if (!gro_skb)
@@ -738,6 +741,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                     gro_skb->data_len = length;
                     gro_skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+                    if (l2_tunnel)
+                        gro_skb->encapsulation = 1;
                     if ((cqe->vlan_my_qpn &
                         cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) &&
                         (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
@@ -747,7 +752,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                     }
 
                     if (dev->features & NETIF_F_RXHASH)
-                        gro_skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);
+                        skb_set_hash(gro_skb,
+                                 be32_to_cpu(cqe->immed_rss_invalid),
+                                 PKT_HASH_TYPE_L3);
 
                     skb_record_rx_queue(gro_skb, cq->ring);
 
@@ -788,8 +795,13 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
         skb->protocol = eth_type_trans(skb, dev);
         skb_record_rx_queue(skb, cq->ring);
 
+        if (l2_tunnel)
+            skb->encapsulation = 1;
+
         if (dev->features & NETIF_F_RXHASH)
-            skb->rxhash = be32_to_cpu(cqe->immed_rss_invalid);
+            skb_set_hash(skb,
+                     be32_to_cpu(cqe->immed_rss_invalid),
+                     PKT_HASH_TYPE_L3);
 
         if ((be32_to_cpu(cqe->vlan_my_qpn) &
             MLX4_CQE_VLAN_PRESENT_MASK) &&
@@ -804,8 +816,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
         skb_mark_napi_id(skb, &cq->napi);
 
-        /* Push it up the stack */
-        netif_receive_skb(skb);
+        if (!mlx4_en_cq_busy_polling(cq))
+            napi_gro_receive(&cq->napi, skb);
+        else
+            netif_receive_skb(skb);
 
 next:
         for (nr = 0; nr < priv->num_frags; nr++)
@@ -1053,6 +1067,12 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
         rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
         rss_context->base_qpn_udp = rss_context->default_qpn;
     }
+
+    if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+        en_info(priv, "Setting RSS context tunnel type to RSS on inner headers\n");
+        rss_mask |= MLX4_RSS_BY_INNER_HEADERS;
+    }
+
     rss_context->flags = rss_mask;
     rss_context->hash_fn = MLX4_RSS_HASH_TOP;
     for (i = 0; i < 10; i++)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index f54ebd5a1702..8e8a7eb43a2c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -39,6 +39,7 @@
 #include <linux/if_vlan.h>
 #include <linux/vmalloc.h>
 #include <linux/tcp.h>
+#include <linux/ip.h>
 #include <linux/moduleparam.h>
 
 #include "mlx4_en.h"
@@ -55,7 +56,7 @@ MODULE_PARM_DESC(inline_thold, "threshold for using inline data");
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
                struct mlx4_en_tx_ring **pring, int qpn, u32 size,
-               u16 stride, int node)
+               u16 stride, int node, int queue_index)
 {
     struct mlx4_en_dev *mdev = priv->mdev;
     struct mlx4_en_tx_ring *ring;
@@ -140,6 +141,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
         ring->bf_enabled = true;
 
     ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
+    ring->queue_index = queue_index;
+
+    if (queue_index < priv->num_tx_rings_p_up && cpu_online(queue_index))
+        cpumask_set_cpu(queue_index, &ring->affinity_mask);
 
     *pring = ring;
     return 0;
@@ -206,6 +211,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 
     err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
                    &ring->qp, &ring->qp_state);
+    if (!user_prio && cpu_online(ring->queue_index))
+        netif_set_xps_queue(priv->dev, &ring->affinity_mask,
+                    ring->queue_index);
 
     return err;
 }
@@ -317,7 +325,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
             }
         }
     }
-    dev_kfree_skb_any(skb);
+    dev_kfree_skb(skb);
     return tx_info->nr_txbb;
 }
 
@@ -354,7 +362,9 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
     return cnt;
 }
 
-static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
+static int mlx4_en_process_tx_cq(struct net_device *dev,
+                 struct mlx4_en_cq *cq,
+                 int budget)
 {
     struct mlx4_en_priv *priv = netdev_priv(dev);
     struct mlx4_cq *mcq = &cq->mcq;
@@ -372,9 +382,10 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
     u32 bytes = 0;
     int factor = priv->cqe_factor;
     u64 timestamp = 0;
+    int done = 0;
 
     if (!priv->port_up)
-        return;
+        return 0;
 
     index = cons_index & size_mask;
     cqe = &buf[(index << factor) + factor];
@@ -383,7 +394,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 
     /* Process all completed CQEs */
     while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
-            cons_index & size)) {
+            cons_index & size) && (done < budget)) {
         /*
          * make sure we read the CQE after we read the
          * ownership bit
@@ -421,7 +432,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
             txbbs_stamp = txbbs_skipped;
             packets++;
             bytes += ring->tx_info[ring_index].nr_bytes;
-        } while (ring_index != new_index);
+        } while ((++done < budget) && (ring_index != new_index));
 
         ++cons_index;
         index = cons_index & size_mask;
@@ -447,6 +458,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
         netif_tx_wake_queue(ring->tx_queue);
         priv->port_stats.wake_queue++;
     }
+    return done;
 }
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq)
@@ -454,10 +466,31 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq)
     struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
     struct mlx4_en_priv *priv = netdev_priv(cq->dev);
 
-    mlx4_en_process_tx_cq(cq->dev, cq);
-    mlx4_en_arm_cq(priv, cq);
+    if (priv->port_up)
+        napi_schedule(&cq->napi);
+    else
+        mlx4_en_arm_cq(priv, cq);
 }
 
+/* TX CQ polling - called by NAPI */
+int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
+{
+    struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
+    struct net_device *dev = cq->dev;
+    struct mlx4_en_priv *priv = netdev_priv(dev);
+    int done;
+
+    done = mlx4_en_process_tx_cq(dev, cq, budget);
+
+    /* If we used up all the quota - we're probably not done yet... */
+    if (done < budget) {
+        /* Done for now */
+        napi_complete(napi);
+        mlx4_en_arm_cq(priv, cq);
+        return done;
+    }
+    return budget;
+}
 
 static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
                               struct mlx4_en_tx_ring *ring,
@@ -528,7 +561,10 @@ static int get_real_size(struct sk_buff *skb, struct net_device *dev,
     int real_size;
 
     if (skb_is_gso(skb)) {
-        *lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
+        if (skb->encapsulation)
+            *lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb);
+        else
+            *lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
         real_size = CTRL_SIZE + skb_shinfo(skb)->nr_frags * DS_SIZE +
             ALIGN(*lso_header_size + 4, DS_SIZE);
         if (unlikely(*lso_header_size != skb_headlen(skb))) {
@@ -592,7 +628,8 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk
     }
 }
 
-u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb)
+u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
+             void *accel_priv)
 {
     struct mlx4_en_priv *priv = netdev_priv(dev);
     u16 rings_p_up = priv->num_tx_rings_p_up;
@@ -827,6 +864,14 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
         tx_info->inl = 1;
     }
 
+    if (skb->encapsulation) {
+        struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb);
+        if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol == IPPROTO_UDP)
+            op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
+        else
+            op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
+    }
+
     ring->prod += nr_txbb;
 
     /* If we used a bounce buffer then copy descriptor back into place */
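A worked example of the encapsulated LSO header length now computed in get_real_size(): for a VXLAN-in-IPv4 TCP segment with option-less headers, everything up to and including the inner TCP header adds up as below (standard protocol sizes, not driver constants):

#include <stdio.h>

int main(void)
{
    int outer_eth = 14, outer_ip = 20, outer_udp = 8, vxlan = 8;
    int inner_eth = 14, inner_ip = 20, inner_tcp = 20;

    /* offset of the inner transport header from skb->data */
    int inner_transport_off = outer_eth + outer_ip + outer_udp + vxlan +
                  inner_eth + inner_ip;

    /* (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb) */
    printf("lso_header_size = %d + %d = %d bytes\n",
           inner_transport_off, inner_tcp, inner_transport_off + inner_tcp);
    return 0;
}

That is 84 + 20 = 104 bytes that the hardware must replicate per segment, versus 54 for the non-encapsulated case.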
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index c9cdb2a2c596..8992b38578d5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/export.h>
@@ -963,7 +962,7 @@ err_out_free_mtt:
     mlx4_mtt_cleanup(dev, &eq->mtt);
 
 err_out_free_eq:
-    mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
+    mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
 
 err_out_free_pages:
     for (i = 0; i < npages; ++i)
@@ -1018,7 +1017,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
                         eq->page_list[i].map);
 
     kfree(eq->page_list);
-    mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
+    mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
     mlx4_free_cmd_mailbox(dev, mailbox);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 194928214606..91b69ff4b4a2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -134,7 +134,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
         [5] = "Time stamping support",
         [6] = "VST (control vlan insertion/stripping) support",
         [7] = "FSM (MAC anti-spoofing) support",
-        [8] = "Dynamic QP updates support"
+        [8] = "Dynamic QP updates support",
+        [9] = "TCP/IP offloads/flow-steering for VXLAN support"
     };
     int i;
 
@@ -207,25 +208,25 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 
 /* when opcode modifier = 1 */
 #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET     0x3
-#define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET    0x8
-#define QUERY_FUNC_CAP_ETH_PROPS_OFFSET     0xc
+#define QUERY_FUNC_CAP_FLAGS0_OFFSET        0x8
+#define QUERY_FUNC_CAP_FLAGS1_OFFSET        0xc
 
 #define QUERY_FUNC_CAP_QP0_TUNNEL       0x10
 #define QUERY_FUNC_CAP_QP0_PROXY        0x14
 #define QUERY_FUNC_CAP_QP1_TUNNEL       0x18
 #define QUERY_FUNC_CAP_QP1_PROXY        0x1c
+#define QUERY_FUNC_CAP_PHYS_PORT_ID     0x28
 
-#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC  0x40
-#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80
+#define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC     0x40
+#define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN    0x80
+#define QUERY_FUNC_CAP_FLAGS1_NIC_INFO      0x10
 
-#define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80
+#define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
 
     if (vhcr->op_modifier == 1) {
-        field = 0;
-        /* ensure force vlan and force mac bits are not set */
-        MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
-        /* ensure that phy_wqe_gid bit is not set */
-        MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
+        /* Set nic_info bit to mark new fields support */
+        field = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
+        MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
 
         field = vhcr->in_modifier; /* phys-port = logical-port */
         MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
@@ -243,6 +244,9 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
         size += 2;
         MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
 
+        MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
+             QUERY_FUNC_CAP_PHYS_PORT_ID);
+
     } else if (vhcr->op_modifier == 0) {
         /* enable rdma and ethernet interfaces, and new quota locations */
         field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA |
@@ -391,22 +395,22 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
         goto out;
     }
 
+    MLX4_GET(func_cap->flags1, outbox, QUERY_FUNC_CAP_FLAGS1_OFFSET);
     if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
-        MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
-        if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) {
+        if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN) {
             mlx4_err(dev, "VLAN is enforced on this port\n");
             err = -EPROTONOSUPPORT;
             goto out;
         }
 
-        if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
+        if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_FORCE_MAC) {
             mlx4_err(dev, "Force mac is enabled on this port\n");
             err = -EPROTONOSUPPORT;
             goto out;
         }
     } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) {
-        MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
-        if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
+        MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
+        if (field & QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID) {
             mlx4_err(dev, "phy_wqe_gid is "
                  "enforced on this ib port\n");
             err = -EPROTONOSUPPORT;
             goto out;
@@ -433,6 +437,10 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
     MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY);
     func_cap->qp1_proxy_qpn = size & 0xFFFFFF;
 
+    if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_NIC_INFO)
+        MLX4_GET(func_cap->phys_port_id, outbox,
+             QUERY_FUNC_CAP_PHYS_PORT_ID);
+
     /* All other resources are allocated by the master, but we still report
      * 'num' and 'reserved' capabilities as follows:
      * - num remains the maximum resource index
@@ -513,6 +521,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_XRC_OFFSET        0x67
 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET   0x68
 #define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET    0x70
+#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET    0x74
 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76
 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET   0x77
 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET    0x80
@@ -529,6 +538,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET      0x98
 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET     0xa0
 #define QUERY_DEV_CAP_FW_REASSIGN_MAC       0x9d
+#define QUERY_DEV_CAP_VXLAN         0x9e
 
     dev_cap->flags2 = 0;
     mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -603,6 +613,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
     if (field & 0x80)
         dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
     dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
+    MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+    if (field & 0x80)
+        dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB;
     MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
     dev_cap->fs_max_num_qp_per_entry = field;
     MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
@@ -694,6 +707,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
     MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
     if (field & 1<<6)
         dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
+    MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN);
+    if (field & 1<<3)
+        dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS;
     MLX4_GET(dev_cap->max_icm_sz, outbox,
          QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
     if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
@@ -842,6 +858,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
     field &= 0x7f;
     MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET);
 
+    /* For guests, disable vxlan tunneling */
+    MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VXLAN);
+    field &= 0xf7;
+    MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN);
+
     /* For guests, report Blueflame disabled */
     MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET);
     field &= 0x7f;
@@ -860,6 +881,12 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
         MLX4_PUT(outbox->buf, field,
              QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
     }
+
+    /* turn off ipoib managed steering for guests */
+    MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+    field &= ~0x80;
+    MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+
     return 0;
 }
@@ -1267,6 +1294,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define INIT_HCA_IN_SIZE         0x200
 #define INIT_HCA_VERSION_OFFSET      0x000
 #define INIT_HCA_VERSION         2
+#define INIT_HCA_VXLAN_OFFSET        0x0c
 #define INIT_HCA_CACHELINE_SZ_OFFSET     0x0e
 #define INIT_HCA_FLAGS_OFFSET        0x014
 #define INIT_HCA_QPC_OFFSET      0x020
@@ -1425,6 +1453,12 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
     MLX4_PUT(inbox, param->uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET);
     MLX4_PUT(inbox, param->log_uar_sz,  INIT_HCA_LOG_UAR_SZ_OFFSET);
 
+    /* set parser VXLAN attributes */
+    if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) {
+        u8 parser_params = 0;
+        MLX4_PUT(inbox, parser_params, INIT_HCA_VXLAN_OFFSET);
+    }
+
     err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000,
                MLX4_CMD_NATIVE);
 
@@ -1713,6 +1747,43 @@ int mlx4_NOP(struct mlx4_dev *dev)
     return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100, MLX4_CMD_NATIVE);
 }
 
+int mlx4_get_phys_port_id(struct mlx4_dev *dev)
+{
+    u8 port;
+    u32 *outbox;
+    struct mlx4_cmd_mailbox *mailbox;
+    u32 in_mod;
+    u32 guid_hi, guid_lo;
+    int err, ret = 0;
+#define MOD_STAT_CFG_PORT_OFFSET 8
+#define MOD_STAT_CFG_GUID_H  0X14
+#define MOD_STAT_CFG_GUID_L  0X1c
+
+    mailbox = mlx4_alloc_cmd_mailbox(dev);
+    if (IS_ERR(mailbox))
+        return PTR_ERR(mailbox);
+    outbox = mailbox->buf;
+
+    for (port = 1; port <= dev->caps.num_ports; port++) {
+        in_mod = port << MOD_STAT_CFG_PORT_OFFSET;
+        err = mlx4_cmd_box(dev, 0, mailbox->dma, in_mod, 0x2,
+                   MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A,
+                   MLX4_CMD_NATIVE);
+        if (err) {
+            mlx4_err(dev, "Fail to get port %d uplink guid\n",
+                 port);
+            ret = err;
+        } else {
+            MLX4_GET(guid_hi, outbox, MOD_STAT_CFG_GUID_H);
+            MLX4_GET(guid_lo, outbox, MOD_STAT_CFG_GUID_L);
+            dev->caps.phys_port_id[port] = (u64)guid_lo |
+                               (u64)guid_hi << 32;
+        }
+    }
+    mlx4_free_cmd_mailbox(dev, mailbox);
+    return ret;
+}
+
 #define MLX4_WOL_SETUP_MODE (5 << 28)
 int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index a0a368b7c939..6811ee00ba7c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -140,6 +140,8 @@ struct mlx4_func_cap {
     u32 qp1_proxy_qpn;
     u8  physical_port;
     u8  port_flags;
+    u8  flags1;
+    u64 phys_port_id;
 };
 
 struct mlx4_adapter {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 01fc6515384d..d711158b0d4b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -96,10 +96,10 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
                      " To activate device managed"
                      " flow steering when available, set to -1");
 
-static bool enable_64b_cqe_eqe;
+static bool enable_64b_cqe_eqe = true;
 module_param(enable_64b_cqe_eqe, bool, 0444);
 MODULE_PARM_DESC(enable_64b_cqe_eqe,
-         "Enable 64 byte CQEs/EQEs when the FW supports this");
+         "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");
 
 #define HCA_GLOBAL_CAP_MASK            0
 
@@ -388,6 +388,84 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
     return 0;
 }
 
+
+static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
+                       enum pci_bus_speed *speed,
+                       enum pcie_link_width *width)
+{
+    u32 lnkcap1, lnkcap2;
+    int err1, err2;
+
+#define PCIE_MLW_CAP_SHIFT 4    /* start of MLW mask in link capabilities */
+
+    *speed = PCI_SPEED_UNKNOWN;
+    *width = PCIE_LNK_WIDTH_UNKNOWN;
+
+    err1 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP, &lnkcap1);
+    err2 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP2, &lnkcap2);
+    if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
+        if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
+            *speed = PCIE_SPEED_8_0GT;
+        else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
+            *speed = PCIE_SPEED_5_0GT;
+        else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
+            *speed = PCIE_SPEED_2_5GT;
+    }
+    if (!err1) {
+        *width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
+        if (!lnkcap2) { /* pre-r3.0 */
+            if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
+                *speed = PCIE_SPEED_5_0GT;
+            else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
+                *speed = PCIE_SPEED_2_5GT;
+        }
+    }
+
+    if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
+        return err1 ? err1 :
+            err2 ? err2 : -EINVAL;
+    }
+    return 0;
+}
+
+static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
+{
+    enum pcie_link_width width, width_cap;
+    enum pci_bus_speed speed, speed_cap;
+    int err;
+
+#define PCIE_SPEED_STR(speed) \
+    (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
+     speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
+     speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
+     "Unknown")
+
+    err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
+    if (err) {
+        mlx4_warn(dev,
+              "Unable to determine PCIe device BW capabilities\n");
+        return;
+    }
+
+    err = pcie_get_minimum_link(dev->pdev, &speed, &width);
+    if (err || speed == PCI_SPEED_UNKNOWN ||
+        width == PCIE_LNK_WIDTH_UNKNOWN) {
+        mlx4_warn(dev,
+              "Unable to determine PCI device chain minimum BW\n");
+        return;
+    }
+
+    if (width != width_cap || speed != speed_cap)
+        mlx4_warn(dev,
+              "PCIe BW is different than device's capability\n");
+
+    mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
+          PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
+    mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
+          width, width_cap);
+    return;
+}
+
 /*The function checks if there are live vf, return the num of them*/
 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
 {
@@ -606,6 +684,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
         dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
         dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
         dev->caps.port_mask[i] = dev->caps.port_type[i];
+        dev->caps.phys_port_id[i] = func_cap.phys_port_id;
         if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
                             &dev->caps.gid_table_len[i],
                             &dev->caps.pkey_table_len[i]))
@@ -1443,6 +1522,19 @@ static void choose_steering_mode(struct mlx4_dev *dev,
              mlx4_log_num_mgm_entry_size);
 }
 
+static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
+                       struct mlx4_dev_cap *dev_cap)
+{
+    if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+        dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
+        dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
+    else
+        dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
+
+    mlx4_dbg(dev, "Tunneling offload mode is: %s\n", (dev->caps.tunnel_offload_mode
+         == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
+}
+
 static int mlx4_init_hca(struct mlx4_dev *dev)
 {
     struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1483,6 +1575,11 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
         }
 
         choose_steering_mode(dev, &dev_cap);
+        choose_tunnel_offload_mode(dev, &dev_cap);
+
+        err = mlx4_get_phys_port_id(dev);
+        if (err)
+            mlx4_err(dev, "Fail to get physical port id\n");
 
         if (mlx4_is_master(dev))
             mlx4_parav_master_pf_caps(dev);
@@ -1654,7 +1751,7 @@ EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
 
 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
 {
-    mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
+    mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
     return;
 }
 
@@ -2287,6 +2384,12 @@ slave_start:
             goto err_mfunc;
     }
 
+    /* check if the device is functioning at its maximum possible speed.
+     * No return code for this call, just warn the user in case of PCI
+     * express device capabilities are under-satisfied by the bus.
+     */
+    mlx4_check_pcie_caps(dev);
+
     /* In master functions, the communication channel must be initialized
      * after obtaining its address from fw */
     if (mlx4_is_master(dev)) {
"vxlan" : "none"); +} + static int mlx4_init_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1483,6 +1575,11 @@ static int mlx4_init_hca(struct mlx4_dev *dev) } choose_steering_mode(dev, &dev_cap); + choose_tunnel_offload_mode(dev, &dev_cap); + + err = mlx4_get_phys_port_id(dev); + if (err) + mlx4_err(dev, "Fail to get physical port id\n"); if (mlx4_is_master(dev)) mlx4_parav_master_pf_caps(dev); @@ -1654,7 +1751,7 @@ EXPORT_SYMBOL_GPL(mlx4_counter_alloc); void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx) { - mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx); + mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR); return; } @@ -2287,6 +2384,12 @@ slave_start: goto err_mfunc; } + /* check if the device is functioning at its maximum possible speed. + * No return code for this call, just warn the user in case of PCI + * express device capabilities are under-satisfied by the bus. + */ + mlx4_check_pcie_caps(dev); + /* In master functions, the communication channel must be initialized * after obtaining its address from fw */ if (mlx4_is_master(dev)) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index acf9d5f1f922..db7dc0b6667d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -125,9 +125,14 @@ static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { - struct mlx4_steer *s_steer = &mlx4_priv(dev)->steer[port - 1]; + struct mlx4_steer *s_steer; struct mlx4_promisc_qp *pqp; + if (port < 1 || port > dev->caps.num_ports) + return NULL; + + s_steer = &mlx4_priv(dev)->steer[port - 1]; + list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { if (pqp->qpn == qpn) return pqp; @@ -154,6 +159,9 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port, u32 prot; int err; + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL); if (!new_entry) @@ -238,6 +246,9 @@ static int existing_steering_entry(struct mlx4_dev *dev, u8 port, struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; pqp = get_promisc_qp(dev, port, steer, qpn); @@ -283,6 +294,9 @@ static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port, struct mlx4_steer_index *tmp_entry, *entry = NULL; struct mlx4_promisc_qp *dqp, *tmp_dqp; + if (port < 1 || port > dev->caps.num_ports) + return NULL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; /* if qp is not promisc, it cannot be duplicated */ @@ -324,6 +338,9 @@ static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port, bool ret = false; int i; + if (port < 1 || port > dev->caps.num_ports) + return NULL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -378,6 +395,9 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port, int err; struct mlx4_priv *priv = mlx4_priv(dev); + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); @@ -484,6 +504,9 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, int loc, i; int err; + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + s_steer = &mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); @@ -674,7 +697,8 @@ const u16 __sw_id_hw[] = { 
@@ -674,7 +697,8 @@ const u16 __sw_id_hw[] = {
     [MLX4_NET_TRANS_RULE_ID_IPV6]    = 0xE003,
     [MLX4_NET_TRANS_RULE_ID_IPV4]    = 0xE002,
     [MLX4_NET_TRANS_RULE_ID_TCP]     = 0xE004,
-    [MLX4_NET_TRANS_RULE_ID_UDP]     = 0xE006
+    [MLX4_NET_TRANS_RULE_ID_UDP]     = 0xE006,
+    [MLX4_NET_TRANS_RULE_ID_VXLAN]   = 0xE008
 };
 
 int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev,
@@ -699,7 +723,9 @@ static const int __rule_hw_sz[] = {
     [MLX4_NET_TRANS_RULE_ID_TCP] =
         sizeof(struct mlx4_net_trans_rule_hw_tcp_udp),
     [MLX4_NET_TRANS_RULE_ID_UDP] =
-        sizeof(struct mlx4_net_trans_rule_hw_tcp_udp)
+        sizeof(struct mlx4_net_trans_rule_hw_tcp_udp),
+    [MLX4_NET_TRANS_RULE_ID_VXLAN] =
+        sizeof(struct mlx4_net_trans_rule_hw_vxlan)
 };
 
 int mlx4_hw_rule_sz(struct mlx4_dev *dev,
@@ -764,6 +790,13 @@ static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec,
         rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk;
         break;
 
+    case MLX4_NET_TRANS_RULE_ID_VXLAN:
+        rule_hw->vxlan.vni =
+            cpu_to_be32(be32_to_cpu(spec->vxlan.vni) << 8);
+        rule_hw->vxlan.vni_mask =
+            cpu_to_be32(be32_to_cpu(spec->vxlan.vni_mask) << 8);
+        break;
+
     default:
         return -EINVAL;
     }
@@ -895,6 +928,23 @@ int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id)
 }
 EXPORT_SYMBOL_GPL(mlx4_flow_detach);
 
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
+                      u32 max_range_qpn)
+{
+    int err;
+    u64 in_param;
+
+    in_param = ((u64) min_range_qpn) << 32;
+    in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF;
+
+    err = mlx4_cmd(dev, in_param, 0, 0,
+            MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
+            MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+    return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE);
+
 int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
               int block_mcast_loopback, enum mlx4_protocol prot,
               enum mlx4_steer_type steer)
@@ -996,7 +1046,7 @@ out:
                   index, dev->caps.num_mgms);
         else
             mlx4_bitmap_free(&priv->mcg_table.bitmap,
-                     index - dev->caps.num_mgms);
+                     index - dev->caps.num_mgms, MLX4_USE_RR);
     }
     mutex_unlock(&priv->mcg_table.mutex);
 
@@ -1087,7 +1137,7 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
                       index, amgm_index, dev->caps.num_mgms);
             else
                 mlx4_bitmap_free(&priv->mcg_table.bitmap,
-                         amgm_index - dev->caps.num_mgms);
+                         amgm_index - dev->caps.num_mgms, MLX4_USE_RR);
         }
     } else {
         /* Remove entry from AMGM */
@@ -1107,7 +1157,7 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
                   prev, index, dev->caps.num_mgms);
         else
             mlx4_bitmap_free(&priv->mcg_table.bitmap,
-                     index - dev->caps.num_mgms);
+                     index - dev->caps.num_mgms, MLX4_USE_RR);
     }
 
 out:
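The << 8 in the VXLAN case of parse_trans_rule() mirrors the VXLAN header layout: the 24-bit VNI occupies the upper three bytes of its 32-bit word and the low byte is reserved (RFC 7348). A small sketch with htonl()/ntohl() standing in for cpu_to_be32()/be32_to_cpu():

#include <arpa/inet.h> /* htonl/ntohl as stand-ins for the kernel helpers */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t vni = 42;                /* 24-bit VXLAN Network Identifier */
    uint32_t spec_field = htonl(vni); /* as stored in the mlx4_spec_list */

    /* cpu_to_be32(be32_to_cpu(spec->vxlan.vni) << 8): shift the VNI
     * into the upper three bytes before handing it to hardware */
    uint32_t hw_field = htonl(ntohl(spec_field) << 8);

    printf("vni %u -> hw word 0x%08x (big-endian value)\n",
           vni, ntohl(hw_field));
    return 0;
}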
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index e582a41a802b..6b65f7795215 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -783,6 +783,11 @@ enum {
     MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1,
 };
 
+enum {
+    MLX4_NO_RR  = 0,
+    MLX4_USE_RR = 1,
+};
+
 struct mlx4_priv {
     struct mlx4_dev     dev;
 
@@ -844,9 +849,10 @@ static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
 extern struct workqueue_struct *mlx4_wq;
 
 u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
-void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj);
+void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr);
 u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align);
-void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt);
+void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
+                int use_rr);
 u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap);
 int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
              u32 reserved_bot, u32 reserved_top);
@@ -1236,6 +1242,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
                      struct mlx4_cmd_mailbox *inbox,
                      struct mlx4_cmd_mailbox *outbox,
                      struct mlx4_cmd_info *cmd);
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave,
+                          struct mlx4_vhcr *vhcr,
+                          struct mlx4_cmd_mailbox *inbox,
+                          struct mlx4_cmd_mailbox *outbox,
+                          struct mlx4_cmd_info *cmd);
 
 int mlx4_get_mgm_entry_size(struct mlx4_dev *dev);
 int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f3758de59c05..3af04c3f42ea 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -45,6 +45,7 @@
 #include <linux/dcbnl.h>
 #endif
 #include <linux/cpu_rmap.h>
+#include <linux/ptp_clock_kernel.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/qp.h>
@@ -255,6 +256,8 @@ struct mlx4_en_tx_ring {
     u16 poll_cnt;
     struct mlx4_en_tx_info *tx_info;
     u8 *bounce_buf;
+    u8 queue_index;
+    cpumask_t affinity_mask;
     u32 last_nr_txbb;
     struct mlx4_qp qp;
     struct mlx4_qp_context context;
@@ -373,10 +376,14 @@ struct mlx4_en_dev {
     u32         priv_pdn;
     spinlock_t  uar_lock;
     u8          mac_removed[MLX4_MAX_PORTS + 1];
+    rwlock_t    clock_lock;
+    u32         nominal_c_mult;
     struct cyclecounter    cycles;
     struct timecounter     clock;
     unsigned long          last_overflow_check;
     unsigned long          overflow_period;
+    struct ptp_clock       *ptp_clock;
+    struct ptp_clock_info  ptp_clock_info;
 };
 
 
@@ -434,6 +441,7 @@ struct mlx4_en_mc_list {
     enum mlx4_en_mclist_act action;
     u8  addr[ETH_ALEN];
     u64 reg_id;
+    u64 tunnel_reg_id;
 };
 
 struct mlx4_en_frag_info {
@@ -565,7 +573,7 @@ struct mlx4_en_priv {
     struct list_head filters;
     struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT];
 #endif
-
+    u64 tunnel_reg_id;
 };
 
 enum mlx4_en_wol {
@@ -653,7 +661,7 @@ static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq)
 }
 
 /* true if a socket is polling, even if it did not get the lock */
-static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq)
+static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq)
 {
     WARN_ON(!(cq->state & MLX4_CQ_LOCKED));
     return cq->state & CQ_USER_PEND;
@@ -683,7 +691,7 @@ static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq)
     return false;
 }
 
-static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq)
+static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq)
 {
     return false;
 }
@@ -714,12 +722,14 @@ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
-u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb);
+u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
+             void *accel_priv);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
                struct mlx4_en_tx_ring **pring,
-               int qpn, u32 size, u16 stride, int node);
+               int qpn, u32 size, u16 stride,
+               int node, int queue_index);
 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
                  struct mlx4_en_tx_ring **pring);
 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
@@ -741,6 +751,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
               struct mlx4_en_cq *cq,
               int budget);
 int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
+int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
 void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
         int is_tx, int rss, int qpn, int cqn, int user_prio,
         struct mlx4_qp_context *context);
@@ -786,6 +797,7 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
                 struct skb_shared_hwtstamps *hwts,
                 u64 timestamp);
 void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev);
+void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev);
 int mlx4_en_timestamp_config(struct net_device *dev,
                  int tx_type,
                  int rx_filter);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index b3ee9bafff5e..24835853b753 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -32,7 +32,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/slab.h>
@@ -346,7 +345,7 @@ void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
 {
     struct mlx4_priv *priv = mlx4_priv(dev);
 
-    mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);
+    mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_NO_RR);
 }
 
 static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 84cfb40bf451..74216071201f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/io-mapping.h>
@@ -59,7 +58,7 @@ EXPORT_SYMBOL_GPL(mlx4_pd_alloc);
 
 void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
 {
-    mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn);
+    mlx4_bitmap_free(&mlx4_priv(dev)->pd_bitmap, pdn, MLX4_USE_RR);
 }
 EXPORT_SYMBOL_GPL(mlx4_pd_free);
 
@@ -96,7 +95,7 @@ EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc);
 
 void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
 {
-    mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn);
+    mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn, MLX4_USE_RR);
 }
 
 void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
@@ -164,7 +163,7 @@ EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
 
 void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
 {
-    mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index);
+    mlx4_bitmap_free(&mlx4_priv(dev)->uar_table.bitmap, uar->index, MLX4_USE_RR);
 }
 EXPORT_SYMBOL_GPL(mlx4_uar_free);
@@ -800,6 +820,47 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
 }
 EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
 
+enum {
+	VXLAN_ENABLE_MODIFY	= 1 << 7,
+	VXLAN_STEERING_MODIFY	= 1 << 6,
+
+	VXLAN_ENABLE		= 1 << 7,
+};
+
+struct mlx4_set_port_vxlan_context {
+	u32	reserved1;
+	u8	modify_flags;
+	u8	reserved2;
+	u8	enable_flags;
+	u8	steering;
+};
+
+int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering)
+{
+	int err;
+	u32 in_mod;
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_vxlan_context *context;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	context = mailbox->buf;
+	memset(context, 0, sizeof(*context));
+
+	context->modify_flags = VXLAN_ENABLE_MODIFY | VXLAN_STEERING_MODIFY;
+	context->enable_flags = VXLAN_ENABLE;
+	context->steering = steering;
+
+	in_mod = MLX4_SET_PORT_VXLAN << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_VXLAN);
+
 int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
				struct mlx4_vhcr *vhcr,
				struct mlx4_cmd_mailbox *inbox,
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 2715e61dbb74..61d64ebffd56 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -35,7 +35,6 @@
 
 #include <linux/gfp.h>
 #include <linux/export.h>
-#include <linux/init.h>
 
 #include <linux/mlx4/cmd.h>
 #include <linux/mlx4/qp.h>
@@ -250,7 +249,7 @@ void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
 	if (mlx4_is_qp_reserved(dev, (u32) base_qpn))
 		return;
 
-	mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
+	mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, MLX4_USE_RR);
 }
 
 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
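mlx4_SET_PORT_VXLAN() above issues a SET_PORT firmware command (opcode modifier MLX4_SET_PORT_VXLAN) that enables VXLAN processing and programs a steering mode for one port. A sketch of how an upper layer might call it; iterating over all ports and passing 0 for the steering byte are assumptions, since this diff does not define the legal steering values.

/* Illustrative only: turn on VXLAN offload for every port of a device.
 * The steering argument is a placeholder value. */
static int example_enable_vxlan(struct mlx4_dev *dev)
{
	int port;
	int err;

	for (port = 1; port <= dev->caps.num_ports; port++) {
		err = mlx4_SET_PORT_VXLAN(dev, port, 0 /* steering */);
		if (err) {
			mlx4_err(dev, "SET_PORT_VXLAN failed on port %d (%d)\n",
				 port, err);
			return err;
		}
	}
	return 0;
}
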
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 2f3f2bc7f283..57428a0cb9dd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1340,43 +1340,29 @@ static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn,
 
 	spin_lock_irq(mlx4_tlock(dev));
 	r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn);
-	if (!r)
+	if (!r) {
 		err = -ENOENT;
-	else if (r->com.owner != slave)
+	} else if (r->com.owner != slave) {
 		err = -EPERM;
-	else {
-		switch (state) {
-		case RES_CQ_BUSY:
-			err = -EBUSY;
-			break;
-
-		case RES_CQ_ALLOCATED:
-			if (r->com.state != RES_CQ_HW)
-				err = -EINVAL;
-			else if (atomic_read(&r->ref_count))
-				err = -EBUSY;
-			else
-				err = 0;
-			break;
-
-		case RES_CQ_HW:
-			if (r->com.state != RES_CQ_ALLOCATED)
-				err = -EINVAL;
-			else
-				err = 0;
-			break;
-
-		default:
+	} else if (state == RES_CQ_ALLOCATED) {
+		if (r->com.state != RES_CQ_HW)
 			err = -EINVAL;
-		}
+		else if (atomic_read(&r->ref_count))
+			err = -EBUSY;
+		else
+			err = 0;
+	} else if (state != RES_CQ_HW || r->com.state != RES_CQ_ALLOCATED) {
+		err = -EINVAL;
+	} else {
+		err = 0;
+	}
 
-		if (!err) {
-			r->com.from_state = r->com.state;
-			r->com.to_state = state;
-			r->com.state = RES_CQ_BUSY;
-			if (cq)
-				*cq = r;
-		}
+	if (!err) {
+		r->com.from_state = r->com.state;
+		r->com.to_state = state;
+		r->com.state = RES_CQ_BUSY;
+		if (cq)
+			*cq = r;
 	}
 
 	spin_unlock_irq(mlx4_tlock(dev));
@@ -1385,7 +1371,7 @@ static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn,
 }
 
 static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
-				 enum res_cq_states state, struct res_srq **srq)
+				 enum res_srq_states state, struct res_srq **srq)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
@@ -1394,39 +1380,25 @@ static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
 
 	spin_lock_irq(mlx4_tlock(dev));
 	r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index);
-	if (!r)
+	if (!r) {
 		err = -ENOENT;
-	else if (r->com.owner != slave)
+	} else if (r->com.owner != slave) {
 		err = -EPERM;
-	else {
-		switch (state) {
-		case RES_SRQ_BUSY:
+	} else if (state == RES_SRQ_ALLOCATED) {
+		if (r->com.state != RES_SRQ_HW)
 			err = -EINVAL;
-			break;
-
-		case RES_SRQ_ALLOCATED:
-			if (r->com.state != RES_SRQ_HW)
-				err = -EINVAL;
-			else if (atomic_read(&r->ref_count))
-				err = -EBUSY;
-			break;
-
-		case RES_SRQ_HW:
-			if (r->com.state != RES_SRQ_ALLOCATED)
-				err = -EINVAL;
-			break;
-
-		default:
-			err = -EINVAL;
-		}
+		else if (atomic_read(&r->ref_count))
+			err = -EBUSY;
+	} else if (state != RES_SRQ_HW || r->com.state != RES_SRQ_ALLOCATED) {
+		err = -EINVAL;
+	}
 
-		if (!err) {
-			r->com.from_state = r->com.state;
-			r->com.to_state = state;
-			r->com.state = RES_SRQ_BUSY;
-			if (srq)
-				*srq = r;
-		}
+	if (!err) {
+		r->com.from_state = r->com.state;
+		r->com.to_state = state;
+		r->com.state = RES_SRQ_BUSY;
+		if (srq)
+			*srq = r;
 	}
 
 	spin_unlock_irq(mlx4_tlock(dev));
@@ -3634,7 +3606,7 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header,
 	    !is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
 		list_for_each_entry_safe(res, tmp, rlist, list) {
 			be_mac = cpu_to_be64(res->mac << 16);
-			if (!memcmp(&be_mac, eth_header->eth.dst_mac, ETH_ALEN))
+			if (ether_addr_equal((u8 *)&be_mac, eth_header->eth.dst_mac))
 				return 0;
 		}
 		pr_err("MAC %pM doesn't belong to VF %d, Steering rule rejected\n",
@@ -3844,6 +3816,16 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
 	return err;
 }
 
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave,
+					      struct mlx4_vhcr *vhcr,
+					      struct mlx4_cmd_mailbox *inbox,
+					      struct mlx4_cmd_mailbox *outbox,
+					      struct mlx4_cmd_info *cmd)
+{
+	return -EPERM;
+}
+
+
 static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp)
 {
 	struct res_gid *rgid;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 8fdf23753779..98faf870b0b0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/mlx4/cmd.h>
 #include <linux/mlx4/srq.h>
 
@@ -117,7 +116,7 @@ err_put:
 	mlx4_table_put(dev, &srq_table->table, *srqn);
 
 err_out:
-	mlx4_bitmap_free(&srq_table->bitmap, *srqn);
+	mlx4_bitmap_free(&srq_table->bitmap, *srqn, MLX4_NO_RR);
 	return err;
 }
 
@@ -145,7 +144,7 @@ void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
 	mlx4_table_put(dev, &srq_table->cmpt_table, srqn);
 	mlx4_table_put(dev, &srq_table->table, srqn);
-	mlx4_bitmap_free(&srq_table->bitmap, srqn);
+	mlx4_bitmap_free(&srq_table->bitmap, srqn, MLX4_NO_RR);
 }
 
 static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
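Across these files, the extra argument added to mlx4_bitmap_free() and mlx4_bitmap_free_range() selects one of two reuse policies: QP ranges, PDs, XRCDs and UARs are freed with MLX4_USE_RR, which defers handing a just-freed ID out again, while MPTs and SRQs use MLX4_NO_RR and return to lowest-ID-first reuse. Presumably the round-robin case makes stale references to a freed ID fail visibly rather than land on a freshly recycled resource. A self-contained toy model of the two policies (nothing below is driver code; only the use_rr idea comes from this diff):

/* Toy userspace model of round-robin vs. lowest-first ID reuse.
 * Build: cc -std=c99 -o rr_demo rr_demo.c */
#include <stdbool.h>
#include <stdio.h>

#define NBITS 8

struct toy_bitmap {
	bool used[NBITS];
	unsigned int last;	/* where the next allocation search starts */
};

static int toy_alloc(struct toy_bitmap *b)
{
	for (unsigned int n = 0; n < NBITS; n++) {
		unsigned int i = (b->last + n) % NBITS;

		if (!b->used[i]) {
			b->used[i] = true;
			b->last = (i + 1) % NBITS;
			return (int)i;
		}
	}
	return -1;		/* exhausted */
}

static void toy_free(struct toy_bitmap *b, unsigned int obj, bool use_rr)
{
	b->used[obj] = false;
	/* Non-RR: pull the cursor back so low IDs are preferred again.
	 * RR: leave the cursor alone, deferring reuse of 'obj'. */
	if (!use_rr && obj < b->last)
		b->last = obj;
}

int main(void)
{
	struct toy_bitmap b = { .last = 0 };
	int id = toy_alloc(&b);				/* gets id 0 */

	toy_free(&b, id, true);				/* RR-style free */
	printf("RR: freed 0, next alloc = %d\n", toy_alloc(&b));	/* 1 */

	toy_free(&b, 1, false);				/* no-RR-style free */
	printf("no-RR: freed 1, next alloc = %d\n", toy_alloc(&b));	/* 1 */
	return 0;
}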