diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_rx.c')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_rx.c | 303 |
1 files changed, 208 insertions, 95 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 01660c595f5c..a0474eb94aa3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -42,6 +42,10 @@ #include <linux/vmalloc.h> #include <linux/irq.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_checksum.h> +#endif + #include "mlx4_en.h" static int mlx4_alloc_pages(struct mlx4_en_priv *priv, @@ -74,7 +78,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, page_alloc->page_size = PAGE_SIZE << order; page_alloc->page = page; page_alloc->dma = dma; - page_alloc->page_offset = frag_info->frag_align; + page_alloc->page_offset = 0; /* Not doing get_page() for each frag is a big win * on asymetric workloads. Note we can not use atomic_set(). */ @@ -119,7 +123,6 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, out: while (i--) { - frag_info = &priv->frag_info[i]; if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, page_alloc[i].page_size, PCI_DMA_FROMDEVICE); @@ -157,7 +160,7 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; if (mlx4_alloc_pages(priv, &ring->page_alloc[i], - frag_info, GFP_KERNEL)) + frag_info, GFP_KERNEL | __GFP_COLD)) goto out; } return 0; @@ -269,7 +272,7 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) if (mlx4_en_prepare_rx_desc(priv, ring, ring->actual_size, - GFP_KERNEL)) { + GFP_KERNEL | __GFP_COLD)) { if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { en_err(priv, "Failed to allocate enough rx buffers\n"); return -ENOMEM; @@ -636,13 +639,94 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, int index = ring->prod & ring->size_mask; while ((u32) (ring->prod - ring->cons) < ring->actual_size) { - if (mlx4_en_prepare_rx_desc(priv, ring, index, GFP_ATOMIC)) + if (mlx4_en_prepare_rx_desc(priv, ring, index, + GFP_ATOMIC | __GFP_COLD)) break; ring->prod++; index = ring->prod & ring->size_mask; } } +/* When hardware doesn't strip the vlan, we need to calculate the checksum + * over it and add it to the hardware's checksum calculation + */ +static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum, + struct vlan_hdr *vlanh) +{ + return csum_add(hw_checksum, *(__wsum *)vlanh); +} + +/* Although the stack expects checksum which doesn't include the pseudo + * header, the HW adds it. To address that, we are subtracting the pseudo + * header checksum from the checksum value provided by the HW. + */ +static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, + struct iphdr *iph) +{ + __u16 length_for_csum = 0; + __wsum csum_pseudo_header = 0; + + length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2)); + csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr, + length_for_csum, iph->protocol, 0); + skb->csum = csum_sub(hw_checksum, csum_pseudo_header); +} + +#if IS_ENABLED(CONFIG_IPV6) +/* In IPv6 packets, besides subtracting the pseudo header checksum, + * we also compute/add the IP header checksum which + * is not added by the HW. + */ +static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, + struct ipv6hdr *ipv6h) +{ + __wsum csum_pseudo_hdr = 0; + + if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) + return -1; + hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8)); + + csum_pseudo_hdr = csum_partial(&ipv6h->saddr, + sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0); + csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len); + csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr)); + + skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr); + skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0)); + return 0; +} +#endif +static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, + int hwtstamp_rx_filter) +{ + __wsum hw_checksum = 0; + + void *hdr = (u8 *)va + sizeof(struct ethhdr); + + hw_checksum = csum_unfold((__force __sum16)cqe->checksum); + + if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) && + hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) { + /* next protocol non IPv4 or IPv6 */ + if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto + != htons(ETH_P_IP) && + ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto + != htons(ETH_P_IPV6)) + return -1; + hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); + hdr += sizeof(struct vlan_hdr); + } + + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4)) + get_fixed_ipv4_csum(hw_checksum, skb, hdr); +#if IS_ENABLED(CONFIG_IPV6) + else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) + if (get_fixed_ipv6_csum(hw_checksum, skb, hdr)) + return -1; +#endif + return 0; +} + int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -744,73 +828,96 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); if (likely(dev->features & NETIF_F_RXCSUM)) { - if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && - (cqe->checksum == cpu_to_be16(0xffff))) { - ring->csum_ok++; - /* This packet is eligible for GRO if it is: - * - DIX Ethernet (type interpretation) - * - TCP/IP (v4) - * - without IP options - * - not an IP fragment - * - no LLS polling in progress - */ - if (!mlx4_en_cq_busy_polling(cq) && - (dev->features & NETIF_F_GRO)) { - struct sk_buff *gro_skb = napi_get_frags(&cq->napi); - if (!gro_skb) - goto next; - - nr = mlx4_en_complete_rx_desc(priv, - rx_desc, frags, gro_skb, - length); - if (!nr) - goto next; + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | + MLX4_CQE_STATUS_UDP)) { + if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && + cqe->checksum == cpu_to_be16(0xffff)) { + ip_summed = CHECKSUM_UNNECESSARY; + ring->csum_ok++; + } else { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + } + } else { + if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP && + (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV6))) { + ip_summed = CHECKSUM_COMPLETE; + ring->csum_complete++; + } else { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + } + } + } else { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + } - skb_shinfo(gro_skb)->nr_frags = nr; - gro_skb->len = length; - gro_skb->data_len = length; - gro_skb->ip_summed = CHECKSUM_UNNECESSARY; + /* This packet is eligible for GRO if it is: + * - DIX Ethernet (type interpretation) + * - TCP/IP (v4) + * - without IP options + * - not an IP fragment + * - no LLS polling in progress + */ + if (!mlx4_en_cq_busy_polling(cq) && + (dev->features & NETIF_F_GRO)) { + struct sk_buff *gro_skb = napi_get_frags(&cq->napi); + if (!gro_skb) + goto next; + + nr = mlx4_en_complete_rx_desc(priv, + rx_desc, frags, gro_skb, + length); + if (!nr) + goto next; + + if (ip_summed == CHECKSUM_COMPLETE) { + void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); + if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + ring->csum_complete--; + } + } - if (l2_tunnel) - gro_skb->csum_level = 1; - if ((cqe->vlan_my_qpn & - cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) && - (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { - u16 vid = be16_to_cpu(cqe->sl_vid); + skb_shinfo(gro_skb)->nr_frags = nr; + gro_skb->len = length; + gro_skb->data_len = length; + gro_skb->ip_summed = ip_summed; - __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); - } + if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY) + gro_skb->csum_level = 1; - if (dev->features & NETIF_F_RXHASH) - skb_set_hash(gro_skb, - be32_to_cpu(cqe->immed_rss_invalid), - PKT_HASH_TYPE_L3); + if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) && + (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { + u16 vid = be16_to_cpu(cqe->sl_vid); - skb_record_rx_queue(gro_skb, cq->ring); - skb_mark_napi_id(gro_skb, &cq->napi); + __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); + } - if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, - skb_hwtstamps(gro_skb), - timestamp); - } + if (dev->features & NETIF_F_RXHASH) + skb_set_hash(gro_skb, + be32_to_cpu(cqe->immed_rss_invalid), + PKT_HASH_TYPE_L3); - napi_gro_frags(&cq->napi); - goto next; - } + skb_record_rx_queue(gro_skb, cq->ring); + skb_mark_napi_id(gro_skb, &cq->napi); - /* GRO not possible, complete processing here */ - ip_summed = CHECKSUM_UNNECESSARY; - } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { + timestamp = mlx4_en_get_cqe_ts(cqe); + mlx4_en_fill_hwtstamps(mdev, + skb_hwtstamps(gro_skb), + timestamp); } - } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + + napi_gro_frags(&cq->napi); + goto next; } + /* GRO not possible, complete processing here */ skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); if (!skb) { priv->stats.rx_dropped++; @@ -822,6 +929,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud goto next; } + if (ip_summed == CHECKSUM_COMPLETE) { + if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) { + ip_summed = CHECKSUM_NONE; + ring->csum_complete--; + ring->csum_none++; + } + } + skb->ip_summed = ip_summed; skb->protocol = eth_type_trans(skb, dev); skb_record_rx_queue(skb, cq->ring); @@ -879,8 +994,8 @@ void mlx4_en_rx_irq(struct mlx4_cq *mcq) struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); - if (priv->port_up) - napi_schedule(&cq->napi); + if (likely(priv->port_up)) + napi_schedule_irqoff(&cq->napi); else mlx4_en_arm_cq(priv, cq); } @@ -910,20 +1025,18 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) cpu_curr = smp_processor_id(); aff = irq_desc_get_irq_data(cq->irq_desc)->affinity; - if (unlikely(!cpumask_test_cpu(cpu_curr, aff))) { - /* Current cpu is not according to smp_irq_affinity - - * probably affinity changed. need to stop this NAPI - * poll, and restart it on the right CPU - */ - napi_complete(napi); - mlx4_en_arm_cq(priv, cq); - return 0; - } - } else { - /* Done for now */ - napi_complete(napi); - mlx4_en_arm_cq(priv, cq); + if (likely(cpumask_test_cpu(cpu_curr, aff))) + return budget; + + /* Current cpu is not according to smp_irq_affinity - + * probably affinity changed. need to stop this NAPI + * poll, and restart it on the right CPU + */ + done = 0; } + /* Done for now */ + napi_complete_done(napi, done); + mlx4_en_arm_cq(priv, cq); return done; } @@ -946,15 +1059,8 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) (eff_mtu > buf_size + frag_sizes[i]) ? frag_sizes[i] : eff_mtu - buf_size; priv->frag_info[i].frag_prefix_size = buf_size; - if (!i) { - priv->frag_info[i].frag_align = NET_IP_ALIGN; - priv->frag_info[i].frag_stride = - ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES); - } else { - priv->frag_info[i].frag_align = 0; - priv->frag_info[i].frag_stride = - ALIGN(frag_sizes[i], SMP_CACHE_BYTES); - } + priv->frag_info[i].frag_stride = ALIGN(frag_sizes[i], + SMP_CACHE_BYTES); buf_size += priv->frag_info[i].frag_size; i++; } @@ -967,11 +1073,10 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) eff_mtu, priv->num_frags); for (i = 0; i < priv->num_frags; i++) { en_err(priv, - " frag:%d - size:%d prefix:%d align:%d stride:%d\n", + " frag:%d - size:%d prefix:%d stride:%d\n", i, priv->frag_info[i].frag_size, priv->frag_info[i].frag_prefix_size, - priv->frag_info[i].frag_align, priv->frag_info[i].frag_stride); } } @@ -1026,7 +1131,8 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) int err; u32 qpn; - err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn); + err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, + MLX4_RESERVE_A0_QP); if (err) { en_err(priv, "Failed reserving drop qpn\n"); return err; @@ -1065,14 +1171,11 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) int i, qpn; int err = 0; int good_qps = 0; - static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC, - 0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD, - 0x593D56D9, 0xF3253C06, 0x2ADC1FFC}; en_dbg(DRV, priv, "Configuring rss steering\n"); err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, - &rss_map->base_qpn); + &rss_map->base_qpn, 0); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; @@ -1122,9 +1225,19 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->flags = rss_mask; rss_context->hash_fn = MLX4_RSS_HASH_TOP; - for (i = 0; i < 10; i++) - rss_context->rss_key[i] = cpu_to_be32(rsskey[i]); - + if (priv->rss_hash_fn == ETH_RSS_HASH_XOR) { + rss_context->hash_fn = MLX4_RSS_HASH_XOR; + } else if (priv->rss_hash_fn == ETH_RSS_HASH_TOP) { + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + memcpy(rss_context->rss_key, priv->rss_key, + MLX4_EN_RSS_KEY_SIZE); + netdev_rss_key_fill(rss_context->rss_key, + MLX4_EN_RSS_KEY_SIZE); + } else { + en_err(priv, "Unknown RSS hash function requested\n"); + err = -EINVAL; + goto indir_err; + } err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) |