diff options
author | Catherine Sullivan <csully@google.com> | 2020-12-08 01:45:26 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-12-09 03:06:28 +0300 |
commit | 6f007c6486d69967ac1d9e67df9ae9c77d49f1cc (patch) | |
tree | fcaa4784b3e2541ffa6319c37a2040e4ae3265c3 /drivers/net/ethernet/google/gve/gve_tx.c | |
parent | 02b0e0c18ba75227e0482600950c6abe71ace30f (diff) | |
download | linux-6f007c6486d69967ac1d9e67df9ae9c77d49f1cc.tar.xz |
gve: Add support for raw addressing in the tx path
During TX, skbs' data addresses are dma_map'ed and passed to the NIC.
This means that the device can perform DMA directly from these addresses
and the driver does not have to copy the buffer content into
pre-allocated buffers/qpls (as in qpl mode).
Reviewed-by: Yangchun Fu <yangchun@google.com>
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/google/gve/gve_tx.c')
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx.c | 197 |
1 files changed, 161 insertions, 36 deletions
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index d0244feb0301..6938f3a939d6 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -158,9 +158,11 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx) tx->q_resources, tx->q_resources_bus); tx->q_resources = NULL; - gve_tx_fifo_release(priv, &tx->tx_fifo); - gve_unassign_qpl(priv, tx->tx_fifo.qpl->id); - tx->tx_fifo.qpl = NULL; + if (!tx->raw_addressing) { + gve_tx_fifo_release(priv, &tx->tx_fifo); + gve_unassign_qpl(priv, tx->tx_fifo.qpl->id); + tx->tx_fifo.qpl = NULL; + } bytes = sizeof(*tx->desc) * slots; dma_free_coherent(hdev, bytes, tx->desc, tx->bus); @@ -206,11 +208,15 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) if (!tx->desc) goto abort_with_info; - tx->tx_fifo.qpl = gve_assign_tx_qpl(priv); + tx->raw_addressing = priv->raw_addressing; + tx->dev = &priv->pdev->dev; + if (!tx->raw_addressing) { + tx->tx_fifo.qpl = gve_assign_tx_qpl(priv); - /* map Tx FIFO */ - if (gve_tx_fifo_init(priv, &tx->tx_fifo)) - goto abort_with_desc; + /* map Tx FIFO */ + if (gve_tx_fifo_init(priv, &tx->tx_fifo)) + goto abort_with_desc; + } tx->q_resources = dma_alloc_coherent(hdev, @@ -228,7 +234,8 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) return 0; abort_with_fifo: - gve_tx_fifo_release(priv, &tx->tx_fifo); + if (!tx->raw_addressing) + gve_tx_fifo_release(priv, &tx->tx_fifo); abort_with_desc: dma_free_coherent(hdev, bytes, tx->desc, tx->bus); tx->desc = NULL; @@ -301,27 +308,47 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx, return bytes; } -/* The most descriptors we could need are 3 - 1 for the headers, 1 for - * the beginning of the payload at the end of the FIFO, and 1 if the - * payload wraps to the beginning of the FIFO. +/* The most descriptors we could need is MAX_SKB_FRAGS + 3 : 1 for each skb frag, + * +1 for the skb linear portion, +1 for when tcp hdr needs to be in separate descriptor, + * and +1 if the payload wraps to the beginning of the FIFO. */ -#define MAX_TX_DESC_NEEDED 3 +#define MAX_TX_DESC_NEEDED (MAX_SKB_FRAGS + 3) +static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info) +{ + if (info->skb) { + dma_unmap_single(dev, dma_unmap_addr(&info->buf, dma), + dma_unmap_len(&info->buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(&info->buf, len, 0); + } else { + dma_unmap_page(dev, dma_unmap_addr(&info->buf, dma), + dma_unmap_len(&info->buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(&info->buf, len, 0); + } +} /* Check if sufficient resources (descriptor ring space, FIFO space) are * available to transmit the given number of bytes. */ static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required) { - return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && - gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required)); + bool can_alloc = true; + + if (!tx->raw_addressing) + can_alloc = gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required); + + return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc); } /* Stops the queue if the skb cannot be transmitted. */ static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb) { - int bytes_required; + int bytes_required = 0; + + if (!tx->raw_addressing) + bytes_required = gve_skb_fifo_bytes_required(tx, skb); - bytes_required = gve_skb_fifo_bytes_required(tx, skb); if (likely(gve_can_tx(tx, bytes_required))) return 0; @@ -395,17 +422,13 @@ static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses, { u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE; u64 first_page = iov_offset / PAGE_SIZE; - dma_addr_t dma; u64 page; - for (page = first_page; page <= last_page; page++) { - dma = page_buses[page]; - dma_sync_single_for_device(dev, dma, PAGE_SIZE, DMA_TO_DEVICE); - } + for (page = first_page; page <= last_page; page++) + dma_sync_single_for_device(dev, page_buses[page], PAGE_SIZE, DMA_TO_DEVICE); } -static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb, - struct device *dev) +static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, struct sk_buff *skb) { int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset; union gve_tx_desc *pkt_desc, *seg_desc; @@ -447,7 +470,7 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb, skb_copy_bits(skb, 0, tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset, hlen); - gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses, + gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses, info->iov[hdr_nfrags - 1].iov_offset, info->iov[hdr_nfrags - 1].iov_len); copy_offset = hlen; @@ -463,7 +486,7 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb, skb_copy_bits(skb, copy_offset, tx->tx_fifo.base + info->iov[i].iov_offset, info->iov[i].iov_len); - gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses, + gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses, info->iov[i].iov_offset, info->iov[i].iov_len); copy_offset += info->iov[i].iov_len; @@ -472,6 +495,94 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb, return 1 + payload_nfrags; } +static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx, + struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + int hlen, payload_nfrags, l4_hdr_offset; + union gve_tx_desc *pkt_desc, *seg_desc; + struct gve_tx_buffer_state *info; + bool is_gso = skb_is_gso(skb); + u32 idx = tx->req & tx->mask; + struct gve_tx_dma_buf *buf; + u64 addr; + u32 len; + int i; + + info = &tx->info[idx]; + pkt_desc = &tx->desc[idx]; + + l4_hdr_offset = skb_checksum_start_offset(skb); + /* If the skb is gso, then we want only up to the tcp header in the first segment + * to efficiently replicate on each segment otherwise we want the linear portion + * of the skb (which will contain the checksum because skb->csum_start and + * skb->csum_offset are given relative to skb->head) in the first segment. + */ + hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : skb_headlen(skb); + len = skb_headlen(skb); + + info->skb = skb; + + addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx->dev, addr))) { + tx->dma_mapping_error++; + goto drop; + } + buf = &info->buf; + dma_unmap_len_set(buf, len, len); + dma_unmap_addr_set(buf, dma, addr); + + payload_nfrags = shinfo->nr_frags; + if (hlen < len) { + /* For gso the rest of the linear portion of the skb needs to + * be in its own descriptor. + */ + payload_nfrags++; + gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, + 1 + payload_nfrags, hlen, addr); + + len -= hlen; + addr += hlen; + idx = (tx->req + 1) & tx->mask; + seg_desc = &tx->desc[idx]; + gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr); + } else { + gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, + 1 + payload_nfrags, hlen, addr); + } + + for (i = 0; i < shinfo->nr_frags; i++) { + const skb_frag_t *frag = &shinfo->frags[i]; + + idx = (idx + 1) & tx->mask; + seg_desc = &tx->desc[idx]; + len = skb_frag_size(frag); + addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx->dev, addr))) { + tx->dma_mapping_error++; + goto unmap_drop; + } + buf = &tx->info[idx].buf; + tx->info[idx].skb = NULL; + dma_unmap_len_set(buf, len, len); + dma_unmap_addr_set(buf, dma, addr); + + gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr); + } + + return 1 + payload_nfrags; + +unmap_drop: + i += (payload_nfrags == shinfo->nr_frags ? 1 : 2); + while (i--) { + idx--; + gve_tx_unmap_buf(tx->dev, &tx->info[idx & tx->mask]); + } +drop: + tx->dropped_pkt++; + return 0; +} + netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); @@ -490,17 +601,26 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) gve_tx_put_doorbell(priv, tx->q_resources, tx->req); return NETDEV_TX_BUSY; } - nsegs = gve_tx_add_skb(tx, skb, &priv->pdev->dev); - - netdev_tx_sent_queue(tx->netdev_txq, skb->len); - skb_tx_timestamp(skb); - - /* give packets to NIC */ - tx->req += nsegs; + if (tx->raw_addressing) + nsegs = gve_tx_add_skb_no_copy(priv, tx, skb); + else + nsegs = gve_tx_add_skb_copy(priv, tx, skb); + + /* If the packet is getting sent, we need to update the skb */ + if (nsegs) { + netdev_tx_sent_queue(tx->netdev_txq, skb->len); + skb_tx_timestamp(skb); + tx->req += nsegs; + } else { + dev_kfree_skb_any(skb); + } if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more()) return NETDEV_TX_OK; + /* Give packets to NIC. Even if this packet failed to send the doorbell + * might need to be rung because of xmit_more. + */ gve_tx_put_doorbell(priv, tx->q_resources, tx->req); return NETDEV_TX_OK; } @@ -525,24 +645,29 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, info = &tx->info[idx]; skb = info->skb; + /* Unmap the buffer */ + if (tx->raw_addressing) + gve_tx_unmap_buf(tx->dev, info); + tx->done++; /* Mark as free */ if (skb) { info->skb = NULL; bytes += skb->len; pkts++; dev_consume_skb_any(skb); + if (tx->raw_addressing) + continue; /* FIFO free */ for (i = 0; i < ARRAY_SIZE(info->iov); i++) { - space_freed += info->iov[i].iov_len + - info->iov[i].iov_padding; + space_freed += info->iov[i].iov_len + info->iov[i].iov_padding; info->iov[i].iov_len = 0; info->iov[i].iov_padding = 0; } } - tx->done++; } - gve_tx_free_fifo(&tx->tx_fifo, space_freed); + if (!tx->raw_addressing) + gve_tx_free_fifo(&tx->tx_fifo, space_freed); u64_stats_update_begin(&tx->statss); tx->bytes_done += bytes; tx->pkt_done += pkts; |