summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/google
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/google')
-rw-r--r--drivers/net/ethernet/google/gve/gve.h39
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c89
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h15
-rw-r--r--drivers/net/ethernet/google/gve/gve_desc.h19
-rw-r--r--drivers/net/ethernet/google/gve/gve_ethtool.c3
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c11
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c364
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c197
8 files changed, 575 insertions, 162 deletions
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index f5c80229ea96..daf07c0f790b 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -38,6 +38,8 @@
#define NIC_TX_STATS_REPORT_NUM 0
#define NIC_RX_STATS_REPORT_NUM 4
+#define GVE_DATA_SLOT_ADDR_PAGE_MASK (~(PAGE_SIZE - 1))
+
/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -49,7 +51,8 @@ struct gve_rx_desc_queue {
struct gve_rx_slot_page_info {
struct page *page;
void *page_address;
- u32 page_offset; /* offset to write to in page */
+ u8 page_offset; /* flipped to second half? */
+ u8 can_flip;
};
/* A list of pages registered with the device during setup and used by a queue
@@ -64,10 +67,11 @@ struct gve_queue_page_list {
/* Each slot in the data ring has a 1:1 mapping to a slot in the desc ring */
struct gve_rx_data_queue {
- struct gve_rx_data_slot *data_ring; /* read by NIC */
+ union gve_rx_data_slot *data_ring; /* read by NIC */
dma_addr_t data_bus; /* dma mapping of the slots */
struct gve_rx_slot_page_info *page_info; /* page info of the buffers */
struct gve_queue_page_list *qpl; /* qpl assigned to this queue */
+ u8 raw_addressing; /* use raw_addressing? */
};
struct gve_priv;
@@ -82,6 +86,7 @@ struct gve_rx_ring {
u32 cnt; /* free-running total number of completed packets */
u32 fill_cnt; /* free-running total number of descs and buffs posted */
u32 mask; /* masks the cnt and fill_cnt to the size of the ring */
+ u32 db_threshold; /* threshold for posting new buffs and descs */
u64 rx_copybreak_pkt; /* free-running count of copybreak packets */
u64 rx_copied_pkt; /* free-running total number of copied packets */
u64 rx_skb_alloc_fail; /* free-running count of skb alloc fails */
@@ -107,12 +112,20 @@ struct gve_tx_iovec {
u32 iov_padding; /* padding associated with this segment */
};
+struct gve_tx_dma_buf {
+ DEFINE_DMA_UNMAP_ADDR(dma);
+ DEFINE_DMA_UNMAP_LEN(len);
+};
+
/* Tracks the memory in the fifo occupied by the skb. Mapped 1:1 to a desc
* ring entry but only used for a pkt_desc not a seg_desc
*/
struct gve_tx_buffer_state {
struct sk_buff *skb; /* skb for this pkt */
- struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+ union {
+ struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
+ struct gve_tx_dma_buf buf;
+ };
};
/* A TX buffer - each queue has one */
@@ -135,13 +148,17 @@ struct gve_tx_ring {
__be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */
u64 pkt_done; /* free-running - total packets completed */
u64 bytes_done; /* free-running - total bytes completed */
+ u64 dropped_pkt; /* free-running - total packets dropped */
+ u64 dma_mapping_error; /* count of dma mapping errors */
/* Cacheline 2 -- Read-mostly fields */
union gve_tx_desc *desc ____cacheline_aligned;
struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */
struct netdev_queue *netdev_txq;
struct gve_queue_resources *q_resources; /* head and tail pointer idx */
+ struct device *dev;
u32 mask; /* masks req and done down to queue size */
+ u8 raw_addressing; /* use raw_addressing? */
/* Slow-path fields */
u32 q_num ____cacheline_aligned; /* queue idx */
@@ -194,11 +211,12 @@ struct gve_priv {
u16 tx_desc_cnt; /* num desc per ring */
u16 rx_desc_cnt; /* num desc per ring */
u16 tx_pages_per_qpl; /* tx buffer length */
- u16 rx_pages_per_qpl; /* rx buffer length */
+ u16 rx_data_slot_cnt; /* rx buffer length */
u64 max_registered_pages;
u64 num_registered_pages; /* num pages registered with NIC */
u32 rx_copybreak; /* copy packets smaller than this */
u16 default_num_queues; /* default num queues to set up */
+ u8 raw_addressing; /* 1 if this dev supports raw addressing, 0 otherwise */
struct gve_queue_config tx_cfg;
struct gve_queue_config rx_cfg;
@@ -436,14 +454,14 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
*/
static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
{
- return priv->tx_cfg.num_queues;
+ return priv->raw_addressing ? 0 : priv->tx_cfg.num_queues;
}
/* Returns the number of rx queue page lists
*/
static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
{
- return priv->rx_cfg.num_queues;
+ return priv->raw_addressing ? 0 : priv->rx_cfg.num_queues;
}
/* Returns a pointer to the next available tx qpl in the list of qpls
@@ -497,15 +515,6 @@ static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
return DMA_FROM_DEVICE;
}
-/* Returns true if the max mtu allows page recycling */
-static inline bool gve_can_recycle_pages(struct net_device *dev)
-{
- /* We can't recycle the pages if we can't fit a packet into half a
- * page.
- */
- return dev->max_mtu <= PAGE_SIZE / 2;
-}
-
/* buffers */
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 24ae6a28a806..53864f200599 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -14,6 +14,57 @@
#define GVE_ADMINQ_SLEEP_LEN 20
#define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK 100
+#define GVE_DEVICE_OPTION_ERROR_FMT "%s option error:\n" \
+"Expected: length=%d, feature_mask=%x.\n" \
+"Actual: length=%d, feature_mask=%x.\n"
+
+static
+struct gve_device_option *gve_get_next_option(struct gve_device_descriptor *descriptor,
+ struct gve_device_option *option)
+{
+ void *option_end, *descriptor_end;
+
+ option_end = (void *)(option + 1) + be16_to_cpu(option->option_length);
+ descriptor_end = (void *)descriptor + be16_to_cpu(descriptor->total_length);
+
+ return option_end > descriptor_end ? NULL : (struct gve_device_option *)option_end;
+}
+
+static
+void gve_parse_device_option(struct gve_priv *priv,
+ struct gve_device_descriptor *device_descriptor,
+ struct gve_device_option *option)
+{
+ u16 option_length = be16_to_cpu(option->option_length);
+ u16 option_id = be16_to_cpu(option->option_id);
+
+ switch (option_id) {
+ case GVE_DEV_OPT_ID_RAW_ADDRESSING:
+ /* If the length or feature mask doesn't match,
+ * continue without enabling the feature.
+ */
+ if (option_length != GVE_DEV_OPT_LEN_RAW_ADDRESSING ||
+ option->feat_mask != cpu_to_be32(GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING)) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, "Raw Addressing",
+ GVE_DEV_OPT_LEN_RAW_ADDRESSING,
+ cpu_to_be32(GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING),
+ option_length, option->feat_mask);
+ priv->raw_addressing = 0;
+ } else {
+ dev_info(&priv->pdev->dev,
+ "Raw addressing device option enabled.\n");
+ priv->raw_addressing = 1;
+ }
+ break;
+ default:
+ /* If we don't recognize the option just continue
+ * without doing anything.
+ */
+ dev_dbg(&priv->pdev->dev, "Unrecognized device option 0x%hx not enabled.\n",
+ option_id);
+ }
+}
+
int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
{
priv->adminq = dma_alloc_coherent(dev, PAGE_SIZE,
@@ -318,8 +369,10 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
{
struct gve_tx_ring *tx = &priv->tx[queue_index];
union gve_adminq_command cmd;
+ u32 qpl_id;
int err;
+ qpl_id = priv->raw_addressing ? GVE_RAW_ADDRESSING_QPL_ID : tx->tx_fifo.qpl->id;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE);
cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) {
@@ -328,7 +381,7 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
.queue_resources_addr =
cpu_to_be64(tx->q_resources_bus),
.tx_ring_addr = cpu_to_be64(tx->bus),
- .queue_page_list_id = cpu_to_be32(tx->tx_fifo.qpl->id),
+ .queue_page_list_id = cpu_to_be32(qpl_id),
.ntfy_id = cpu_to_be32(tx->ntfy_id),
};
@@ -357,8 +410,10 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
{
struct gve_rx_ring *rx = &priv->rx[queue_index];
union gve_adminq_command cmd;
+ u32 qpl_id;
int err;
+ qpl_id = priv->raw_addressing ? GVE_RAW_ADDRESSING_QPL_ID : rx->data.qpl->id;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE);
cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) {
@@ -369,7 +424,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
.queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
.rx_desc_ring_addr = cpu_to_be64(rx->desc.bus),
.rx_data_ring_addr = cpu_to_be64(rx->data.data_bus),
- .queue_page_list_id = cpu_to_be32(rx->data.qpl->id),
+ .queue_page_list_id = cpu_to_be32(qpl_id),
};
err = gve_adminq_issue_cmd(priv, &cmd);
@@ -460,11 +515,14 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
int gve_adminq_describe_device(struct gve_priv *priv)
{
struct gve_device_descriptor *descriptor;
+ struct gve_device_option *dev_opt;
union gve_adminq_command cmd;
dma_addr_t descriptor_bus;
+ u16 num_options;
int err = 0;
u8 *mac;
u16 mtu;
+ int i;
memset(&cmd, 0, sizeof(cmd));
descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE,
@@ -511,13 +569,30 @@ int gve_adminq_describe_device(struct gve_priv *priv)
mac = descriptor->mac;
dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac);
priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
- priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl);
- if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) {
- dev_err(&priv->pdev->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
- priv->rx_pages_per_qpl);
- priv->rx_desc_cnt = priv->rx_pages_per_qpl;
+ priv->rx_data_slot_cnt = be16_to_cpu(descriptor->rx_pages_per_qpl);
+ if (priv->rx_data_slot_cnt < priv->rx_desc_cnt) {
+ dev_err(&priv->pdev->dev, "rx_data_slot_cnt cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n",
+ priv->rx_data_slot_cnt);
+ priv->rx_desc_cnt = priv->rx_data_slot_cnt;
}
priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
+ dev_opt = (void *)(descriptor + 1);
+
+ num_options = be16_to_cpu(descriptor->num_device_options);
+ for (i = 0; i < num_options; i++) {
+ struct gve_device_option *next_opt;
+
+ next_opt = gve_get_next_option(descriptor, dev_opt);
+ if (!next_opt) {
+ dev_err(&priv->dev->dev,
+ "options exceed device_descriptor's total length.\n");
+ err = -EINVAL;
+ goto free_device_descriptor;
+ }
+
+ gve_parse_device_option(priv, descriptor, dev_opt);
+ dev_opt = next_opt;
+ }
free_device_descriptor:
dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor,
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 015796a20118..d320c2ffd87c 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -79,12 +79,17 @@ struct gve_device_descriptor {
static_assert(sizeof(struct gve_device_descriptor) == 40);
-struct device_option {
- __be32 option_id;
- __be32 option_length;
+struct gve_device_option {
+ __be16 option_id;
+ __be16 option_length;
+ __be32 feat_mask;
};
-static_assert(sizeof(struct device_option) == 8);
+static_assert(sizeof(struct gve_device_option) == 8);
+
+#define GVE_DEV_OPT_ID_RAW_ADDRESSING 0x1
+#define GVE_DEV_OPT_LEN_RAW_ADDRESSING 0x0
+#define GVE_DEV_OPT_FEAT_MASK_RAW_ADDRESSING 0x0
struct gve_adminq_configure_device_resources {
__be64 counter_array;
@@ -111,6 +116,8 @@ struct gve_adminq_unregister_page_list {
static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4);
+#define GVE_RAW_ADDRESSING_QPL_ID 0xFFFFFFFF
+
struct gve_adminq_create_tx_queue {
__be32 queue_id;
__be32 reserved;
diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h
index 54779871d52e..05ae6300e984 100644
--- a/drivers/net/ethernet/google/gve/gve_desc.h
+++ b/drivers/net/ethernet/google/gve/gve_desc.h
@@ -16,9 +16,11 @@
* Base addresses encoded in seg_addr are not assumed to be physical
* addresses. The ring format assumes these come from some linear address
* space. This could be physical memory, kernel virtual memory, user virtual
- * memory. gVNIC uses lists of registered pages. Each queue is assumed
- * to be associated with a single such linear address space to ensure a
- * consistent meaning for seg_addrs posted to its rings.
+ * memory.
+ * If raw dma addressing is not supported then gVNIC uses lists of registered
+ * pages. Each queue is assumed to be associated with a single such linear
+ * address space to ensure a consistent meaning for seg_addrs posted to its
+ * rings.
*/
struct gve_tx_pkt_desc {
@@ -72,12 +74,15 @@ struct gve_rx_desc {
} __packed;
static_assert(sizeof(struct gve_rx_desc) == 64);
-/* As with the Tx ring format, the qpl_offset entries below are offsets into an
- * ordered list of registered pages.
+/* If the device supports raw dma addressing then the addr in data slot is
+ * the dma address of the buffer.
+ * If the device only supports registered segments then the addr is a byte
+ * offset into the registered segment (an ordered list of pages) where the
+ * buffer is.
*/
-struct gve_rx_data_slot {
- /* byte offset into the rx registered segment of this slot */
+union gve_rx_data_slot {
__be64 qpl_offset;
+ __be64 addr;
};
/* GVE Recive Packet Descriptor Seq No */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index 7b44769bd87c..0901fa6853ca 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -4,6 +4,7 @@
* Copyright (C) 2015-2019 Google, Inc.
*/
+#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include "gve.h"
#include "gve_adminq.h"
@@ -50,6 +51,7 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_bytes[%u]",
"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
+ "tx_dma_mapping_error[%u]",
};
static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
@@ -322,6 +324,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = tx->stop_queue;
data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv,
tx));
+ data[i++] = tx->dma_mapping_error;
/* stats from NIC */
if (skip_nic_stats) {
/* skip NIC tx stats */
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 02e7d74779f4..7302498c6df3 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -677,6 +677,10 @@ static int gve_alloc_qpls(struct gve_priv *priv)
int i, j;
int err;
+ /* Raw addressing means no QPLs */
+ if (priv->raw_addressing)
+ return 0;
+
priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
if (!priv->qpls)
return -ENOMEM;
@@ -689,7 +693,7 @@ static int gve_alloc_qpls(struct gve_priv *priv)
}
for (; i < num_qpls; i++) {
err = gve_alloc_queue_page_list(priv, i,
- priv->rx_pages_per_qpl);
+ priv->rx_data_slot_cnt);
if (err)
goto free_qpls;
}
@@ -717,6 +721,10 @@ static void gve_free_qpls(struct gve_priv *priv)
int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
int i;
+ /* Raw addressing means no QPLs */
+ if (priv->raw_addressing)
+ return;
+
kvfree(priv->qpl_cfg.qpl_id_map);
for (i = 0; i < num_qpls; i++)
@@ -1077,6 +1085,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
if (skip_describe_device)
goto setup_device;
+ priv->raw_addressing = false;
/* Get the initial information we need from the device */
err = gve_adminq_describe_device(priv);
if (err) {
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 008fa897a3e6..bf123fe524c4 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -16,12 +16,39 @@ static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
block->rx = NULL;
}
+static void gve_rx_free_buffer(struct device *dev,
+ struct gve_rx_slot_page_info *page_info,
+ union gve_rx_data_slot *data_slot)
+{
+ dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
+ GVE_DATA_SLOT_ADDR_PAGE_MASK);
+
+ gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
+}
+
+static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+ if (rx->data.raw_addressing) {
+ u32 slots = rx->mask + 1;
+ int i;
+
+ for (i = 0; i < slots; i++)
+ gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
+ &rx->data.data_ring[i]);
+ } else {
+ gve_unassign_qpl(priv, rx->data.qpl->id);
+ rx->data.qpl = NULL;
+ }
+ kvfree(rx->data.page_info);
+ rx->data.page_info = NULL;
+}
+
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
struct gve_rx_ring *rx = &priv->rx[idx];
struct device *dev = &priv->pdev->dev;
+ u32 slots = rx->mask + 1;
size_t bytes;
- u32 slots;
gve_rx_remove_from_block(priv, idx);
@@ -33,11 +60,8 @@ static void gve_rx_free_ring(struct gve_priv *priv, int idx)
rx->q_resources, rx->q_resources_bus);
rx->q_resources = NULL;
- gve_unassign_qpl(priv, rx->data.qpl->id);
- rx->data.qpl = NULL;
- kvfree(rx->data.page_info);
+ gve_rx_unfill_pages(priv, rx);
- slots = rx->mask + 1;
bytes = sizeof(*rx->data.data_ring) * slots;
dma_free_coherent(dev, bytes, rx->data.data_ring,
rx->data.data_bus);
@@ -46,19 +70,35 @@ static void gve_rx_free_ring(struct gve_priv *priv, int idx)
}
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
- struct gve_rx_data_slot *slot,
- dma_addr_t addr, struct page *page)
+ dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
page_info->page = page;
page_info->page_offset = 0;
page_info->page_address = page_address(page);
- slot->qpl_offset = cpu_to_be64(addr);
+ *slot_addr = cpu_to_be64(addr);
+}
+
+static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
+ struct gve_rx_slot_page_info *page_info,
+ union gve_rx_data_slot *data_slot)
+{
+ struct page *page;
+ dma_addr_t dma;
+ int err;
+
+ err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
+ if (err)
+ return err;
+
+ gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
+ return 0;
}
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
struct gve_priv *priv = rx->gve;
u32 slots;
+ int err;
int i;
/* Allocate one page per Rx queue slot. Each page is split into two
@@ -71,17 +111,30 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
if (!rx->data.page_info)
return -ENOMEM;
- rx->data.qpl = gve_assign_rx_qpl(priv);
-
+ if (!rx->data.raw_addressing)
+ rx->data.qpl = gve_assign_rx_qpl(priv);
for (i = 0; i < slots; i++) {
- struct page *page = rx->data.qpl->pages[i];
- dma_addr_t addr = i * PAGE_SIZE;
+ if (!rx->data.raw_addressing) {
+ struct page *page = rx->data.qpl->pages[i];
+ dma_addr_t addr = i * PAGE_SIZE;
- gve_setup_rx_buffer(&rx->data.page_info[i],
- &rx->data.data_ring[i], addr, page);
+ gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
+ &rx->data.data_ring[i].qpl_offset);
+ continue;
+ }
+ err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
+ &rx->data.data_ring[i]);
+ if (err)
+ goto alloc_err;
}
return slots;
+alloc_err:
+ while (i--)
+ gve_rx_free_buffer(&priv->pdev->dev,
+ &rx->data.page_info[i],
+ &rx->data.data_ring[i]);
+ return err;
}
static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
@@ -110,8 +163,9 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
rx->gve = priv;
rx->q_num = idx;
- slots = priv->rx_pages_per_qpl;
+ slots = priv->rx_data_slot_cnt;
rx->mask = slots - 1;
+ rx->data.raw_addressing = priv->raw_addressing;
/* alloc rx data ring */
bytes = sizeof(*rx->data.data_ring) * slots;
@@ -156,8 +210,8 @@ static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
err = -ENOMEM;
goto abort_with_q_resources;
}
- rx->mask = slots - 1;
rx->cnt = 0;
+ rx->db_threshold = priv->rx_desc_cnt / 2;
rx->desc.seqno = 1;
gve_rx_add_to_block(priv, idx);
@@ -168,7 +222,7 @@ abort_with_q_resources:
rx->q_resources, rx->q_resources_bus);
rx->q_resources = NULL;
abort_filled:
- kvfree(rx->data.page_info);
+ gve_rx_unfill_pages(priv, rx);
abort_with_slots:
bytes = sizeof(*rx->data.data_ring) * slots;
dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
@@ -225,15 +279,14 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
return PKT_HASH_TYPE_L2;
}
-static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
- struct net_device *dev,
+static struct sk_buff *gve_rx_copy(struct net_device *dev,
struct napi_struct *napi,
struct gve_rx_slot_page_info *page_info,
u16 len)
{
struct sk_buff *skb = napi_alloc_skb(napi, len);
void *va = page_info->page_address + GVE_RX_PAD +
- page_info->page_offset;
+ (page_info->page_offset ? PAGE_SIZE / 2 : 0);
if (unlikely(!skb))
return NULL;
@@ -244,15 +297,10 @@ static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
skb->protocol = eth_type_trans(skb, dev);
- u64_stats_update_begin(&rx->statss);
- rx->rx_copied_pkt++;
- u64_stats_update_end(&rx->statss);
-
return skb;
}
-static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
- struct napi_struct *napi,
+static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
struct gve_rx_slot_page_info *page_info,
u16 len)
{
@@ -262,20 +310,92 @@ static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
return NULL;
skb_add_rx_frag(skb, 0, page_info->page,
- page_info->page_offset +
+ (page_info->page_offset ? PAGE_SIZE / 2 : 0) +
GVE_RX_PAD, len, PAGE_SIZE / 2);
return skb;
}
-static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
- struct gve_rx_data_slot *data_ring)
+static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
+{
+ const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);
+
+ /* "flip" to other packet buffer on this page */
+ page_info->page_offset ^= 0x1;
+ *(slot_addr) ^= offset;
+}
+
+static bool gve_rx_can_flip_buffers(struct net_device *netdev)
+{
+ return PAGE_SIZE == 4096
+ ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
+}
+
+static int gve_rx_can_recycle_buffer(struct page *page)
+{
+ int pagecount = page_count(page);
+
+ /* This page is not being used by any SKBs - reuse */
+ if (pagecount == 1)
+ return 1;
+ /* This page is still being used by an SKB - we can't reuse */
+ else if (pagecount >= 2)
+ return 0;
+ WARN(pagecount < 1, "Pagecount should never be < 1");
+ return -1;
+}
+
+static struct sk_buff *
+gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
+ struct gve_rx_slot_page_info *page_info, u16 len,
+ struct napi_struct *napi,
+ union gve_rx_data_slot *data_slot)
{
- u64 addr = be64_to_cpu(data_ring->qpl_offset);
+ struct sk_buff *skb;
+
+ skb = gve_rx_add_frags(napi, page_info, len);
+ if (!skb)
+ return NULL;
- page_info->page_offset ^= PAGE_SIZE / 2;
- addr ^= PAGE_SIZE / 2;
- data_ring->qpl_offset = cpu_to_be64(addr);
+ /* Optimistically stop the kernel from freeing the page by increasing
+ * the page bias. We will check the refcount in refill to determine if
+ * we need to alloc a new page.
+ */
+ get_page(page_info->page);
+
+ return skb;
+}
+
+static struct sk_buff *
+gve_rx_qpl(struct device *dev, struct net_device *netdev,
+ struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
+ u16 len, struct napi_struct *napi,
+ union gve_rx_data_slot *data_slot)
+{
+ struct sk_buff *skb;
+
+ /* if raw_addressing mode is not enabled gvnic can only receive into
+ * registered segments. If the buffer can't be recycled, our only
+ * choice is to copy the data out of it so that we can return it to the
+ * device.
+ */
+ if (page_info->can_flip) {
+ skb = gve_rx_add_frags(napi, page_info, len);
+ /* No point in recycling if we didn't get the skb */
+ if (skb) {
+ /* Make sure that the page isn't freed. */
+ get_page(page_info->page);
+ gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
+ }
+ } else {
+ skb = gve_rx_copy(netdev, napi, page_info, len);
+ if (skb) {
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_copied_pkt++;
+ u64_stats_update_end(&rx->statss);
+ }
+ }
+ return skb;
}
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
@@ -285,8 +405,9 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
struct gve_priv *priv = rx->gve;
struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
struct net_device *dev = priv->dev;
- struct sk_buff *skb;
- int pagecount;
+ union gve_rx_data_slot *data_slot;
+ struct sk_buff *skb = NULL;
+ dma_addr_t page_bus;
u16 len;
/* drop this packet */
@@ -294,71 +415,55 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
u64_stats_update_begin(&rx->statss);
rx->rx_desc_err_dropped_pkt++;
u64_stats_update_end(&rx->statss);
- return true;
+ return false;
}
len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
page_info = &rx->data.page_info[idx];
- dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx],
- PAGE_SIZE, DMA_FROM_DEVICE);
- /* gvnic can only receive into registered segments. If the buffer
- * can't be recycled, our only choice is to copy the data out of
- * it so that we can return it to the device.
- */
+ data_slot = &rx->data.data_ring[idx];
+ page_bus = (rx->data.raw_addressing) ?
+ be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
+ rx->data.qpl->page_buses[idx];
+ dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
+ PAGE_SIZE, DMA_FROM_DEVICE);
- if (PAGE_SIZE == 4096) {
- if (len <= priv->rx_copybreak) {
- /* Just copy small packets */
- skb = gve_rx_copy(rx, dev, napi, page_info, len);
- u64_stats_update_begin(&rx->statss);
- rx->rx_copybreak_pkt++;
- u64_stats_update_end(&rx->statss);
- goto have_skb;
- }
- if (unlikely(!gve_can_recycle_pages(dev))) {
- skb = gve_rx_copy(rx, dev, napi, page_info, len);
- goto have_skb;
- }
- pagecount = page_count(page_info->page);
- if (pagecount == 1) {
- /* No part of this page is used by any SKBs; we attach
- * the page fragment to a new SKB and pass it up the
- * stack.
- */
- skb = gve_rx_add_frags(dev, napi, page_info, len);
- if (!skb) {
- u64_stats_update_begin(&rx->statss);
- rx->rx_skb_alloc_fail++;
- u64_stats_update_end(&rx->statss);
- return true;
+ if (len <= priv->rx_copybreak) {
+ /* Just copy small packets */
+ skb = gve_rx_copy(dev, napi, page_info, len);
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_copied_pkt++;
+ rx->rx_copybreak_pkt++;
+ u64_stats_update_end(&rx->statss);
+ } else {
+ u8 can_flip = gve_rx_can_flip_buffers(dev);
+ int recycle = 0;
+
+ if (can_flip) {
+ recycle = gve_rx_can_recycle_buffer(page_info->page);
+ if (recycle < 0) {
+ if (!rx->data.raw_addressing)
+ gve_schedule_reset(priv);
+ return false;
}
- /* Make sure the kernel stack can't release the page */
- get_page(page_info->page);
- /* "flip" to other packet buffer on this page */
- gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
- } else if (pagecount >= 2) {
- /* We have previously passed the other half of this
- * page up the stack, but it has not yet been freed.
- */
- skb = gve_rx_copy(rx, dev, napi, page_info, len);
+ }
+
+ page_info->can_flip = can_flip && recycle;
+ if (rx->data.raw_addressing) {
+ skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
+ page_info, len, napi,
+ data_slot);
} else {
- WARN(pagecount < 1, "Pagecount should never be < 1");
- return false;
+ skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
+ page_info, len, napi, data_slot);
}
- } else {
- skb = gve_rx_copy(rx, dev, napi, page_info, len);
}
-have_skb:
- /* We didn't manage to allocate an skb but we haven't had any
- * reset worthy failures.
- */
if (!skb) {
u64_stats_update_begin(&rx->statss);
rx->rx_skb_alloc_fail++;
u64_stats_update_end(&rx->statss);
- return true;
+ return false;
}
if (likely(feat & NETIF_F_RXCSUM)) {
@@ -399,19 +504,73 @@ static bool gve_rx_work_pending(struct gve_rx_ring *rx)
return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}
+static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
+{
+ int refill_target = rx->mask + 1;
+ u32 fill_cnt = rx->fill_cnt;
+
+ while (fill_cnt - rx->cnt < refill_target) {
+ struct gve_rx_slot_page_info *page_info;
+ u32 idx = fill_cnt & rx->mask;
+
+ page_info = &rx->data.page_info[idx];
+ if (page_info->can_flip) {
+ /* The other half of the page is free because it was
+ * free when we processed the descriptor. Flip to it.
+ */
+ union gve_rx_data_slot *data_slot =
+ &rx->data.data_ring[idx];
+
+ gve_rx_flip_buff(page_info, &data_slot->addr);
+ page_info->can_flip = 0;
+ } else {
+ /* It is possible that the networking stack has already
+ * finished processing all outstanding packets in the buffer
+ * and it can be reused.
+ * Flipping is unnecessary here - if the networking stack still
+ * owns half the page it is impossible to tell which half. Either
+ * the whole page is free or it needs to be replaced.
+ */
+ int recycle = gve_rx_can_recycle_buffer(page_info->page);
+
+ if (recycle < 0) {
+ if (!rx->data.raw_addressing)
+ gve_schedule_reset(priv);
+ return false;
+ }
+ if (!recycle) {
+ /* We can't reuse the buffer - alloc a new one*/
+ union gve_rx_data_slot *data_slot =
+ &rx->data.data_ring[idx];
+ struct device *dev = &priv->pdev->dev;
+
+ gve_rx_free_buffer(dev, page_info, data_slot);
+ page_info->page = NULL;
+ if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
+ break;
+ }
+ }
+ fill_cnt++;
+ }
+ rx->fill_cnt = fill_cnt;
+ return true;
+}
+
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
netdev_features_t feat)
{
struct gve_priv *priv = rx->gve;
+ u32 work_done = 0, packets = 0;
struct gve_rx_desc *desc;
u32 cnt = rx->cnt;
u32 idx = cnt & rx->mask;
- u32 work_done = 0;
u64 bytes = 0;
desc = rx->desc.desc_ring + idx;
while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
work_done < budget) {
+ bool dropped;
+
netif_info(priv, rx_status, priv->dev,
"[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
rx->q_num, idx, desc, desc->flags_seq);
@@ -419,9 +578,11 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
"[%d] seqno=%d rx->desc.seqno=%d\n",
rx->q_num, GVE_SEQNO(desc->flags_seq),
rx->desc.seqno);
- bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
- if (!gve_rx(rx, desc, feat, idx))
- gve_schedule_reset(priv);
+ dropped = !gve_rx(rx, desc, feat, idx);
+ if (!dropped) {
+ bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
+ packets++;
+ }
cnt++;
idx = cnt & rx->mask;
desc = rx->desc.desc_ring + idx;
@@ -429,15 +590,34 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
work_done++;
}
- if (!work_done)
+ if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
return false;
u64_stats_update_begin(&rx->statss);
- rx->rpackets += work_done;
+ rx->rpackets += packets;
rx->rbytes += bytes;
u64_stats_update_end(&rx->statss);
rx->cnt = cnt;
- rx->fill_cnt += work_done;
+
+ /* restock ring slots */
+ if (!rx->data.raw_addressing) {
+ /* In QPL mode buffs are refilled as the desc are processed */
+ rx->fill_cnt += work_done;
+ } else if (rx->fill_cnt - cnt <= rx->db_threshold) {
+ /* In raw addressing mode buffs are only refilled if the avail
+ * falls below a threshold.
+ */
+ if (!gve_rx_refill_buffers(priv, rx))
+ return false;
+
+ /* If we were not able to completely refill buffers, we'll want
+ * to schedule this queue for work again to refill buffers.
+ */
+ if (rx->fill_cnt - cnt <= rx->db_threshold) {
+ gve_rx_write_doorbell(priv, rx);
+ return true;
+ }
+ }
gve_rx_write_doorbell(priv, rx);
return gve_rx_work_pending(rx);
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index d0244feb0301..6938f3a939d6 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -158,9 +158,11 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx)
tx->q_resources, tx->q_resources_bus);
tx->q_resources = NULL;
- gve_tx_fifo_release(priv, &tx->tx_fifo);
- gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
- tx->tx_fifo.qpl = NULL;
+ if (!tx->raw_addressing) {
+ gve_tx_fifo_release(priv, &tx->tx_fifo);
+ gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
+ tx->tx_fifo.qpl = NULL;
+ }
bytes = sizeof(*tx->desc) * slots;
dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
@@ -206,11 +208,15 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
if (!tx->desc)
goto abort_with_info;
- tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
+ tx->raw_addressing = priv->raw_addressing;
+ tx->dev = &priv->pdev->dev;
+ if (!tx->raw_addressing) {
+ tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
- /* map Tx FIFO */
- if (gve_tx_fifo_init(priv, &tx->tx_fifo))
- goto abort_with_desc;
+ /* map Tx FIFO */
+ if (gve_tx_fifo_init(priv, &tx->tx_fifo))
+ goto abort_with_desc;
+ }
tx->q_resources =
dma_alloc_coherent(hdev,
@@ -228,7 +234,8 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
return 0;
abort_with_fifo:
- gve_tx_fifo_release(priv, &tx->tx_fifo);
+ if (!tx->raw_addressing)
+ gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_desc:
dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
tx->desc = NULL;
@@ -301,27 +308,47 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
return bytes;
}
-/* The most descriptors we could need are 3 - 1 for the headers, 1 for
- * the beginning of the payload at the end of the FIFO, and 1 if the
- * payload wraps to the beginning of the FIFO.
+/* The most descriptors we could need is MAX_SKB_FRAGS + 3 : 1 for each skb frag,
+ * +1 for the skb linear portion, +1 for when tcp hdr needs to be in separate descriptor,
+ * and +1 if the payload wraps to the beginning of the FIFO.
*/
-#define MAX_TX_DESC_NEEDED 3
+#define MAX_TX_DESC_NEEDED (MAX_SKB_FRAGS + 3)
+static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info)
+{
+ if (info->skb) {
+ dma_unmap_single(dev, dma_unmap_addr(&info->buf, dma),
+ dma_unmap_len(&info->buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(&info->buf, len, 0);
+ } else {
+ dma_unmap_page(dev, dma_unmap_addr(&info->buf, dma),
+ dma_unmap_len(&info->buf, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(&info->buf, len, 0);
+ }
+}
/* Check if sufficient resources (descriptor ring space, FIFO space) are
* available to transmit the given number of bytes.
*/
static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
- return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED &&
- gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required));
+ bool can_alloc = true;
+
+ if (!tx->raw_addressing)
+ can_alloc = gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required);
+
+ return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc);
}
/* Stops the queue if the skb cannot be transmitted. */
static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
{
- int bytes_required;
+ int bytes_required = 0;
+
+ if (!tx->raw_addressing)
+ bytes_required = gve_skb_fifo_bytes_required(tx, skb);
- bytes_required = gve_skb_fifo_bytes_required(tx, skb);
if (likely(gve_can_tx(tx, bytes_required)))
return 0;
@@ -395,17 +422,13 @@ static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
{
u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
u64 first_page = iov_offset / PAGE_SIZE;
- dma_addr_t dma;
u64 page;
- for (page = first_page; page <= last_page; page++) {
- dma = page_buses[page];
- dma_sync_single_for_device(dev, dma, PAGE_SIZE, DMA_TO_DEVICE);
- }
+ for (page = first_page; page <= last_page; page++)
+ dma_sync_single_for_device(dev, page_buses[page], PAGE_SIZE, DMA_TO_DEVICE);
}
-static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb,
- struct device *dev)
+static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, struct sk_buff *skb)
{
int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
union gve_tx_desc *pkt_desc, *seg_desc;
@@ -447,7 +470,7 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb,
skb_copy_bits(skb, 0,
tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
hlen);
- gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+ gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
info->iov[hdr_nfrags - 1].iov_offset,
info->iov[hdr_nfrags - 1].iov_len);
copy_offset = hlen;
@@ -463,7 +486,7 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb,
skb_copy_bits(skb, copy_offset,
tx->tx_fifo.base + info->iov[i].iov_offset,
info->iov[i].iov_len);
- gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+ gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
info->iov[i].iov_offset,
info->iov[i].iov_len);
copy_offset += info->iov[i].iov_len;
@@ -472,6 +495,94 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb,
return 1 + payload_nfrags;
}
+static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct sk_buff *skb)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int hlen, payload_nfrags, l4_hdr_offset;
+ union gve_tx_desc *pkt_desc, *seg_desc;
+ struct gve_tx_buffer_state *info;
+ bool is_gso = skb_is_gso(skb);
+ u32 idx = tx->req & tx->mask;
+ struct gve_tx_dma_buf *buf;
+ u64 addr;
+ u32 len;
+ int i;
+
+ info = &tx->info[idx];
+ pkt_desc = &tx->desc[idx];
+
+ l4_hdr_offset = skb_checksum_start_offset(skb);
+ /* If the skb is gso, then we want only up to the tcp header in the first segment
+ * to efficiently replicate on each segment otherwise we want the linear portion
+ * of the skb (which will contain the checksum because skb->csum_start and
+ * skb->csum_offset are given relative to skb->head) in the first segment.
+ */
+ hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : skb_headlen(skb);
+ len = skb_headlen(skb);
+
+ info->skb = skb;
+
+ addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(tx->dev, addr))) {
+ tx->dma_mapping_error++;
+ goto drop;
+ }
+ buf = &info->buf;
+ dma_unmap_len_set(buf, len, len);
+ dma_unmap_addr_set(buf, dma, addr);
+
+ payload_nfrags = shinfo->nr_frags;
+ if (hlen < len) {
+ /* For gso the rest of the linear portion of the skb needs to
+ * be in its own descriptor.
+ */
+ payload_nfrags++;
+ gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+ 1 + payload_nfrags, hlen, addr);
+
+ len -= hlen;
+ addr += hlen;
+ idx = (tx->req + 1) & tx->mask;
+ seg_desc = &tx->desc[idx];
+ gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+ } else {
+ gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+ 1 + payload_nfrags, hlen, addr);
+ }
+
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ const skb_frag_t *frag = &shinfo->frags[i];
+
+ idx = (idx + 1) & tx->mask;
+ seg_desc = &tx->desc[idx];
+ len = skb_frag_size(frag);
+ addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(tx->dev, addr))) {
+ tx->dma_mapping_error++;
+ goto unmap_drop;
+ }
+ buf = &tx->info[idx].buf;
+ tx->info[idx].skb = NULL;
+ dma_unmap_len_set(buf, len, len);
+ dma_unmap_addr_set(buf, dma, addr);
+
+ gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+ }
+
+ return 1 + payload_nfrags;
+
+unmap_drop:
+ i += (payload_nfrags == shinfo->nr_frags ? 1 : 2);
+ while (i--) {
+ idx--;
+ gve_tx_unmap_buf(tx->dev, &tx->info[idx & tx->mask]);
+ }
+drop:
+ tx->dropped_pkt++;
+ return 0;
+}
+
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
{
struct gve_priv *priv = netdev_priv(dev);
@@ -490,17 +601,26 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
return NETDEV_TX_BUSY;
}
- nsegs = gve_tx_add_skb(tx, skb, &priv->pdev->dev);
-
- netdev_tx_sent_queue(tx->netdev_txq, skb->len);
- skb_tx_timestamp(skb);
-
- /* give packets to NIC */
- tx->req += nsegs;
+ if (tx->raw_addressing)
+ nsegs = gve_tx_add_skb_no_copy(priv, tx, skb);
+ else
+ nsegs = gve_tx_add_skb_copy(priv, tx, skb);
+
+ /* If the packet is getting sent, we need to update the skb */
+ if (nsegs) {
+ netdev_tx_sent_queue(tx->netdev_txq, skb->len);
+ skb_tx_timestamp(skb);
+ tx->req += nsegs;
+ } else {
+ dev_kfree_skb_any(skb);
+ }
if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
return NETDEV_TX_OK;
+ /* Give packets to NIC. Even if this packet failed to send the doorbell
+ * might need to be rung because of xmit_more.
+ */
gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
return NETDEV_TX_OK;
}
@@ -525,24 +645,29 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
info = &tx->info[idx];
skb = info->skb;
+ /* Unmap the buffer */
+ if (tx->raw_addressing)
+ gve_tx_unmap_buf(tx->dev, info);
+ tx->done++;
/* Mark as free */
if (skb) {
info->skb = NULL;
bytes += skb->len;
pkts++;
dev_consume_skb_any(skb);
+ if (tx->raw_addressing)
+ continue;
/* FIFO free */
for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
- space_freed += info->iov[i].iov_len +
- info->iov[i].iov_padding;
+ space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
info->iov[i].iov_len = 0;
info->iov[i].iov_padding = 0;
}
}
- tx->done++;
}
- gve_tx_free_fifo(&tx->tx_fifo, space_freed);
+ if (!tx->raw_addressing)
+ gve_tx_free_fifo(&tx->tx_fifo, space_freed);
u64_stats_update_begin(&tx->statss);
tx->bytes_done += bytes;
tx->pkt_done += pkts;