summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/google
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/google')
-rw-r--r--drivers/net/ethernet/google/gve/gve.h114
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c89
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h10
-rw-r--r--drivers/net/ethernet/google/gve/gve_desc.h4
-rw-r--r--drivers/net/ethernet/google/gve/gve_ethtool.c5
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c31
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c126
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx_dqo.c404
8 files changed, 654 insertions, 129 deletions
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 98eb78d98e9f..0d1e681be250 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -11,6 +11,7 @@
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/u64_stats_sync.h>
+#include <net/xdp.h>
#include "gve_desc.h"
#include "gve_desc_dqo.h"
@@ -51,6 +52,26 @@
#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
+#define DQO_QPL_DEFAULT_TX_PAGES 512
+#define DQO_QPL_DEFAULT_RX_PAGES 2048
+
+/* Maximum TSO size supported on DQO */
+#define GVE_DQO_TX_MAX 0x3FFFF
+
+#define GVE_TX_BUF_SHIFT_DQO 11
+
+/* 2K buffers for DQO-QPL */
+#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO)
+#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO)
+#define GVE_MAX_TX_BUFS_PER_PKT (DIV_ROUND_UP(GVE_DQO_TX_MAX, GVE_TX_BUF_SIZE_DQO))
+
+/* If number of free/recyclable buffers are less than this threshold; driver
+ * allocs and uses a non-qpl page on the receive path of DQO QPL to free
+ * up buffers.
+ * Value is set big enough to post at least 3 64K LRO packet via 2K buffer to NIC.
+ */
+#define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96
+
/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -217,6 +238,15 @@ struct gve_rx_ring {
* which cannot be reused yet.
*/
struct gve_index_list used_buf_states;
+
+ /* qpl assigned to this queue */
+ struct gve_queue_page_list *qpl;
+
+ /* index into queue page list */
+ u32 next_qpl_page_idx;
+
+ /* track number of used buffers */
+ u16 used_buf_states_cnt;
} dqo;
};
@@ -328,8 +358,14 @@ struct gve_tx_pending_packet_dqo {
* All others correspond to `skb`'s frags and should be unmapped with
* `dma_unmap_page`.
*/
- DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]);
- DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]);
+ union {
+ struct {
+ DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]);
+ DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]);
+ };
+ s16 tx_qpl_buf_ids[GVE_MAX_TX_BUFS_PER_PKT];
+ };
+
u16 num_bufs;
/* Linked list index to next element in the list, or -1 if none */
@@ -384,6 +420,32 @@ struct gve_tx_ring {
* set.
*/
u32 last_re_idx;
+
+ /* free running number of packet buf descriptors posted */
+ u16 posted_packet_desc_cnt;
+ /* free running number of packet buf descriptors completed */
+ u16 completed_packet_desc_cnt;
+
+ /* QPL fields */
+ struct {
+ /* Linked list of gve_tx_buf_dqo. Index into
+ * tx_qpl_buf_next, or -1 if empty.
+ *
+ * This is a consumer list owned by the TX path. When it
+ * runs out, the producer list is stolen from the
+ * completion handling path
+ * (dqo_compl.free_tx_qpl_buf_head).
+ */
+ s16 free_tx_qpl_buf_head;
+
+ /* Free running count of the number of QPL tx buffers
+ * allocated
+ */
+ u32 alloc_tx_qpl_buf_cnt;
+
+ /* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */
+ u32 free_tx_qpl_buf_cnt;
+ };
} dqo_tx;
};
@@ -427,6 +489,24 @@ struct gve_tx_ring {
* reached a specified timeout.
*/
struct gve_index_list timed_out_completions;
+
+ /* QPL fields */
+ struct {
+ /* Linked list of gve_tx_buf_dqo. Index into
+ * tx_qpl_buf_next, or -1 if empty.
+ *
+ * This is the producer list, owned by the completion
+ * handling path. When the consumer list
+ * (dqo_tx.free_tx_qpl_buf_head) is runs out, this list
+ * will be stolen.
+ */
+ atomic_t free_tx_qpl_buf_head;
+
+ /* Free running count of the number of tx buffers
+ * freed
+ */
+ atomic_t free_tx_qpl_buf_cnt;
+ };
} dqo_compl;
} ____cacheline_aligned;
u64 pkt_done; /* free-running - total packets completed */
@@ -453,6 +533,21 @@ struct gve_tx_ring {
s16 num_pending_packets;
u32 complq_mask; /* complq size is complq_mask + 1 */
+
+ /* QPL fields */
+ struct {
+ /* qpl assigned to this queue */
+ struct gve_queue_page_list *qpl;
+
+ /* Each QPL page is divided into TX bounce buffers
+ * of size GVE_TX_BUF_SIZE_DQO. tx_qpl_buf_next is
+ * an array to manage linked lists of TX buffers.
+ * An entry j at index i implies that j'th buffer
+ * is next on the list after i
+ */
+ s16 *tx_qpl_buf_next;
+ u32 num_tx_qpl_bufs;
+ };
} dqo;
} ____cacheline_aligned;
struct netdev_queue *netdev_txq;
@@ -531,6 +626,7 @@ enum gve_queue_format {
GVE_GQI_RDA_FORMAT = 0x1,
GVE_GQI_QPL_FORMAT = 0x2,
GVE_DQO_RDA_FORMAT = 0x3,
+ GVE_DQO_QPL_FORMAT = 0x4,
};
struct gve_priv {
@@ -550,7 +646,8 @@ struct gve_priv {
u16 num_event_counters;
u16 tx_desc_cnt; /* num desc per ring */
u16 rx_desc_cnt; /* num desc per ring */
- u16 tx_pages_per_qpl; /* tx buffer length */
+ u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */
+ u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */
u16 rx_data_slot_cnt; /* rx buffer length */
u64 max_registered_pages;
u64 num_registered_pages; /* num pages registered with NIC */
@@ -808,11 +905,17 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
return (priv->num_ntfy_blks / 2) + queue_idx;
}
+static inline bool gve_is_qpl(struct gve_priv *priv)
+{
+ return priv->queue_format == GVE_GQI_QPL_FORMAT ||
+ priv->queue_format == GVE_DQO_QPL_FORMAT;
+}
+
/* Returns the number of tx queue page lists
*/
static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
{
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+ if (!gve_is_qpl(priv))
return 0;
return priv->tx_cfg.num_queues + priv->num_xdp_queues;
@@ -832,7 +935,7 @@ static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
*/
static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
{
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+ if (!gve_is_qpl(priv))
return 0;
return priv->rx_cfg.num_queues;
@@ -964,5 +1067,6 @@ void gve_handle_report_stats(struct gve_priv *priv);
/* exported by ethtool.c */
extern const struct ethtool_ops gve_ethtool_ops;
/* needed by ethtool */
+extern char gve_driver_name[];
extern const char gve_version_str[];
#endif /* _GVE_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 252974202a3f..79db7a6d42bc 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -39,7 +39,8 @@ void gve_parse_device_option(struct gve_priv *priv,
struct gve_device_option_gqi_rda **dev_op_gqi_rda,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
- struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
+ struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
+ struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
{
u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
u16 option_length = be16_to_cpu(option->option_length);
@@ -112,6 +113,22 @@ void gve_parse_device_option(struct gve_priv *priv,
}
*dev_op_dqo_rda = (void *)(option + 1);
break;
+ case GVE_DEV_OPT_ID_DQO_QPL:
+ if (option_length < sizeof(**dev_op_dqo_qpl) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_dqo_qpl)) {
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO QPL");
+ }
+ *dev_op_dqo_qpl = (void *)(option + 1);
+ break;
case GVE_DEV_OPT_ID_JUMBO_FRAMES:
if (option_length < sizeof(**dev_op_jumbo_frames) ||
req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
@@ -146,7 +163,8 @@ gve_process_device_options(struct gve_priv *priv,
struct gve_device_option_gqi_rda **dev_op_gqi_rda,
struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
struct gve_device_option_dqo_rda **dev_op_dqo_rda,
- struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
+ struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
+ struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
{
const int num_options = be16_to_cpu(descriptor->num_device_options);
struct gve_device_option *dev_opt;
@@ -166,7 +184,8 @@ gve_process_device_options(struct gve_priv *priv,
gve_parse_device_option(priv, descriptor, dev_opt,
dev_op_gqi_rda, dev_op_gqi_qpl,
- dev_op_dqo_rda, dev_op_jumbo_frames);
+ dev_op_dqo_rda, dev_op_jumbo_frames,
+ dev_op_dqo_qpl);
dev_opt = next_opt;
}
@@ -505,12 +524,24 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
} else {
+ u16 comp_ring_size;
+ u32 qpl_id = 0;
+
+ if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+ qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
+ comp_ring_size =
+ priv->options_dqo_rda.tx_comp_ring_entries;
+ } else {
+ qpl_id = tx->dqo.qpl->id;
+ comp_ring_size = priv->tx_desc_cnt;
+ }
+ cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
cmd.create_tx_queue.tx_ring_size =
cpu_to_be16(priv->tx_desc_cnt);
cmd.create_tx_queue.tx_comp_ring_addr =
cpu_to_be64(tx->complq_bus_dqo);
cmd.create_tx_queue.tx_comp_ring_size =
- cpu_to_be16(priv->options_dqo_rda.tx_comp_ring_entries);
+ cpu_to_be16(comp_ring_size);
}
return gve_adminq_issue_cmd(priv, &cmd);
@@ -555,6 +586,18 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
} else {
+ u16 rx_buff_ring_entries;
+ u32 qpl_id = 0;
+
+ if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+ qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
+ rx_buff_ring_entries =
+ priv->options_dqo_rda.rx_buff_ring_entries;
+ } else {
+ qpl_id = rx->dqo.qpl->id;
+ rx_buff_ring_entries = priv->rx_desc_cnt;
+ }
+ cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
cmd.create_rx_queue.rx_ring_size =
cpu_to_be16(priv->rx_desc_cnt);
cmd.create_rx_queue.rx_desc_ring_addr =
@@ -564,7 +607,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
cmd.create_rx_queue.packet_buffer_size =
cpu_to_be16(priv->data_buffer_size_dqo);
cmd.create_rx_queue.rx_buff_ring_size =
- cpu_to_be16(priv->options_dqo_rda.rx_buff_ring_entries);
+ cpu_to_be16(rx_buff_ring_entries);
cmd.create_rx_queue.enable_rsc =
!!(priv->dev->features & NETIF_F_LRO);
}
@@ -675,9 +718,13 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
{
priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
+ priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
+
+ if (priv->queue_format == GVE_DQO_QPL_FORMAT)
+ return 0;
+
priv->options_dqo_rda.tx_comp_ring_entries =
be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
- priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
priv->options_dqo_rda.rx_buff_ring_entries =
be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);
@@ -687,7 +734,9 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
static void gve_enable_supported_features(struct gve_priv *priv,
u32 supported_features_mask,
const struct gve_device_option_jumbo_frames
- *dev_op_jumbo_frames)
+ *dev_op_jumbo_frames,
+ const struct gve_device_option_dqo_qpl
+ *dev_op_dqo_qpl)
{
/* Before control reaches this point, the page-size-capped max MTU from
* the gve_device_descriptor field has already been stored in
@@ -699,6 +748,18 @@ static void gve_enable_supported_features(struct gve_priv *priv,
"JUMBO FRAMES device option enabled.\n");
priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu);
}
+
+ /* Override pages for qpl for DQO-QPL */
+ if (dev_op_dqo_qpl) {
+ priv->tx_pages_per_qpl =
+ be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl);
+ priv->rx_pages_per_qpl =
+ be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl);
+ if (priv->tx_pages_per_qpl == 0)
+ priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES;
+ if (priv->rx_pages_per_qpl == 0)
+ priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES;
+ }
}
int gve_adminq_describe_device(struct gve_priv *priv)
@@ -707,6 +768,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
+ struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
struct gve_device_descriptor *descriptor;
u32 supported_features_mask = 0;
union gve_adminq_command cmd;
@@ -733,13 +795,14 @@ int gve_adminq_describe_device(struct gve_priv *priv)
err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
&dev_op_gqi_qpl, &dev_op_dqo_rda,
- &dev_op_jumbo_frames);
+ &dev_op_jumbo_frames,
+ &dev_op_dqo_qpl);
if (err)
goto free_device_descriptor;
/* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
* is not set to GqiRda, choose the queue format in a priority order:
- * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
+ * DqoRda, DqoQpl, GqiRda, GqiQpl. Use GqiQpl as default.
*/
if (dev_op_dqo_rda) {
priv->queue_format = GVE_DQO_RDA_FORMAT;
@@ -747,7 +810,11 @@ int gve_adminq_describe_device(struct gve_priv *priv)
"Driver is running with DQO RDA queue format.\n");
supported_features_mask =
be32_to_cpu(dev_op_dqo_rda->supported_features_mask);
- } else if (dev_op_gqi_rda) {
+ } else if (dev_op_dqo_qpl) {
+ priv->queue_format = GVE_DQO_QPL_FORMAT;
+ supported_features_mask =
+ be32_to_cpu(dev_op_dqo_qpl->supported_features_mask);
+ } else if (dev_op_gqi_rda) {
priv->queue_format = GVE_GQI_RDA_FORMAT;
dev_info(&priv->pdev->dev,
"Driver is running with GQI RDA queue format.\n");
@@ -798,7 +865,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
gve_enable_supported_features(priv, supported_features_mask,
- dev_op_jumbo_frames);
+ dev_op_jumbo_frames, dev_op_dqo_qpl);
free_device_descriptor:
dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index f894beb3deaf..38a22279e863 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -109,6 +109,14 @@ struct gve_device_option_dqo_rda {
static_assert(sizeof(struct gve_device_option_dqo_rda) == 8);
+struct gve_device_option_dqo_qpl {
+ __be32 supported_features_mask;
+ __be16 tx_pages_per_qpl;
+ __be16 rx_pages_per_qpl;
+};
+
+static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8);
+
struct gve_device_option_jumbo_frames {
__be32 supported_features_mask;
__be16 max_mtu;
@@ -130,6 +138,7 @@ enum gve_dev_opt_id {
GVE_DEV_OPT_ID_GQI_RDA = 0x2,
GVE_DEV_OPT_ID_GQI_QPL = 0x3,
GVE_DEV_OPT_ID_DQO_RDA = 0x4,
+ GVE_DEV_OPT_ID_DQO_QPL = 0x7,
GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
};
@@ -139,6 +148,7 @@ enum gve_dev_opt_req_feat_mask {
GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
};
enum gve_sup_feature_mask {
diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h
index f4ae9e19b844..c2874cdcf40c 100644
--- a/drivers/net/ethernet/google/gve/gve_desc.h
+++ b/drivers/net/ethernet/google/gve/gve_desc.h
@@ -105,10 +105,10 @@ union gve_rx_data_slot {
__be64 addr;
};
-/* GVE Recive Packet Descriptor Seq No */
+/* GVE Receive Packet Descriptor Seq No */
#define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7)
-/* GVE Recive Packet Descriptor Flags */
+/* GVE Receive Packet Descriptor Flags */
#define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x)))
#define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */
#define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index cfd4b8d284d1..233e5946905e 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -15,7 +15,7 @@ static void gve_get_drvinfo(struct net_device *netdev,
{
struct gve_priv *priv = netdev_priv(netdev);
- strscpy(info->driver, "gve", sizeof(info->driver));
+ strscpy(info->driver, gve_driver_name, sizeof(info->driver));
strscpy(info->version, gve_version_str, sizeof(info->version));
strscpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info));
}
@@ -590,6 +590,9 @@ static int gve_get_link_ksettings(struct net_device *netdev,
err = gve_adminq_report_link_speed(priv);
cmd->base.speed = priv->link_speed;
+
+ cmd->base.duplex = DUPLEX_FULL;
+
return err;
}
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 8fb70db63b8b..5704b5f57cd0 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -31,8 +31,8 @@
// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)
-#define DQO_TX_MAX 0x3FFFF
+char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
@@ -493,7 +493,7 @@ static int gve_setup_device_resources(struct gve_priv *priv)
goto abort_with_stats_report;
}
- if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+ if (!gve_is_gqi(priv)) {
priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
GFP_KERNEL);
if (!priv->ptype_lut_dqo) {
@@ -1082,11 +1082,12 @@ free_qpls:
static int gve_alloc_qpls(struct gve_priv *priv)
{
int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
+ int page_count;
int start_id;
int i, j;
int err;
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+ if (!gve_is_qpl(priv))
return 0;
priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
@@ -1094,17 +1095,25 @@ static int gve_alloc_qpls(struct gve_priv *priv)
return -ENOMEM;
start_id = gve_tx_start_qpl_id(priv);
+ page_count = priv->tx_pages_per_qpl;
for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
err = gve_alloc_queue_page_list(priv, i,
- priv->tx_pages_per_qpl);
+ page_count);
if (err)
goto free_qpls;
}
start_id = gve_rx_start_qpl_id(priv);
+
+ /* For GQI_QPL number of pages allocated have 1:1 relationship with
+ * number of descriptors. For DQO, number of pages required are
+ * more than descriptors (because of out of order completions).
+ */
+ page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
+ priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
err = gve_alloc_queue_page_list(priv, i,
- priv->rx_data_slot_cnt);
+ page_count);
if (err)
goto free_qpls;
}
@@ -2050,7 +2059,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
/* Big TCP is only supported on DQ*/
if (!gve_is_gqi(priv))
- netif_set_tso_max_size(priv->dev, DQO_TX_MAX);
+ netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
priv->num_registered_pages = 0;
priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
@@ -2200,7 +2209,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
- err = pci_request_regions(pdev, "gvnic-cfg");
+ err = pci_request_regions(pdev, gve_driver_name);
if (err)
goto abort_with_enabled;
@@ -2393,8 +2402,8 @@ static const struct pci_device_id gve_id_table[] = {
{ }
};
-static struct pci_driver gvnic_driver = {
- .name = "gvnic",
+static struct pci_driver gve_driver = {
+ .name = gve_driver_name,
.id_table = gve_id_table,
.probe = gve_probe,
.remove = gve_remove,
@@ -2405,10 +2414,10 @@ static struct pci_driver gvnic_driver = {
#endif
};
-module_pci_driver(gvnic_driver);
+module_pci_driver(gve_driver);
MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
-MODULE_DESCRIPTION("gVNIC Driver");
+MODULE_DESCRIPTION("Google Virtual NIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index e57b73eb70f6..ea0e38b4d9e9 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -22,11 +22,13 @@ static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
}
static void gve_free_page_dqo(struct gve_priv *priv,
- struct gve_rx_buf_state_dqo *bs)
+ struct gve_rx_buf_state_dqo *bs,
+ bool free_page)
{
page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
- gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
- DMA_FROM_DEVICE);
+ if (free_page)
+ gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
+ DMA_FROM_DEVICE);
bs->page_info.page = NULL;
}
@@ -130,12 +132,20 @@ gve_get_recycled_buf_state(struct gve_rx_ring *rx)
*/
for (i = 0; i < 5; i++) {
buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
- if (gve_buf_ref_cnt(buf_state) == 0)
+ if (gve_buf_ref_cnt(buf_state) == 0) {
+ rx->dqo.used_buf_states_cnt--;
return buf_state;
+ }
gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
}
+ /* For QPL, we cannot allocate any new buffers and must
+ * wait for the existing ones to be available.
+ */
+ if (rx->dqo.qpl)
+ return NULL;
+
/* If there are no free buf states discard an entry from
* `used_buf_states` so it can be used.
*/
@@ -144,23 +154,39 @@ gve_get_recycled_buf_state(struct gve_rx_ring *rx)
if (gve_buf_ref_cnt(buf_state) == 0)
return buf_state;
- gve_free_page_dqo(rx->gve, buf_state);
+ gve_free_page_dqo(rx->gve, buf_state, true);
gve_free_buf_state(rx, buf_state);
}
return NULL;
}
-static int gve_alloc_page_dqo(struct gve_priv *priv,
+static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
struct gve_rx_buf_state_dqo *buf_state)
{
- int err;
+ struct gve_priv *priv = rx->gve;
+ u32 idx;
- err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page,
- &buf_state->addr, DMA_FROM_DEVICE, GFP_ATOMIC);
- if (err)
- return err;
+ if (!rx->dqo.qpl) {
+ int err;
+ err = gve_alloc_page(priv, &priv->pdev->dev,
+ &buf_state->page_info.page,
+ &buf_state->addr,
+ DMA_FROM_DEVICE, GFP_ATOMIC);
+ if (err)
+ return err;
+ } else {
+ idx = rx->dqo.next_qpl_page_idx;
+ if (idx >= priv->rx_pages_per_qpl) {
+ net_err_ratelimited("%s: Out of QPL pages\n",
+ priv->dev->name);
+ return -ENOMEM;
+ }
+ buf_state->page_info.page = rx->dqo.qpl->pages[idx];
+ buf_state->addr = rx->dqo.qpl->page_buses[idx];
+ rx->dqo.next_qpl_page_idx++;
+ }
buf_state->page_info.page_offset = 0;
buf_state->page_info.page_address =
page_address(buf_state->page_info.page);
@@ -195,9 +221,13 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
for (i = 0; i < rx->dqo.num_buf_states; i++) {
struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
-
+ /* Only free page for RDA. QPL pages are freed in gve_main. */
if (bs->page_info.page)
- gve_free_page_dqo(priv, bs);
+ gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
+ }
+ if (rx->dqo.qpl) {
+ gve_unassign_qpl(priv, rx->dqo.qpl->id);
+ rx->dqo.qpl = NULL;
}
if (rx->dqo.bufq.desc_ring) {
@@ -229,7 +259,8 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
int i;
const u32 buffer_queue_slots =
- priv->options_dqo_rda.rx_buff_ring_entries;
+ priv->queue_format == GVE_DQO_RDA_FORMAT ?
+ priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt;
const u32 completion_queue_slots = priv->rx_desc_cnt;
netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");
@@ -243,7 +274,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
rx->ctx.skb_head = NULL;
rx->ctx.skb_tail = NULL;
- rx->dqo.num_buf_states = min_t(s16, S16_MAX, buffer_queue_slots * 4);
+ rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ?
+ min_t(s16, S16_MAX, buffer_queue_slots * 4) :
+ priv->rx_pages_per_qpl;
rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
sizeof(rx->dqo.buf_states[0]),
GFP_KERNEL);
@@ -275,6 +308,13 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
if (!rx->dqo.bufq.desc_ring)
goto err;
+ if (priv->queue_format != GVE_DQO_RDA_FORMAT) {
+ rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num);
+ if (!rx->dqo.qpl)
+ goto err;
+ rx->dqo.next_qpl_page_idx = 0;
+ }
+
rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
&rx->q_resources_bus, GFP_KERNEL);
if (!rx->q_resources)
@@ -352,7 +392,7 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
if (unlikely(!buf_state))
break;
- if (unlikely(gve_alloc_page_dqo(priv, buf_state))) {
+ if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
u64_stats_update_begin(&rx->statss);
rx->rx_buf_alloc_fail++;
u64_stats_update_end(&rx->statss);
@@ -415,6 +455,7 @@ static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
mark_used:
gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+ rx->dqo.used_buf_states_cnt++;
}
static void gve_rx_skb_csum(struct sk_buff *skb,
@@ -475,6 +516,43 @@ static void gve_rx_free_skb(struct gve_rx_ring *rx)
rx->ctx.skb_tail = NULL;
}
+static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
+{
+ if (!rx->dqo.qpl)
+ return false;
+ if (rx->dqo.used_buf_states_cnt <
+ (rx->dqo.num_buf_states -
+ GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
+ return false;
+ return true;
+}
+
+static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ u16 buf_len)
+{
+ struct page *page = alloc_page(GFP_ATOMIC);
+ int num_frags;
+
+ if (!page)
+ return -ENOMEM;
+
+ memcpy(page_address(page),
+ buf_state->page_info.page_address +
+ buf_state->page_info.page_offset,
+ buf_len);
+ num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
+ skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
+ 0, buf_len, PAGE_SIZE);
+
+ u64_stats_update_begin(&rx->statss);
+ rx->rx_frag_alloc_cnt++;
+ u64_stats_update_end(&rx->statss);
+ /* Return unused buffer. */
+ gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
+ return 0;
+}
+
/* Chains multi skbs for single rx packet.
* Returns 0 if buffer is appended, -1 otherwise.
*/
@@ -502,12 +580,20 @@ static int gve_rx_append_frags(struct napi_struct *napi,
rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
}
+ /* Trigger ondemand page allocation if we are running low on buffers */
+ if (gve_rx_should_trigger_copy_ondemand(rx))
+ return gve_rx_copy_ondemand(rx, buf_state, buf_len);
+
skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
buf_state->page_info.page,
buf_state->page_info.page_offset,
buf_len, priv->data_buffer_size_dqo);
gve_dec_pagecnt_bias(&buf_state->page_info);
+ /* Advances buffer page-offset if page is partially used.
+ * Marks buffer as used if page is full.
+ */
+ gve_try_recycle_buf(priv, rx, buf_state);
return 0;
}
@@ -561,8 +647,6 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
priv)) != 0) {
goto error;
}
-
- gve_try_recycle_buf(priv, rx, buf_state);
return 0;
}
@@ -588,6 +672,12 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
goto error;
rx->ctx.skb_tail = rx->ctx.skb_head;
+ if (gve_rx_should_trigger_copy_ondemand(rx)) {
+ if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
+ goto error;
+ return 0;
+ }
+
skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
buf_state->page_info.page_offset, buf_len,
priv->data_buffer_size_dqo);
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index 3c09e66ba1ab..1e19b834a613 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -13,6 +13,89 @@
#include <linux/slab.h>
#include <linux/skbuff.h>
+/* Returns true if tx_bufs are available. */
+static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count)
+{
+ int num_avail;
+
+ if (!tx->dqo.qpl)
+ return true;
+
+ num_avail = tx->dqo.num_tx_qpl_bufs -
+ (tx->dqo_tx.alloc_tx_qpl_buf_cnt -
+ tx->dqo_tx.free_tx_qpl_buf_cnt);
+
+ if (count <= num_avail)
+ return true;
+
+ /* Update cached value from dqo_compl. */
+ tx->dqo_tx.free_tx_qpl_buf_cnt =
+ atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_cnt);
+
+ num_avail = tx->dqo.num_tx_qpl_bufs -
+ (tx->dqo_tx.alloc_tx_qpl_buf_cnt -
+ tx->dqo_tx.free_tx_qpl_buf_cnt);
+
+ return count <= num_avail;
+}
+
+static s16
+gve_alloc_tx_qpl_buf(struct gve_tx_ring *tx)
+{
+ s16 index;
+
+ index = tx->dqo_tx.free_tx_qpl_buf_head;
+
+ /* No TX buffers available, try to steal the list from the
+ * completion handler.
+ */
+ if (unlikely(index == -1)) {
+ tx->dqo_tx.free_tx_qpl_buf_head =
+ atomic_xchg(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
+ index = tx->dqo_tx.free_tx_qpl_buf_head;
+
+ if (unlikely(index == -1))
+ return index;
+ }
+
+ /* Remove TX buf from free list */
+ tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[index];
+
+ return index;
+}
+
+static void
+gve_free_tx_qpl_bufs(struct gve_tx_ring *tx,
+ struct gve_tx_pending_packet_dqo *pkt)
+{
+ s16 index;
+ int i;
+
+ if (!pkt->num_bufs)
+ return;
+
+ index = pkt->tx_qpl_buf_ids[0];
+ /* Create a linked list of buffers to be added to the free list */
+ for (i = 1; i < pkt->num_bufs; i++) {
+ tx->dqo.tx_qpl_buf_next[index] = pkt->tx_qpl_buf_ids[i];
+ index = pkt->tx_qpl_buf_ids[i];
+ }
+
+ while (true) {
+ s16 old_head = atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_head);
+
+ tx->dqo.tx_qpl_buf_next[index] = old_head;
+ if (atomic_cmpxchg(&tx->dqo_compl.free_tx_qpl_buf_head,
+ old_head,
+ pkt->tx_qpl_buf_ids[0]) == old_head) {
+ break;
+ }
+ }
+
+ atomic_add(pkt->num_bufs, &tx->dqo_compl.free_tx_qpl_buf_cnt);
+ pkt->num_bufs = 0;
+}
+
/* Returns true if a gve_tx_pending_packet_dqo object is available. */
static bool gve_has_pending_packet(struct gve_tx_ring *tx)
{
@@ -136,9 +219,40 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx)
kvfree(tx->dqo.pending_packets);
tx->dqo.pending_packets = NULL;
+ kvfree(tx->dqo.tx_qpl_buf_next);
+ tx->dqo.tx_qpl_buf_next = NULL;
+
+ if (tx->dqo.qpl) {
+ gve_unassign_qpl(priv, tx->dqo.qpl->id);
+ tx->dqo.qpl = NULL;
+ }
+
netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}
+static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx)
+{
+ int num_tx_qpl_bufs = GVE_TX_BUFS_PER_PAGE_DQO *
+ tx->dqo.qpl->num_entries;
+ int i;
+
+ tx->dqo.tx_qpl_buf_next = kvcalloc(num_tx_qpl_bufs,
+ sizeof(tx->dqo.tx_qpl_buf_next[0]),
+ GFP_KERNEL);
+ if (!tx->dqo.tx_qpl_buf_next)
+ return -ENOMEM;
+
+ tx->dqo.num_tx_qpl_bufs = num_tx_qpl_bufs;
+
+ /* Generate free TX buf list */
+ for (i = 0; i < num_tx_qpl_bufs - 1; i++)
+ tx->dqo.tx_qpl_buf_next[i] = i + 1;
+ tx->dqo.tx_qpl_buf_next[num_tx_qpl_bufs - 1] = -1;
+
+ atomic_set_release(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
+ return 0;
+}
+
static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
{
struct gve_tx_ring *tx = &priv->tx[idx];
@@ -155,7 +269,9 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
/* Queue sizes must be a power of 2 */
tx->mask = priv->tx_desc_cnt - 1;
- tx->dqo.complq_mask = priv->options_dqo_rda.tx_comp_ring_entries - 1;
+ tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ?
+ priv->options_dqo_rda.tx_comp_ring_entries - 1 :
+ tx->mask;
/* The max number of pending packets determines the maximum number of
* descriptors which maybe written to the completion queue.
@@ -211,6 +327,15 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
if (!tx->q_resources)
goto err;
+ if (gve_is_qpl(priv)) {
+ tx->dqo.qpl = gve_assign_tx_qpl(priv, idx);
+ if (!tx->dqo.qpl)
+ goto err;
+
+ if (gve_tx_qpl_buf_init(tx))
+ goto err;
+ }
+
gve_tx_add_to_block(priv, idx);
return 0;
@@ -267,20 +392,27 @@ static u32 num_avail_tx_slots(const struct gve_tx_ring *tx)
return tx->mask - num_used;
}
+static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx,
+ int desc_count, int buf_count)
+{
+ return gve_has_pending_packet(tx) &&
+ num_avail_tx_slots(tx) >= desc_count &&
+ gve_has_free_tx_qpl_bufs(tx, buf_count);
+}
+
/* Stops the queue if available descriptors is less than 'count'.
* Return: 0 if stop is not required.
*/
-static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, int count)
+static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx,
+ int desc_count, int buf_count)
{
- if (likely(gve_has_pending_packet(tx) &&
- num_avail_tx_slots(tx) >= count))
+ if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
return 0;
/* Update cached TX head pointer */
tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);
- if (likely(gve_has_pending_packet(tx) &&
- num_avail_tx_slots(tx) >= count))
+ if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
return 0;
/* No space, so stop the queue */
@@ -295,8 +427,7 @@ static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, int count)
*/
tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);
- if (likely(!gve_has_pending_packet(tx) ||
- num_avail_tx_slots(tx) < count))
+ if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
return -EBUSY;
netif_tx_start_queue(tx->netdev_txq);
@@ -444,44 +575,16 @@ gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
};
}
-/* Returns 0 on success, or < 0 on error.
- *
- * Before this function is called, the caller must ensure
- * gve_has_pending_packet(tx) returns true.
- */
static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ struct gve_tx_pending_packet_dqo *pkt,
+ s16 completion_tag,
+ u32 *desc_idx,
+ bool is_gso)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
- const bool is_gso = skb_is_gso(skb);
- u32 desc_idx = tx->dqo_tx.tail;
-
- struct gve_tx_pending_packet_dqo *pkt;
- struct gve_tx_metadata_dqo metadata;
- s16 completion_tag;
int i;
- pkt = gve_alloc_pending_packet(tx);
- pkt->skb = skb;
- pkt->num_bufs = 0;
- completion_tag = pkt - tx->dqo.pending_packets;
-
- gve_extract_tx_metadata_dqo(skb, &metadata);
- if (is_gso) {
- int header_len = gve_prep_tso(skb);
-
- if (unlikely(header_len < 0))
- goto err;
-
- gve_tx_fill_tso_ctx_desc(&tx->dqo.tx_ring[desc_idx].tso_ctx,
- skb, &metadata, header_len);
- desc_idx = (desc_idx + 1) & tx->mask;
- }
-
- gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx,
- &metadata);
- desc_idx = (desc_idx + 1) & tx->mask;
-
/* Note: HW requires that the size of a non-TSO packet be within the
* range of [17, 9728].
*
@@ -490,6 +593,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
* - Hypervisor won't allow MTU larger than 9216.
*/
+ pkt->num_bufs = 0;
/* Map the linear portion of skb */
{
u32 len = skb_headlen(skb);
@@ -503,7 +607,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
++pkt->num_bufs;
- gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr,
+ gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
completion_tag,
/*eop=*/shinfo->nr_frags == 0, is_gso);
}
@@ -522,10 +626,139 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
++pkt->num_bufs;
- gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr,
+ gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
completion_tag, is_eop, is_gso);
}
+ return 0;
+err:
+ for (i = 0; i < pkt->num_bufs; i++) {
+ if (i == 0) {
+ dma_unmap_single(tx->dev,
+ dma_unmap_addr(pkt, dma[i]),
+ dma_unmap_len(pkt, len[i]),
+ DMA_TO_DEVICE);
+ } else {
+ dma_unmap_page(tx->dev,
+ dma_unmap_addr(pkt, dma[i]),
+ dma_unmap_len(pkt, len[i]),
+ DMA_TO_DEVICE);
+ }
+ }
+ pkt->num_bufs = 0;
+ return -1;
+}
+
+/* Tx buffer i corresponds to
+ * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
+ * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
+ */
+static void gve_tx_buf_get_addr(struct gve_tx_ring *tx,
+ s16 index,
+ void **va, dma_addr_t *dma_addr)
+{
+ int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
+ int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) << GVE_TX_BUF_SHIFT_DQO;
+
+ *va = page_address(tx->dqo.qpl->pages[page_id]) + offset;
+ *dma_addr = tx->dqo.qpl->page_buses[page_id] + offset;
+}
+
+static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx,
+ struct sk_buff *skb,
+ struct gve_tx_pending_packet_dqo *pkt,
+ s16 completion_tag,
+ u32 *desc_idx,
+ bool is_gso)
+{
+ u32 copy_offset = 0;
+ dma_addr_t dma_addr;
+ u32 copy_len;
+ s16 index;
+ void *va;
+
+ /* Break the packet into buffer size chunks */
+ pkt->num_bufs = 0;
+ while (copy_offset < skb->len) {
+ index = gve_alloc_tx_qpl_buf(tx);
+ if (unlikely(index == -1))
+ goto err;
+
+ gve_tx_buf_get_addr(tx, index, &va, &dma_addr);
+ copy_len = min_t(u32, GVE_TX_BUF_SIZE_DQO,
+ skb->len - copy_offset);
+ skb_copy_bits(skb, copy_offset, va, copy_len);
+
+ copy_offset += copy_len;
+ dma_sync_single_for_device(tx->dev, dma_addr,
+ copy_len, DMA_TO_DEVICE);
+ gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb,
+ copy_len,
+ dma_addr,
+ completion_tag,
+ copy_offset == skb->len,
+ is_gso);
+
+ pkt->tx_qpl_buf_ids[pkt->num_bufs] = index;
+ ++tx->dqo_tx.alloc_tx_qpl_buf_cnt;
+ ++pkt->num_bufs;
+ }
+
+ return 0;
+err:
+ /* Should not be here if gve_has_free_tx_qpl_bufs() check is correct */
+ gve_free_tx_qpl_bufs(tx, pkt);
+ return -ENOMEM;
+}
+
+/* Returns 0 on success, or < 0 on error.
+ *
+ * Before this function is called, the caller must ensure
+ * gve_has_pending_packet(tx) returns true.
+ */
+static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx,
+ struct sk_buff *skb)
+{
+ const bool is_gso = skb_is_gso(skb);
+ u32 desc_idx = tx->dqo_tx.tail;
+ struct gve_tx_pending_packet_dqo *pkt;
+ struct gve_tx_metadata_dqo metadata;
+ s16 completion_tag;
+
+ pkt = gve_alloc_pending_packet(tx);
+ pkt->skb = skb;
+ completion_tag = pkt - tx->dqo.pending_packets;
+
+ gve_extract_tx_metadata_dqo(skb, &metadata);
+ if (is_gso) {
+ int header_len = gve_prep_tso(skb);
+
+ if (unlikely(header_len < 0))
+ goto err;
+
+ gve_tx_fill_tso_ctx_desc(&tx->dqo.tx_ring[desc_idx].tso_ctx,
+ skb, &metadata, header_len);
+ desc_idx = (desc_idx + 1) & tx->mask;
+ }
+
+ gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx,
+ &metadata);
+ desc_idx = (desc_idx + 1) & tx->mask;
+
+ if (tx->dqo.qpl) {
+ if (gve_tx_add_skb_copy_dqo(tx, skb, pkt,
+ completion_tag,
+ &desc_idx, is_gso))
+ goto err;
+ } else {
+ if (gve_tx_add_skb_no_copy_dqo(tx, skb, pkt,
+ completion_tag,
+ &desc_idx, is_gso))
+ goto err;
+ }
+
+ tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs;
+
/* Commit the changes to our state */
tx->dqo_tx.tail = desc_idx;
@@ -547,22 +780,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
return 0;
err:
- for (i = 0; i < pkt->num_bufs; i++) {
- if (i == 0) {
- dma_unmap_single(tx->dev,
- dma_unmap_addr(pkt, dma[i]),
- dma_unmap_len(pkt, len[i]),
- DMA_TO_DEVICE);
- } else {
- dma_unmap_page(tx->dev,
- dma_unmap_addr(pkt, dma[i]),
- dma_unmap_len(pkt, len[i]),
- DMA_TO_DEVICE);
- }
- }
-
pkt->skb = NULL;
- pkt->num_bufs = 0;
gve_free_pending_packet(tx, pkt);
return -1;
@@ -636,40 +854,56 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
int num_buffer_descs;
int total_num_descs;
- if (skb_is_gso(skb)) {
- /* If TSO doesn't meet HW requirements, attempt to linearize the
- * packet.
- */
- if (unlikely(!gve_can_send_tso(skb) &&
- skb_linearize(skb) < 0)) {
- net_err_ratelimited("%s: Failed to transmit TSO packet\n",
- priv->dev->name);
- goto drop;
- }
-
- if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
- goto drop;
+ if (tx->dqo.qpl) {
+ if (skb_is_gso(skb))
+ if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
+ goto drop;
- num_buffer_descs = gve_num_buffer_descs_needed(skb);
+ /* We do not need to verify the number of buffers used per
+ * packet or per segment in case of TSO as with 2K size buffers
+ * none of the TX packet rules would be violated.
+ *
+ * gve_can_send_tso() checks that each TCP segment of gso_size is
+ * not distributed over more than 9 SKB frags..
+ */
+ num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO);
} else {
- num_buffer_descs = gve_num_buffer_descs_needed(skb);
+ if (skb_is_gso(skb)) {
+ /* If TSO doesn't meet HW requirements, attempt to linearize the
+ * packet.
+ */
+ if (unlikely(!gve_can_send_tso(skb) &&
+ skb_linearize(skb) < 0)) {
+ net_err_ratelimited("%s: Failed to transmit TSO packet\n",
+ priv->dev->name);
+ goto drop;
+ }
- if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) {
- if (unlikely(skb_linearize(skb) < 0))
+ if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
goto drop;
- num_buffer_descs = 1;
+ num_buffer_descs = gve_num_buffer_descs_needed(skb);
+ } else {
+ num_buffer_descs = gve_num_buffer_descs_needed(skb);
+
+ if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) {
+ if (unlikely(skb_linearize(skb) < 0))
+ goto drop;
+
+ num_buffer_descs = 1;
+ }
}
}
/* Metadata + (optional TSO) + data descriptors. */
total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs;
if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs +
- GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP))) {
+ GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP,
+ num_buffer_descs))) {
return -1;
}
- if (unlikely(gve_tx_add_skb_no_copy_dqo(tx, skb) < 0))
+ if (unlikely(gve_tx_add_skb_dqo(tx, skb) < 0))
goto drop;
netdev_tx_sent_queue(tx->netdev_txq, skb->len);
@@ -817,7 +1051,11 @@ static void gve_handle_packet_completion(struct gve_priv *priv,
return;
}
}
- gve_unmap_packet(tx->dev, pending_packet);
+ tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs;
+ if (tx->dqo.qpl)
+ gve_free_tx_qpl_bufs(tx, pending_packet);
+ else
+ gve_unmap_packet(tx->dev, pending_packet);
*bytes += pending_packet->skb->len;
(*pkts)++;
@@ -875,12 +1113,16 @@ static void remove_miss_completions(struct gve_priv *priv,
remove_from_list(tx, &tx->dqo_compl.miss_completions,
pending_packet);
- /* Unmap buffers and free skb but do not unallocate packet i.e.
+ /* Unmap/free TX buffers and free skb but do not unallocate packet i.e.
* the completion tag is not freed to ensure that the driver
* can take appropriate action if a corresponding valid
* completion is received later.
*/
- gve_unmap_packet(tx->dev, pending_packet);
+ if (tx->dqo.qpl)
+ gve_free_tx_qpl_bufs(tx, pending_packet);
+ else
+ gve_unmap_packet(tx->dev, pending_packet);
+
/* This indicates the packet was dropped. */
dev_kfree_skb_any(pending_packet->skb);
pending_packet->skb = NULL;