summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/google/gve
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/google/gve')
-rw-r--r--drivers/net/ethernet/google/gve/Makefile4
-rw-r--r--drivers/net/ethernet/google/gve/gve.h187
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c171
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h30
-rw-r--r--drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c70
-rw-r--r--drivers/net/ethernet/google/gve/gve_desc_dqo.h3
-rw-r--r--drivers/net/ethernet/google/gve/gve_dqo.h3
-rw-r--r--drivers/net/ethernet/google/gve/gve_ethtool.c134
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c829
-rw-r--r--drivers/net/ethernet/google/gve/gve_ptp.c139
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c44
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c301
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c45
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx_dqo.c426
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.c6
15 files changed, 1752 insertions, 640 deletions
diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile
index 4520f1c07a63..e0ec227a50f7 100644
--- a/drivers/net/ethernet/google/gve/Makefile
+++ b/drivers/net/ethernet/google/gve/Makefile
@@ -1,5 +1,7 @@
# Makefile for the Google virtual Ethernet (gve) driver
obj-$(CONFIG_GVE) += gve.o
-gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \
+gve-y := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \
gve_buffer_mgmt_dqo.o
+
+gve-$(CONFIG_PTP_1588_CLOCK) += gve_ptp.o
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 8167cc5fb0df..bceaf9b05cb4 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -11,7 +11,9 @@
#include <linux/dmapool.h>
#include <linux/ethtool_netlink.h>
#include <linux/netdevice.h>
+#include <linux/net_tstamp.h>
#include <linux/pci.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/u64_stats_sync.h>
#include <net/page_pool/helpers.h>
#include <net/xdp.h>
@@ -59,6 +61,8 @@
#define GVE_MAX_RX_BUFFER_SIZE 4096
+#define GVE_XDP_RX_BUFFER_SIZE_DQO 4096
+
#define GVE_DEFAULT_RX_BUFFER_OFFSET 2048
#define GVE_PAGE_POOL_SIZE_MULTIPLIER 4
@@ -68,6 +72,9 @@
#define GVE_FLOW_RULE_IDS_CACHE_SIZE \
(GVE_ADMINQ_BUFFER_SIZE / sizeof(((struct gve_adminq_queried_flow_rule *)0)->location))
+#define GVE_RSS_KEY_SIZE 40
+#define GVE_RSS_INDIR_SIZE 128
+
#define GVE_XDP_ACTIONS 5
#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
@@ -102,7 +109,13 @@ struct gve_rx_desc_queue {
/* The page info for a single slot in the RX data queue */
struct gve_rx_slot_page_info {
- struct page *page;
+ /* netmem is used for DQO RDA mode
+ * page is used in all other modes
+ */
+ union {
+ struct page *page;
+ netmem_ref netmem;
+ };
void *page_address;
u32 page_offset; /* offset to write to in page */
unsigned int buf_size;
@@ -177,6 +190,9 @@ struct gve_rx_buf_state_dqo {
/* The page posted to HW. */
struct gve_rx_slot_page_info page_info;
+ /* XSK buffer */
+ struct xdp_buff *xsk_buff;
+
/* The DMA address corresponding to `page_info`. */
dma_addr_t addr;
@@ -218,6 +234,11 @@ struct gve_rx_cnts {
/* Contains datapath state used to represent an RX queue. */
struct gve_rx_ring {
struct gve_priv *gve;
+
+ u16 packet_buffer_size; /* Size of buffer posted to NIC */
+ u16 packet_buffer_truesize; /* Total size of RX buffer */
+ u16 rx_headroom;
+
union {
/* GQI fields */
struct {
@@ -226,7 +247,6 @@ struct gve_rx_ring {
/* threshold for posting new buffs and descs */
u32 db_threshold;
- u16 packet_buffer_size;
u32 qpl_copy_pool_mask;
u32 qpl_copy_pool_head;
@@ -314,7 +334,6 @@ struct gve_rx_ring {
/* XDP stuff */
struct xdp_rxq_info xdp_rxq;
- struct xdp_rxq_info xsk_rxq;
struct xsk_buff_pool *xsk_pool;
struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */
};
@@ -383,10 +402,24 @@ enum gve_packet_state {
GVE_PACKET_STATE_PENDING_REINJECT_COMPL,
/* No valid completion received within the specified timeout. */
GVE_PACKET_STATE_TIMED_OUT_COMPL,
+ /* XSK pending packet has received a packet/reinjection completion, or
+ * has timed out. At this point, the pending packet can be counted by
+ * xsk_tx_complete and freed.
+ */
+ GVE_PACKET_STATE_XSK_COMPLETE,
+};
+
+enum gve_tx_pending_packet_dqo_type {
+ GVE_TX_PENDING_PACKET_DQO_SKB,
+ GVE_TX_PENDING_PACKET_DQO_XDP_FRAME,
+ GVE_TX_PENDING_PACKET_DQO_XSK,
};
struct gve_tx_pending_packet_dqo {
- struct sk_buff *skb; /* skb for this packet */
+ union {
+ struct sk_buff *skb;
+ struct xdp_frame *xdpf;
+ };
/* 0th element corresponds to the linear portion of `skb`, should be
* unmapped with `dma_unmap_single`.
@@ -416,7 +449,10 @@ struct gve_tx_pending_packet_dqo {
/* Identifies the current state of the packet as defined in
* `enum gve_packet_state`.
*/
- u8 state;
+ u8 state : 3;
+
+ /* gve_tx_pending_packet_dqo_type */
+ u8 type : 2;
/* If packet is an outstanding miss completion, then the packet is
* freed if the corresponding re-injection completion is not received
@@ -438,6 +474,9 @@ struct gve_tx_ring {
/* DQO fields. */
struct {
+ /* Spinlock for XDP tx traffic */
+ spinlock_t xdp_lock;
+
/* Linked list of gve_tx_pending_packet_dqo. Index into
* pending_packets, or -1 if empty.
*
@@ -482,6 +521,8 @@ struct gve_tx_ring {
/* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */
u32 free_tx_qpl_buf_cnt;
};
+
+ atomic_t xsk_reorder_queue_tail;
} dqo_tx;
};
@@ -515,6 +556,9 @@ struct gve_tx_ring {
/* Last TX ring index fetched by HW */
atomic_t hw_tx_head;
+ u16 xsk_reorder_queue_head;
+ u16 xsk_reorder_queue_tail;
+
/* List to track pending packets which received a miss
* completion but not a corresponding reinjection.
*/
@@ -568,6 +612,8 @@ struct gve_tx_ring {
struct gve_tx_pending_packet_dqo *pending_packets;
s16 num_pending_packets;
+ u16 *xsk_reorder_queue;
+
u32 complq_mask; /* complq size is complq_mask + 1 */
/* QPL fields */
@@ -604,8 +650,6 @@ struct gve_tx_ring {
dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
struct u64_stats_sync statss; /* sync stats for 32bit archs */
struct xsk_buff_pool *xsk_pool;
- u32 xdp_xsk_wakeup;
- u32 xdp_xsk_done;
u64 xdp_xsk_sent;
u64 xdp_xmit;
u64 xdp_xmit_errors;
@@ -624,10 +668,18 @@ struct gve_notify_block {
u32 irq;
};
-/* Tracks allowed and current queue settings */
-struct gve_queue_config {
+/* Tracks allowed and current rx queue settings */
+struct gve_rx_queue_config {
u16 max_queues;
- u16 num_queues; /* current */
+ u16 num_queues;
+ u16 packet_buffer_size;
+};
+
+/* Tracks allowed and current tx queue settings */
+struct gve_tx_queue_config {
+ u16 max_queues;
+ u16 num_queues; /* number of TX queues, excluding XDP queues */
+ u16 num_xdp_queues;
};
/* Tracks the available and used qpl IDs */
@@ -651,11 +703,11 @@ struct gve_ptype_lut {
/* Parameters for allocating resources for tx queues */
struct gve_tx_alloc_rings_cfg {
- struct gve_queue_config *qcfg;
+ struct gve_tx_queue_config *qcfg;
+
+ u16 num_xdp_rings;
u16 ring_size;
- u16 start_idx;
- u16 num_rings;
bool raw_addressing;
/* Allocated resources are returned here */
@@ -665,13 +717,15 @@ struct gve_tx_alloc_rings_cfg {
/* Parameters for allocating resources for rx queues */
struct gve_rx_alloc_rings_cfg {
/* tx config is also needed to determine QPL ids */
- struct gve_queue_config *qcfg;
- struct gve_queue_config *qcfg_tx;
+ struct gve_rx_queue_config *qcfg_rx;
+ struct gve_tx_queue_config *qcfg_tx;
u16 ring_size;
u16 packet_buffer_size;
bool raw_addressing;
bool enable_header_split;
+ bool reset_rss;
+ bool xdp;
/* Allocated resources are returned here */
struct gve_rx_ring *rx;
@@ -722,6 +776,17 @@ struct gve_flow_rules_cache {
u32 rule_ids_cache_num;
};
+struct gve_rss_config {
+ u8 *hash_key;
+ u32 *hash_lut;
+};
+
+struct gve_ptp {
+ struct ptp_clock_info info;
+ struct ptp_clock *clock;
+ struct gve_priv *priv;
+};
+
struct gve_priv {
struct net_device *dev;
struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */
@@ -751,10 +816,11 @@ struct gve_priv {
u32 rx_copybreak; /* copy packets smaller than this */
u16 default_num_queues; /* default num queues to set up */
- u16 num_xdp_queues;
- struct gve_queue_config tx_cfg;
- struct gve_queue_config rx_cfg;
- u32 num_ntfy_blks; /* spilt between TX and RX so must be even */
+ struct gve_tx_queue_config tx_cfg;
+ struct gve_rx_queue_config rx_cfg;
+ unsigned long *xsk_pools; /* bitmap of RX queues with XSK pools */
+ u32 num_ntfy_blks; /* split between TX and RX so must be even */
+ int numa_node;
struct gve_registers __iomem *reg_bar0; /* see gve_register.h */
__be32 __iomem *db_bar2; /* "array" of doorbells */
@@ -786,6 +852,7 @@ struct gve_priv {
u32 adminq_set_driver_parameter_cnt;
u32 adminq_report_stats_cnt;
u32 adminq_report_link_speed_cnt;
+ u32 adminq_report_nic_timestamp_cnt;
u32 adminq_get_ptype_map_cnt;
u32 adminq_verify_driver_compatibility_cnt;
u32 adminq_query_flow_rules_cnt;
@@ -823,7 +890,6 @@ struct gve_priv {
struct gve_ptype_lut *ptype_lut_dqo;
/* Must be a power of two. */
- u16 data_buffer_size_dqo;
u16 max_rx_buffer_size; /* device limit */
enum gve_queue_format queue_format;
@@ -842,6 +908,16 @@ struct gve_priv {
u16 rss_key_size;
u16 rss_lut_size;
+ bool cache_rss_config;
+ struct gve_rss_config rss_config;
+
+ /* True if the device supports reading the nic clock */
+ bool nic_timestamp_supported;
+ struct gve_ptp *ptp;
+ struct kernel_hwtstamp_config ts_config;
+ struct gve_nic_ts_report *nic_ts_report;
+ dma_addr_t nic_ts_report_bus;
+ u64 last_sync_nic_counter; /* Clock counter from last NIC TS report */
};
enum gve_service_task_flags_bit {
@@ -1024,27 +1100,16 @@ static inline bool gve_is_qpl(struct gve_priv *priv)
}
/* Returns the number of tx queue page lists */
-static inline u32 gve_num_tx_qpls(const struct gve_queue_config *tx_cfg,
- int num_xdp_queues,
+static inline u32 gve_num_tx_qpls(const struct gve_tx_queue_config *tx_cfg,
bool is_qpl)
{
if (!is_qpl)
return 0;
- return tx_cfg->num_queues + num_xdp_queues;
-}
-
-/* Returns the number of XDP tx queue page lists
- */
-static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
-{
- if (priv->queue_format != GVE_GQI_QPL_FORMAT)
- return 0;
-
- return priv->num_xdp_queues;
+ return tx_cfg->num_queues + tx_cfg->num_xdp_queues;
}
/* Returns the number of rx queue page lists */
-static inline u32 gve_num_rx_qpls(const struct gve_queue_config *rx_cfg,
+static inline u32 gve_num_rx_qpls(const struct gve_rx_queue_config *rx_cfg,
bool is_qpl)
{
if (!is_qpl)
@@ -1062,7 +1127,8 @@ static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid)
return priv->tx_cfg.max_queues + rx_qid;
}
-static inline u32 gve_get_rx_qpl_id(const struct gve_queue_config *tx_cfg, int rx_qid)
+static inline u32 gve_get_rx_qpl_id(const struct gve_tx_queue_config *tx_cfg,
+ int rx_qid)
{
return tx_cfg->max_queues + rx_qid;
}
@@ -1072,7 +1138,7 @@ static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv)
return gve_tx_qpl_id(priv, 0);
}
-static inline u32 gve_rx_start_qpl_id(const struct gve_queue_config *tx_cfg)
+static inline u32 gve_rx_start_qpl_id(const struct gve_tx_queue_config *tx_cfg)
{
return gve_get_rx_qpl_id(tx_cfg, 0);
}
@@ -1103,7 +1169,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
static inline u32 gve_num_tx_queues(struct gve_priv *priv)
{
- return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+ return priv->tx_cfg.num_queues + priv->tx_cfg.num_xdp_queues;
}
static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id)
@@ -1116,6 +1182,17 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
return gve_xdp_tx_queue_id(priv, 0);
}
+static inline bool gve_supports_xdp_xmit(struct gve_priv *priv)
+{
+ switch (priv->queue_format) {
+ case GVE_GQI_QPL_FORMAT:
+ case GVE_DQO_RDA_FORMAT:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* gqi napi handler defined in gve_main.c */
int gve_napi_poll(struct napi_struct *napi, int budget);
@@ -1133,11 +1210,15 @@ void gve_free_queue_page_list(struct gve_priv *priv,
u32 id);
/* tx handling */
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
-int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
- u32 flags);
+int gve_xdp_xmit_gqi(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
void *data, int len, void *frame_p);
void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
+int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct xdp_frame *xdpf);
bool gve_tx_poll(struct gve_notify_block *block, int budget);
bool gve_xdp_poll(struct gve_notify_block *block, int budget);
int gve_xsk_tx_poll(struct gve_notify_block *block, int budget);
@@ -1197,7 +1278,8 @@ void gve_free_buffer(struct gve_rx_ring *rx,
struct gve_rx_buf_state_dqo *buf_state);
int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc);
struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv,
- struct gve_rx_ring *rx);
+ struct gve_rx_ring *rx,
+ bool xdp);
/* Reset */
void gve_schedule_reset(struct gve_priv *priv);
@@ -1209,14 +1291,35 @@ int gve_adjust_config(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
struct gve_rx_alloc_rings_cfg *rx_alloc_cfg);
int gve_adjust_queues(struct gve_priv *priv,
- struct gve_queue_config new_rx_config,
- struct gve_queue_config new_tx_config);
+ struct gve_rx_queue_config new_rx_config,
+ struct gve_tx_queue_config new_tx_config,
+ bool reset_rss);
/* flow steering rule */
int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs);
int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd);
int gve_flow_rules_reset(struct gve_priv *priv);
+/* RSS config */
+int gve_init_rss_config(struct gve_priv *priv, u16 num_queues);
+/* PTP and timestamping */
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+int gve_clock_nic_ts_read(struct gve_priv *priv);
+int gve_init_clock(struct gve_priv *priv);
+void gve_teardown_clock(struct gve_priv *priv);
+#else /* CONFIG_PTP_1588_CLOCK */
+static inline int gve_clock_nic_ts_read(struct gve_priv *priv)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int gve_init_clock(struct gve_priv *priv)
+{
+ return 0;
+}
+
+static inline void gve_teardown_clock(struct gve_priv *priv) { }
+#endif /* CONFIG_PTP_1588_CLOCK */
/* report stats handling */
void gve_handle_report_stats(struct gve_priv *priv);
/* exported by ethtool.c */
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index aa7d723011d0..4f33d094a2ef 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -46,6 +46,7 @@ void gve_parse_device_option(struct gve_priv *priv,
struct gve_device_option_buffer_sizes **dev_op_buffer_sizes,
struct gve_device_option_flow_steering **dev_op_flow_steering,
struct gve_device_option_rss_config **dev_op_rss_config,
+ struct gve_device_option_nic_timestamp **dev_op_nic_timestamp,
struct gve_device_option_modify_ring **dev_op_modify_ring)
{
u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
@@ -225,6 +226,23 @@ void gve_parse_device_option(struct gve_priv *priv,
"RSS config");
*dev_op_rss_config = (void *)(option + 1);
break;
+ case GVE_DEV_OPT_ID_NIC_TIMESTAMP:
+ if (option_length < sizeof(**dev_op_nic_timestamp) ||
+ req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_NIC_TIMESTAMP) {
+ dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+ "Nic Timestamp",
+ (int)sizeof(**dev_op_nic_timestamp),
+ GVE_DEV_OPT_REQ_FEAT_MASK_NIC_TIMESTAMP,
+ option_length, req_feat_mask);
+ break;
+ }
+
+ if (option_length > sizeof(**dev_op_nic_timestamp))
+ dev_warn(&priv->pdev->dev,
+ GVE_DEVICE_OPTION_TOO_BIG_FMT,
+ "Nic Timestamp");
+ *dev_op_nic_timestamp = (void *)(option + 1);
+ break;
default:
/* If we don't recognize the option just continue
* without doing anything.
@@ -246,6 +264,7 @@ gve_process_device_options(struct gve_priv *priv,
struct gve_device_option_buffer_sizes **dev_op_buffer_sizes,
struct gve_device_option_flow_steering **dev_op_flow_steering,
struct gve_device_option_rss_config **dev_op_rss_config,
+ struct gve_device_option_nic_timestamp **dev_op_nic_timestamp,
struct gve_device_option_modify_ring **dev_op_modify_ring)
{
const int num_options = be16_to_cpu(descriptor->num_device_options);
@@ -269,6 +288,7 @@ gve_process_device_options(struct gve_priv *priv,
dev_op_dqo_rda, dev_op_jumbo_frames,
dev_op_dqo_qpl, dev_op_buffer_sizes,
dev_op_flow_steering, dev_op_rss_config,
+ dev_op_nic_timestamp,
dev_op_modify_ring);
dev_opt = next_opt;
}
@@ -306,6 +326,7 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv)
priv->adminq_set_driver_parameter_cnt = 0;
priv->adminq_report_stats_cnt = 0;
priv->adminq_report_link_speed_cnt = 0;
+ priv->adminq_report_nic_timestamp_cnt = 0;
priv->adminq_get_ptype_map_cnt = 0;
priv->adminq_query_flow_rules_cnt = 0;
priv->adminq_cfg_flow_rule_cnt = 0;
@@ -442,6 +463,8 @@ static int gve_adminq_kick_and_wait(struct gve_priv *priv)
int tail, head;
int i;
+ lockdep_assert_held(&priv->adminq_lock);
+
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
head = priv->adminq_prod_cnt;
@@ -467,9 +490,6 @@ static int gve_adminq_kick_and_wait(struct gve_priv *priv)
return 0;
}
-/* This function is not threadsafe - the caller is responsible for any
- * necessary locks.
- */
static int gve_adminq_issue_cmd(struct gve_priv *priv,
union gve_adminq_command *cmd_orig)
{
@@ -477,6 +497,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
u32 opcode;
u32 tail;
+ lockdep_assert_held(&priv->adminq_lock);
+
tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
// Check if next command will overflow the buffer.
@@ -544,6 +566,9 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
case GVE_ADMINQ_REPORT_LINK_SPEED:
priv->adminq_report_link_speed_cnt++;
break;
+ case GVE_ADMINQ_REPORT_NIC_TIMESTAMP:
+ priv->adminq_report_nic_timestamp_cnt++;
+ break;
case GVE_ADMINQ_GET_PTYPE_MAP:
priv->adminq_get_ptype_map_cnt++;
break;
@@ -564,6 +589,7 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
break;
default:
dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode);
+ return -EINVAL;
}
return 0;
@@ -625,7 +651,7 @@ static int gve_adminq_execute_extended_cmd(struct gve_priv *priv, u32 opcode,
/* The device specifies that the management vector can either be the first irq
* or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to
- * the ntfy blks. It if is 0 then the management vector is last, if it is 1 then
+ * the ntfy blks. If it is 0 then the management vector is last, if it is 1 then
* the management vector is first.
*
* gve arranges the msix vectors so that the management vector is last.
@@ -709,13 +735,19 @@ int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_que
int err;
int i;
+ mutex_lock(&priv->adminq_lock);
+
for (i = start_id; i < start_id + num_queues; i++) {
err = gve_adminq_create_tx_queue(priv, i);
if (err)
- return err;
+ goto out;
}
- return gve_adminq_kick_and_wait(priv);
+ err = gve_adminq_kick_and_wait(priv);
+
+out:
+ mutex_unlock(&priv->adminq_lock);
+ return err;
}
static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv,
@@ -731,6 +763,7 @@ static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv,
.ntfy_id = cpu_to_be32(rx->ntfy_id),
.queue_resources_addr = cpu_to_be64(rx->q_resources_bus),
.rx_ring_size = cpu_to_be16(priv->rx_desc_cnt),
+ .packet_buffer_size = cpu_to_be16(rx->packet_buffer_size),
};
if (gve_is_gqi(priv)) {
@@ -743,7 +776,6 @@ static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv,
cpu_to_be64(rx->data.data_bus);
cmd->create_rx_queue.index = cpu_to_be32(queue_index);
cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
- cmd->create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
} else {
u32 qpl_id = 0;
@@ -756,8 +788,6 @@ static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv,
cpu_to_be64(rx->dqo.complq.bus);
cmd->create_rx_queue.rx_data_ring_addr =
cpu_to_be64(rx->dqo.bufq.bus);
- cmd->create_rx_queue.packet_buffer_size =
- cpu_to_be16(priv->data_buffer_size_dqo);
cmd->create_rx_queue.rx_buff_ring_size =
cpu_to_be16(priv->rx_desc_cnt);
cmd->create_rx_queue.enable_rsc =
@@ -790,13 +820,19 @@ int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues)
int err;
int i;
+ mutex_lock(&priv->adminq_lock);
+
for (i = 0; i < num_queues; i++) {
err = gve_adminq_create_rx_queue(priv, i);
if (err)
- return err;
+ goto out;
}
- return gve_adminq_kick_and_wait(priv);
+ err = gve_adminq_kick_and_wait(priv);
+
+out:
+ mutex_unlock(&priv->adminq_lock);
+ return err;
}
static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
@@ -822,13 +858,19 @@ int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_qu
int err;
int i;
+ mutex_lock(&priv->adminq_lock);
+
for (i = start_id; i < start_id + num_queues; i++) {
err = gve_adminq_destroy_tx_queue(priv, i);
if (err)
- return err;
+ goto out;
}
- return gve_adminq_kick_and_wait(priv);
+ err = gve_adminq_kick_and_wait(priv);
+
+out:
+ mutex_unlock(&priv->adminq_lock);
+ return err;
}
static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd,
@@ -863,13 +905,19 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues)
int err;
int i;
+ mutex_lock(&priv->adminq_lock);
+
for (i = 0; i < num_queues; i++) {
err = gve_adminq_destroy_rx_queue(priv, i);
if (err)
- return err;
+ goto out;
}
- return gve_adminq_kick_and_wait(priv);
+ err = gve_adminq_kick_and_wait(priv);
+
+out:
+ mutex_unlock(&priv->adminq_lock);
+ return err;
}
static void gve_set_default_desc_cnt(struct gve_priv *priv,
@@ -885,6 +933,15 @@ static void gve_set_default_desc_cnt(struct gve_priv *priv,
priv->min_rx_desc_cnt = priv->rx_desc_cnt;
}
+static void gve_set_default_rss_sizes(struct gve_priv *priv)
+{
+ if (!gve_is_gqi(priv)) {
+ priv->rss_key_size = GVE_RSS_KEY_SIZE;
+ priv->rss_lut_size = GVE_RSS_INDIR_SIZE;
+ priv->cache_rss_config = true;
+ }
+}
+
static void gve_enable_supported_features(struct gve_priv *priv,
u32 supported_features_mask,
const struct gve_device_option_jumbo_frames
@@ -897,6 +954,8 @@ static void gve_enable_supported_features(struct gve_priv *priv,
*dev_op_flow_steering,
const struct gve_device_option_rss_config
*dev_op_rss_config,
+ const struct gve_device_option_nic_timestamp
+ *dev_op_nic_timestamp,
const struct gve_device_option_modify_ring
*dev_op_modify_ring)
{
@@ -968,11 +1027,20 @@ static void gve_enable_supported_features(struct gve_priv *priv,
be16_to_cpu(dev_op_rss_config->hash_key_size);
priv->rss_lut_size =
be16_to_cpu(dev_op_rss_config->hash_lut_size);
+ priv->cache_rss_config = false;
+ dev_dbg(&priv->pdev->dev,
+ "RSS device option enabled with key size of %u, lut size of %u.\n",
+ priv->rss_key_size, priv->rss_lut_size);
}
+
+ if (dev_op_nic_timestamp &&
+ (supported_features_mask & GVE_SUP_NIC_TIMESTAMP_MASK))
+ priv->nic_timestamp_supported = true;
}
int gve_adminq_describe_device(struct gve_priv *priv)
{
+ struct gve_device_option_nic_timestamp *dev_op_nic_timestamp = NULL;
struct gve_device_option_flow_steering *dev_op_flow_steering = NULL;
struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL;
struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL;
@@ -1013,6 +1081,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
&dev_op_buffer_sizes,
&dev_op_flow_steering,
&dev_op_rss_config,
+ &dev_op_nic_timestamp,
&dev_op_modify_ring);
if (err)
goto free_device_descriptor;
@@ -1052,6 +1121,8 @@ int gve_adminq_describe_device(struct gve_priv *priv)
/* set default descriptor counts */
gve_set_default_desc_cnt(priv, descriptor);
+ gve_set_default_rss_sizes(priv);
+
/* DQO supports LRO. */
if (!gve_is_gqi(priv))
priv->dev->hw_features |= NETIF_F_LRO;
@@ -1075,7 +1146,8 @@ int gve_adminq_describe_device(struct gve_priv *priv)
gve_enable_supported_features(priv, supported_features_mask,
dev_op_jumbo_frames, dev_op_dqo_qpl,
dev_op_buffer_sizes, dev_op_flow_steering,
- dev_op_rss_config, dev_op_modify_ring);
+ dev_op_rss_config, dev_op_nic_timestamp,
+ dev_op_modify_ring);
free_device_descriptor:
dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus);
@@ -1187,6 +1259,22 @@ int gve_adminq_report_link_speed(struct gve_priv *priv)
return err;
}
+int gve_adminq_report_nic_ts(struct gve_priv *priv,
+ dma_addr_t nic_ts_report_addr)
+{
+ union gve_adminq_command cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_NIC_TIMESTAMP);
+ cmd.report_nic_ts = (struct gve_adminq_report_nic_ts) {
+ .nic_ts_report_len =
+ cpu_to_be64(sizeof(struct gve_nic_ts_report)),
+ .nic_ts_report_addr = cpu_to_be64(nic_ts_report_addr),
+ };
+
+ return gve_adminq_execute_cmd(priv, &cmd);
+}
+
int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
struct gve_ptype_lut *ptype_lut)
{
@@ -1276,8 +1364,9 @@ int gve_adminq_reset_flow_rules(struct gve_priv *priv)
int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh)
{
+ const u32 *hash_lut_to_config = NULL;
+ const u8 *hash_key_to_config = NULL;
dma_addr_t lut_bus = 0, key_bus = 0;
- u16 key_size = 0, lut_size = 0;
union gve_adminq_command cmd;
__be32 *lut = NULL;
u8 hash_alg = 0;
@@ -1287,7 +1376,7 @@ int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *r
switch (rxfh->hfunc) {
case ETH_RSS_HASH_NO_CHANGE:
- break;
+ fallthrough;
case ETH_RSS_HASH_TOP:
hash_alg = ETH_RSS_HASH_TOP;
break;
@@ -1296,27 +1385,46 @@ int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *r
}
if (rxfh->indir) {
- lut_size = priv->rss_lut_size;
+ if (rxfh->indir_size != priv->rss_lut_size)
+ return -EINVAL;
+
+ hash_lut_to_config = rxfh->indir;
+ } else if (priv->cache_rss_config) {
+ hash_lut_to_config = priv->rss_config.hash_lut;
+ }
+
+ if (hash_lut_to_config) {
lut = dma_alloc_coherent(&priv->pdev->dev,
- lut_size * sizeof(*lut),
+ priv->rss_lut_size * sizeof(*lut),
&lut_bus, GFP_KERNEL);
if (!lut)
return -ENOMEM;
for (i = 0; i < priv->rss_lut_size; i++)
- lut[i] = cpu_to_be32(rxfh->indir[i]);
+ lut[i] = cpu_to_be32(hash_lut_to_config[i]);
}
if (rxfh->key) {
- key_size = priv->rss_key_size;
+ if (rxfh->key_size != priv->rss_key_size) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ hash_key_to_config = rxfh->key;
+ } else if (priv->cache_rss_config) {
+ hash_key_to_config = priv->rss_config.hash_key;
+ }
+
+ if (hash_key_to_config) {
key = dma_alloc_coherent(&priv->pdev->dev,
- key_size, &key_bus, GFP_KERNEL);
+ priv->rss_key_size,
+ &key_bus, GFP_KERNEL);
if (!key) {
err = -ENOMEM;
goto out;
}
- memcpy(key, rxfh->key, key_size);
+ memcpy(key, hash_key_to_config, priv->rss_key_size);
}
/* Zero-valued fields in the cmd.configure_rss instruct the device to
@@ -1330,8 +1438,10 @@ int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *r
BIT(GVE_RSS_HASH_TCPV6) |
BIT(GVE_RSS_HASH_UDPV6)),
.hash_alg = hash_alg,
- .hash_key_size = cpu_to_be16(key_size),
- .hash_lut_size = cpu_to_be16(lut_size),
+ .hash_key_size =
+ cpu_to_be16((key_bus) ? priv->rss_key_size : 0),
+ .hash_lut_size =
+ cpu_to_be16((lut_bus) ? priv->rss_lut_size : 0),
.hash_key_addr = cpu_to_be64(key_bus),
.hash_lut_addr = cpu_to_be64(lut_bus),
};
@@ -1341,11 +1451,11 @@ int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *r
out:
if (lut)
dma_free_coherent(&priv->pdev->dev,
- lut_size * sizeof(*lut),
+ priv->rss_lut_size * sizeof(*lut),
lut, lut_bus);
if (key)
dma_free_coherent(&priv->pdev->dev,
- key_size, key, key_bus);
+ priv->rss_key_size, key, key_bus);
return err;
}
@@ -1449,12 +1559,15 @@ static int gve_adminq_process_rss_query(struct gve_priv *priv,
rxfh->hfunc = descriptor->hash_alg;
rss_info_addr = (void *)(descriptor + 1);
- if (rxfh->key)
+ if (rxfh->key) {
+ rxfh->key_size = priv->rss_key_size;
memcpy(rxfh->key, rss_info_addr, priv->rss_key_size);
+ }
rss_info_addr += priv->rss_key_size;
lut = (__be32 *)rss_info_addr;
if (rxfh->indir) {
+ rxfh->indir_size = priv->rss_lut_size;
for (i = 0; i < priv->rss_lut_size; i++)
rxfh->indir[i] = be32_to_cpu(lut[i]);
}
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index 228217458275..22a74b6aa17e 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -27,6 +27,7 @@ enum gve_adminq_opcodes {
GVE_ADMINQ_GET_PTYPE_MAP = 0xE,
GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF,
GVE_ADMINQ_QUERY_FLOW_RULES = 0x10,
+ GVE_ADMINQ_REPORT_NIC_TIMESTAMP = 0x11,
GVE_ADMINQ_QUERY_RSS = 0x12,
/* For commands that are larger than 56 bytes */
@@ -174,6 +175,12 @@ struct gve_device_option_rss_config {
static_assert(sizeof(struct gve_device_option_rss_config) == 8);
+struct gve_device_option_nic_timestamp {
+ __be32 supported_features_mask;
+};
+
+static_assert(sizeof(struct gve_device_option_nic_timestamp) == 4);
+
/* Terminology:
*
* RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA
@@ -192,6 +199,7 @@ enum gve_dev_opt_id {
GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa,
GVE_DEV_OPT_ID_FLOW_STEERING = 0xb,
+ GVE_DEV_OPT_ID_NIC_TIMESTAMP = 0xd,
GVE_DEV_OPT_ID_RSS_CONFIG = 0xe,
};
@@ -206,6 +214,7 @@ enum gve_dev_opt_req_feat_mask {
GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING = 0x0,
GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG = 0x0,
+ GVE_DEV_OPT_REQ_FEAT_MASK_NIC_TIMESTAMP = 0x0,
};
enum gve_sup_feature_mask {
@@ -214,6 +223,7 @@ enum gve_sup_feature_mask {
GVE_SUP_BUFFER_SIZES_MASK = 1 << 4,
GVE_SUP_FLOW_STEERING_MASK = 1 << 5,
GVE_SUP_RSS_CONFIG_MASK = 1 << 7,
+ GVE_SUP_NIC_TIMESTAMP_MASK = 1 << 8,
};
#define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0
@@ -392,6 +402,21 @@ struct gve_adminq_report_link_speed {
static_assert(sizeof(struct gve_adminq_report_link_speed) == 8);
+struct gve_adminq_report_nic_ts {
+ __be64 nic_ts_report_len;
+ __be64 nic_ts_report_addr;
+};
+
+static_assert(sizeof(struct gve_adminq_report_nic_ts) == 16);
+
+struct gve_nic_ts_report {
+ __be64 nic_timestamp; /* NIC clock in nanoseconds */
+ __be64 reserved1;
+ __be64 reserved2;
+ __be64 reserved3;
+ __be64 reserved4;
+};
+
struct stats {
__be32 stat_name;
__be32 queue_id;
@@ -451,7 +476,7 @@ struct gve_ptype_entry {
};
struct gve_ptype_map {
- struct gve_ptype_entry ptypes[1 << 10]; /* PTYPES are always 10 bits. */
+ struct gve_ptype_entry ptypes[GVE_NUM_PTYPES]; /* PTYPES are always 10 bits. */
};
struct gve_adminq_get_ptype_map {
@@ -585,6 +610,7 @@ union gve_adminq_command {
struct gve_adminq_query_flow_rules query_flow_rules;
struct gve_adminq_configure_rss configure_rss;
struct gve_adminq_query_rss query_rss;
+ struct gve_adminq_report_nic_ts report_nic_ts;
struct gve_adminq_extended_command extended_command;
};
};
@@ -624,6 +650,8 @@ int gve_adminq_reset_flow_rules(struct gve_priv *priv);
int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc);
int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh);
int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh);
+int gve_adminq_report_nic_ts(struct gve_priv *priv,
+ dma_addr_t nic_ts_report_addr);
struct gve_ptype_lut;
int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv,
diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
index 403f0f335ba6..8f5021e59e0a 100644
--- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c
@@ -4,6 +4,7 @@
* Copyright (C) 2015-2024 Google, Inc.
*/
+#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_utils.h"
@@ -29,6 +30,10 @@ struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx)
/* Point buf_state to itself to mark it as allocated */
buf_state->next = buffer_id;
+ /* Clear the buffer pointers */
+ buf_state->page_info.page = NULL;
+ buf_state->xsk_buff = NULL;
+
return buf_state;
}
@@ -139,7 +144,8 @@ int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx,
buf_state->page_info.page_offset = 0;
buf_state->page_info.page_address =
page_address(buf_state->page_info.page);
- buf_state->page_info.buf_size = priv->data_buffer_size_dqo;
+ buf_state->page_info.buf_size = rx->packet_buffer_truesize;
+ buf_state->page_info.pad = rx->rx_headroom;
buf_state->last_single_ref_offset = 0;
/* The page already has 1 ref. */
@@ -162,7 +168,7 @@ void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state)
void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
struct gve_rx_buf_state_dqo *buf_state)
{
- const u16 data_buffer_size = priv->data_buffer_size_dqo;
+ const u16 data_buffer_size = rx->packet_buffer_truesize;
int pagecount;
/* Can't reuse if we only fit one buffer per page */
@@ -205,49 +211,53 @@ void gve_free_to_page_pool(struct gve_rx_ring *rx,
struct gve_rx_buf_state_dqo *buf_state,
bool allow_direct)
{
- struct page *page = buf_state->page_info.page;
+ netmem_ref netmem = buf_state->page_info.netmem;
- if (!page)
+ if (!netmem)
return;
- page_pool_put_full_page(page->pp, page, allow_direct);
- buf_state->page_info.page = NULL;
+ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, allow_direct);
+ buf_state->page_info.netmem = 0;
}
static int gve_alloc_from_page_pool(struct gve_rx_ring *rx,
struct gve_rx_buf_state_dqo *buf_state)
{
- struct gve_priv *priv = rx->gve;
- struct page *page;
+ netmem_ref netmem;
- buf_state->page_info.buf_size = priv->data_buffer_size_dqo;
- page = page_pool_alloc(rx->dqo.page_pool,
- &buf_state->page_info.page_offset,
- &buf_state->page_info.buf_size, GFP_ATOMIC);
+ buf_state->page_info.buf_size = rx->packet_buffer_truesize;
+ netmem = page_pool_alloc_netmem(rx->dqo.page_pool,
+ &buf_state->page_info.page_offset,
+ &buf_state->page_info.buf_size,
+ GFP_ATOMIC);
- if (!page)
+ if (!netmem)
return -ENOMEM;
- buf_state->page_info.page = page;
- buf_state->page_info.page_address = page_address(page);
- buf_state->addr = page_pool_get_dma_addr(page);
+ buf_state->page_info.netmem = netmem;
+ buf_state->page_info.page_address = netmem_address(netmem);
+ buf_state->addr = page_pool_get_dma_addr_netmem(netmem);
+ buf_state->page_info.pad = rx->dqo.page_pool->p.offset;
return 0;
}
struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv,
- struct gve_rx_ring *rx)
+ struct gve_rx_ring *rx,
+ bool xdp)
{
u32 ntfy_id = gve_rx_idx_to_ntfy(priv, rx->q_num);
struct page_pool_params pp = {
.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
.order = 0,
.pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt,
+ .nid = priv->numa_node,
.dev = &priv->pdev->dev,
.netdev = priv->dev,
.napi = &priv->ntfy_blocks[ntfy_id].napi,
.max_len = PAGE_SIZE,
- .dma_dir = DMA_FROM_DEVICE,
+ .dma_dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+ .offset = xdp ? XDP_PACKET_HEADROOM : 0,
};
return page_pool_create(&pp);
@@ -269,7 +279,7 @@ void gve_reuse_buffer(struct gve_rx_ring *rx,
struct gve_rx_buf_state_dqo *buf_state)
{
if (rx->dqo.page_pool) {
- buf_state->page_info.page = NULL;
+ buf_state->page_info.netmem = 0;
gve_free_buf_state(rx, buf_state);
} else {
gve_dec_pagecnt_bias(&buf_state->page_info);
@@ -281,7 +291,24 @@ int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc)
{
struct gve_rx_buf_state_dqo *buf_state;
- if (rx->dqo.page_pool) {
+ if (rx->xsk_pool) {
+ buf_state = gve_alloc_buf_state(rx);
+ if (unlikely(!buf_state))
+ return -ENOMEM;
+
+ buf_state->xsk_buff = xsk_buff_alloc(rx->xsk_pool);
+ if (unlikely(!buf_state->xsk_buff)) {
+ xsk_set_rx_need_wakeup(rx->xsk_pool);
+ gve_free_buf_state(rx, buf_state);
+ return -ENOMEM;
+ }
+ /* Allocated xsk buffer. Clear wakeup in case it was set. */
+ xsk_clear_rx_need_wakeup(rx->xsk_pool);
+ desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
+ desc->buf_addr =
+ cpu_to_le64(xsk_buff_xdp_get_dma(buf_state->xsk_buff));
+ return 0;
+ } else if (rx->dqo.page_pool) {
buf_state = gve_alloc_buf_state(rx);
if (WARN_ON_ONCE(!buf_state))
return -ENOMEM;
@@ -301,7 +328,8 @@ int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc)
}
desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states);
desc->buf_addr = cpu_to_le64(buf_state->addr +
- buf_state->page_info.page_offset);
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad);
return 0;
diff --git a/drivers/net/ethernet/google/gve/gve_desc_dqo.h b/drivers/net/ethernet/google/gve/gve_desc_dqo.h
index f79cd0591110..d17da841b5a0 100644
--- a/drivers/net/ethernet/google/gve/gve_desc_dqo.h
+++ b/drivers/net/ethernet/google/gve/gve_desc_dqo.h
@@ -247,7 +247,8 @@ struct gve_rx_compl_desc_dqo {
};
__le32 hash;
__le32 reserved6;
- __le64 reserved7;
+ __le32 reserved7;
+ __le32 ts; /* timestamp in nanosecs */
} __packed;
static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32);
diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h
index e83773fb891f..6eb442096e02 100644
--- a/drivers/net/ethernet/google/gve/gve_dqo.h
+++ b/drivers/net/ethernet/google/gve/gve_dqo.h
@@ -37,6 +37,8 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb,
struct net_device *dev,
netdev_features_t features);
bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
+bool gve_xdp_poll_dqo(struct gve_notify_block *block);
+bool gve_xsk_tx_poll_dqo(struct gve_notify_block *block, int budget);
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg);
@@ -60,6 +62,7 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
struct napi_struct *napi);
void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx);
void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx);
+void gve_xdp_tx_flush_dqo(struct gve_priv *priv, u32 xdp_qid);
static inline void
gve_tx_put_doorbell_dqo(const struct gve_priv *priv,
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index bdfc6e77b2af..d0a223250845 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -63,11 +63,11 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]",
"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
- "tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]",
- "tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
+ "tx_dma_mapping_error[%u]",
+ "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
};
-static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
+static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] __nonstring_array = {
"adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts",
"adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt",
"adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt",
@@ -76,7 +76,7 @@ static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
"adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt",
"adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt",
"adminq_query_flow_rules", "adminq_cfg_flow_rule", "adminq_cfg_rss_cnt",
- "adminq_query_rss_cnt",
+ "adminq_query_rss_cnt", "adminq_report_nic_timestamp_cnt",
};
static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = {
@@ -113,7 +113,7 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
i);
for (i = 0; i < ARRAY_SIZE(gve_gstrings_adminq_stats); i++)
- ethtool_puts(&s, gve_gstrings_adminq_stats[i]);
+ ethtool_cpy(&s, gve_gstrings_adminq_stats[i]);
break;
@@ -392,7 +392,9 @@ gve_get_ethtool_stats(struct net_device *netdev,
*/
data[i++] = 0;
data[i++] = 0;
- data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head;
+ data[i++] =
+ (tx->dqo_tx.tail - tx->dqo_tx.head) &
+ tx->mask;
}
do {
start =
@@ -417,9 +419,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = value;
}
}
- /* XDP xsk counters */
- data[i++] = tx->xdp_xsk_wakeup;
- data[i++] = tx->xdp_xsk_done;
+ /* XDP counters */
do {
start = u64_stats_fetch_begin(&priv->tx[ring].statss);
data[i] = tx->xdp_xsk_sent;
@@ -456,6 +456,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
data[i++] = priv->adminq_cfg_flow_rule_cnt;
data[i++] = priv->adminq_cfg_rss_cnt;
data[i++] = priv->adminq_query_rss_cnt;
+ data[i++] = priv->adminq_report_nic_timestamp_cnt;
}
static void gve_get_channels(struct net_device *netdev,
@@ -477,11 +478,12 @@ static int gve_set_channels(struct net_device *netdev,
struct ethtool_channels *cmd)
{
struct gve_priv *priv = netdev_priv(netdev);
- struct gve_queue_config new_tx_cfg = priv->tx_cfg;
- struct gve_queue_config new_rx_cfg = priv->rx_cfg;
+ struct gve_tx_queue_config new_tx_cfg = priv->tx_cfg;
+ struct gve_rx_queue_config new_rx_cfg = priv->rx_cfg;
struct ethtool_channels old_settings;
int new_tx = cmd->tx_count;
int new_rx = cmd->rx_count;
+ bool reset_rss = false;
gve_get_channels(netdev, &old_settings);
@@ -492,22 +494,27 @@ static int gve_set_channels(struct net_device *netdev,
if (!new_rx || !new_tx)
return -EINVAL;
- if (priv->num_xdp_queues &&
- (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) {
- dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues");
- return -EINVAL;
- }
+ if (priv->xdp_prog) {
+ if (new_tx != new_rx ||
+ (2 * new_tx > priv->tx_cfg.max_queues)) {
+ dev_err(&priv->pdev->dev, "The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues when XDP program is installed");
+ return -EINVAL;
+ }
- if (!netif_running(netdev)) {
- priv->tx_cfg.num_queues = new_tx;
- priv->rx_cfg.num_queues = new_rx;
- return 0;
+ /* One XDP TX queue per RX queue. */
+ new_tx_cfg.num_xdp_queues = new_rx;
+ } else {
+ new_tx_cfg.num_xdp_queues = 0;
}
+ if (new_rx != priv->rx_cfg.num_queues &&
+ priv->cache_rss_config && !netif_is_rxfh_configured(netdev))
+ reset_rss = true;
+
new_tx_cfg.num_queues = new_tx;
new_rx_cfg.num_queues = new_rx;
- return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg);
+ return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg, reset_rss);
}
static void gve_get_ringparam(struct net_device *netdev,
@@ -643,8 +650,7 @@ static int gve_set_tunable(struct net_device *netdev,
switch (etuna->id) {
case ETHTOOL_RX_COPYBREAK:
{
- u32 max_copybreak = gve_is_gqi(priv) ?
- GVE_DEFAULT_RX_BUFFER_SIZE : priv->data_buffer_size_dqo;
+ u32 max_copybreak = priv->rx_cfg.packet_buffer_size;
len = *(u32 *)value;
if (len > max_copybreak)
@@ -662,7 +668,7 @@ static u32 gve_get_priv_flags(struct net_device *netdev)
struct gve_priv *priv = netdev_priv(netdev);
u32 ret_flags = 0;
- /* Only 1 flag exists currently: report-stats (BIT(O)), so set that flag. */
+ /* Only 1 flag exists currently: report-stats (BIT(0)), so set that flag. */
if (priv->ethtool_flags & BIT(0))
ret_flags |= BIT(0);
return ret_flags;
@@ -700,7 +706,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
memset(priv->stats_report->stats, 0, (tx_stats_num + rx_stats_num) *
sizeof(struct stats));
- del_timer_sync(&priv->stats_report_timer);
+ timer_delete_sync(&priv->stats_report_timer);
}
return 0;
}
@@ -793,9 +799,6 @@ static int gve_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
case ETHTOOL_SRXCLSRLDEL:
err = gve_del_flow_rule(priv, cmd);
break;
- case ETHTOOL_SRXFH:
- err = -EOPNOTSUPP;
- break;
default:
err = -EOPNOTSUPP;
break;
@@ -830,9 +833,6 @@ static int gve_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u
case ETHTOOL_GRXCLSRLALL:
err = gve_get_flow_rule_ids(priv, cmd, (u32 *)rule_locs);
break;
- case ETHTOOL_GRXFH:
- err = -EOPNOTSUPP;
- break;
default:
err = -EOPNOTSUPP;
break;
@@ -855,6 +855,25 @@ static u32 gve_get_rxfh_indir_size(struct net_device *netdev)
return priv->rss_lut_size;
}
+static void gve_get_rss_config_cache(struct gve_priv *priv,
+ struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ rxfh->hfunc = ETH_RSS_HASH_TOP;
+
+ if (rxfh->key) {
+ rxfh->key_size = priv->rss_key_size;
+ memcpy(rxfh->key, rss_config->hash_key, priv->rss_key_size);
+ }
+
+ if (rxfh->indir) {
+ rxfh->indir_size = priv->rss_lut_size;
+ memcpy(rxfh->indir, rss_config->hash_lut,
+ priv->rss_lut_size * sizeof(*rxfh->indir));
+ }
+}
+
static int gve_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh)
{
struct gve_priv *priv = netdev_priv(netdev);
@@ -862,18 +881,67 @@ static int gve_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rx
if (!priv->rss_key_size || !priv->rss_lut_size)
return -EOPNOTSUPP;
+ if (priv->cache_rss_config) {
+ gve_get_rss_config_cache(priv, rxfh);
+ return 0;
+ }
+
return gve_adminq_query_rss_config(priv, rxfh);
}
+static void gve_set_rss_config_cache(struct gve_priv *priv,
+ struct ethtool_rxfh_param *rxfh)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ if (rxfh->key)
+ memcpy(rss_config->hash_key, rxfh->key, priv->rss_key_size);
+
+ if (rxfh->indir)
+ memcpy(rss_config->hash_lut, rxfh->indir,
+ priv->rss_lut_size * sizeof(*rxfh->indir));
+}
+
static int gve_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh,
struct netlink_ext_ack *extack)
{
struct gve_priv *priv = netdev_priv(netdev);
+ int err;
if (!priv->rss_key_size || !priv->rss_lut_size)
return -EOPNOTSUPP;
- return gve_adminq_configure_rss(priv, rxfh);
+ err = gve_adminq_configure_rss(priv, rxfh);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Fail to configure RSS config");
+ return err;
+ }
+
+ if (priv->cache_rss_config)
+ gve_set_rss_config_cache(priv, rxfh);
+
+ return 0;
+}
+
+static int gve_get_ts_info(struct net_device *netdev,
+ struct kernel_ethtool_ts_info *info)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ ethtool_op_get_ts_info(netdev, info);
+
+ if (priv->nic_timestamp_supported) {
+ info->so_timestamping |= SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->rx_filters |= BIT(HWTSTAMP_FILTER_NONE) |
+ BIT(HWTSTAMP_FILTER_ALL);
+
+ if (priv->ptp)
+ info->phc_index = ptp_clock_index(priv->ptp->clock);
+ }
+
+ return 0;
}
const struct ethtool_ops gve_ethtool_ops = {
@@ -904,5 +972,5 @@ const struct ethtool_ops gve_ethtool_ops = {
.get_priv_flags = gve_get_priv_flags,
.set_priv_flags = gve_set_priv_flags,
.get_link_ksettings = gve_get_link_ksettings,
- .get_ts_info = ethtool_op_get_ts_info,
+ .get_ts_info = gve_get_ts_info,
};
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 533e659b15b3..1be1b1ef31ee 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -4,6 +4,7 @@
* Copyright (C) 2015-2024 Google LLC
*/
+#include <linux/bitmap.h>
#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
@@ -184,6 +185,43 @@ static void gve_free_flow_rule_caches(struct gve_priv *priv)
flow_rules_cache->rules_cache = NULL;
}
+static int gve_alloc_rss_config_cache(struct gve_priv *priv)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ if (!priv->cache_rss_config)
+ return 0;
+
+ rss_config->hash_key = kcalloc(priv->rss_key_size,
+ sizeof(rss_config->hash_key[0]),
+ GFP_KERNEL);
+ if (!rss_config->hash_key)
+ return -ENOMEM;
+
+ rss_config->hash_lut = kcalloc(priv->rss_lut_size,
+ sizeof(rss_config->hash_lut[0]),
+ GFP_KERNEL);
+ if (!rss_config->hash_lut)
+ goto free_rss_key_cache;
+
+ return 0;
+
+free_rss_key_cache:
+ kfree(rss_config->hash_key);
+ rss_config->hash_key = NULL;
+ return -ENOMEM;
+}
+
+static void gve_free_rss_config_cache(struct gve_priv *priv)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+
+ kfree(rss_config->hash_key);
+ kfree(rss_config->hash_lut);
+
+ memset(rss_config, 0, sizeof(*rss_config));
+}
+
static int gve_alloc_counter_array(struct gve_priv *priv)
{
priv->counter_array =
@@ -231,7 +269,8 @@ static void gve_stats_report_schedule(struct gve_priv *priv)
static void gve_stats_report_timer(struct timer_list *t)
{
- struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
+ struct gve_priv *priv = timer_container_of(priv, t,
+ stats_report_timer);
mod_timer(&priv->stats_report_timer,
round_jiffies(jiffies +
@@ -265,7 +304,7 @@ static void gve_free_stats_report(struct gve_priv *priv)
if (!priv->stats_report)
return;
- del_timer_sync(&priv->stats_report_timer);
+ timer_delete_sync(&priv->stats_report_timer);
dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
priv->stats_report, priv->stats_report_bus);
priv->stats_report = NULL;
@@ -376,14 +415,24 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
bool reschedule = false;
int work_done = 0;
- if (block->tx)
- reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+ if (block->tx) {
+ if (block->tx->q_num < priv->tx_cfg.num_queues)
+ reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+ else
+ reschedule |= gve_xdp_poll_dqo(block);
+ }
if (!budget)
return 0;
if (block->rx) {
work_done = gve_rx_poll_dqo(block, budget);
+
+ /* Poll XSK TX as part of RX NAPI. Setup re-poll based on if
+ * either datapath has more work to do.
+ */
+ if (priv->xdp_prog)
+ reschedule |= gve_xsk_tx_poll_dqo(block, budget);
reschedule |= work_done == budget;
}
@@ -419,10 +468,19 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
return work_done;
}
+static const struct cpumask *gve_get_node_mask(struct gve_priv *priv)
+{
+ if (priv->numa_node == NUMA_NO_NODE)
+ return cpu_all_mask;
+ else
+ return cpumask_of_node(priv->numa_node);
+}
+
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
int num_vecs_requested = priv->num_ntfy_blks + 1;
- unsigned int active_cpus;
+ const struct cpumask *node_mask;
+ unsigned int cur_cpu;
int vecs_enabled;
int i, j;
int err;
@@ -461,8 +519,6 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
}
- /* Half the notification blocks go to TX and half to RX */
- active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
/* Setup Management Vector - the last vector */
snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
@@ -491,6 +547,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
}
/* Setup the other blocks - the first n-1 vectors */
+ node_mask = gve_get_node_mask(priv);
+ cur_cpu = cpumask_first(node_mask);
for (i = 0; i < priv->num_ntfy_blks; i++) {
struct gve_notify_block *block = &priv->ntfy_blocks[i];
int msix_idx = i;
@@ -507,9 +565,17 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
goto abort_with_some_ntfy_blocks;
}
block->irq = priv->msix_vectors[msix_idx].vector;
- irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
- get_cpu_mask(i % active_cpus));
+ irq_set_affinity_and_hint(block->irq,
+ cpumask_of(cur_cpu));
block->irq_db_index = &priv->irq_db_indices[i].index;
+
+ cur_cpu = cpumask_next(cur_cpu, node_mask);
+ /* Wrap once CPUs in the node have been exhausted, or when
+ * starting RX queue affinities. TX and RX queues of the same
+ * index share affinity.
+ */
+ if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues)
+ cur_cpu = cpumask_first(node_mask);
}
return 0;
abort_with_some_ntfy_blocks:
@@ -575,12 +641,18 @@ static int gve_setup_device_resources(struct gve_priv *priv)
err = gve_alloc_flow_rule_caches(priv);
if (err)
return err;
- err = gve_alloc_counter_array(priv);
+ err = gve_alloc_rss_config_cache(priv);
if (err)
goto abort_with_flow_rule_caches;
- err = gve_alloc_notify_blocks(priv);
+ err = gve_alloc_counter_array(priv);
+ if (err)
+ goto abort_with_rss_config_cache;
+ err = gve_init_clock(priv);
if (err)
goto abort_with_counter;
+ err = gve_alloc_notify_blocks(priv);
+ if (err)
+ goto abort_with_clock;
err = gve_alloc_stats_report(priv);
if (err)
goto abort_with_ntfy_blocks;
@@ -611,6 +683,12 @@ static int gve_setup_device_resources(struct gve_priv *priv)
}
}
+ err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
+ if (err) {
+ dev_err(&priv->pdev->dev, "Failed to init RSS config");
+ goto abort_with_ptype_lut;
+ }
+
err = gve_adminq_report_stats(priv, priv->stats_report_len,
priv->stats_report_bus,
GVE_STATS_REPORT_TIMER_PERIOD);
@@ -627,8 +705,12 @@ abort_with_stats_report:
gve_free_stats_report(priv);
abort_with_ntfy_blocks:
gve_free_notify_blocks(priv);
+abort_with_clock:
+ gve_teardown_clock(priv);
abort_with_counter:
gve_free_counter_array(priv);
+abort_with_rss_config_cache:
+ gve_free_rss_config_cache(priv);
abort_with_flow_rule_caches:
gve_free_flow_rule_caches(priv);
@@ -669,9 +751,11 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
priv->ptype_lut_dqo = NULL;
gve_free_flow_rule_caches(priv);
+ gve_free_rss_config_cache(priv);
gve_free_counter_array(priv);
gve_free_notify_blocks(priv);
gve_free_stats_report(priv);
+ gve_teardown_clock(priv);
gve_clear_device_resources_ok(priv);
}
@@ -746,30 +830,13 @@ static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx
return rx->dqo.qpl;
}
-static int gve_register_xdp_qpls(struct gve_priv *priv)
-{
- int start_id;
- int err;
- int i;
-
- start_id = gve_xdp_tx_start_queue_id(priv);
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i));
- /* This failure will trigger a reset - no need to clean up */
- if (err)
- return err;
- }
- return 0;
-}
-
static int gve_register_qpls(struct gve_priv *priv)
{
int num_tx_qpls, num_rx_qpls;
int err;
int i;
- num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
- gve_is_qpl(priv));
+ num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
for (i = 0; i < num_tx_qpls; i++) {
@@ -787,30 +854,13 @@ static int gve_register_qpls(struct gve_priv *priv)
return 0;
}
-static int gve_unregister_xdp_qpls(struct gve_priv *priv)
-{
- int start_id;
- int err;
- int i;
-
- start_id = gve_xdp_tx_start_queue_id(priv);
- for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
- err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i));
- /* This failure will trigger a reset - no need to clean */
- if (err)
- return err;
- }
- return 0;
-}
-
static int gve_unregister_qpls(struct gve_priv *priv)
{
int num_tx_qpls, num_rx_qpls;
int err;
int i;
- num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv),
- gve_is_qpl(priv));
+ num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv));
num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv));
for (i = 0; i < num_tx_qpls; i++) {
@@ -829,27 +879,6 @@ static int gve_unregister_qpls(struct gve_priv *priv)
return 0;
}
-static int gve_create_xdp_rings(struct gve_priv *priv)
-{
- int err;
-
- err = gve_adminq_create_tx_queues(priv,
- gve_xdp_tx_start_queue_id(priv),
- priv->num_xdp_queues);
- if (err) {
- netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
- priv->num_xdp_queues);
- /* This failure will trigger a reset - no need to clean
- * up
- */
- return err;
- }
- netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
- priv->num_xdp_queues);
-
- return 0;
-}
-
static int gve_create_rings(struct gve_priv *priv)
{
int num_tx_queues = gve_num_tx_queues(priv);
@@ -905,7 +934,7 @@ static void init_xdp_sync_stats(struct gve_priv *priv)
int i;
/* Init stats */
- for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
+ for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
u64_stats_init(&priv->tx[i].statss);
@@ -930,24 +959,21 @@ static void gve_init_sync_stats(struct gve_priv *priv)
static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg)
{
- int num_xdp_queues = priv->xdp_prog ? priv->rx_cfg.num_queues : 0;
-
cfg->qcfg = &priv->tx_cfg;
cfg->raw_addressing = !gve_is_qpl(priv);
cfg->ring_size = priv->tx_desc_cnt;
- cfg->start_idx = 0;
- cfg->num_rings = priv->tx_cfg.num_queues + num_xdp_queues;
+ cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues;
cfg->tx = priv->tx;
}
-static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings)
+static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings)
{
int i;
if (!priv->tx)
return;
- for (i = start_id; i < start_id + num_rings; i++) {
+ for (i = 0; i < num_rings; i++) {
if (gve_is_gqi(priv))
gve_tx_stop_ring_gqi(priv, i);
else
@@ -955,12 +981,11 @@ static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings
}
}
-static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
- int num_rings)
+static void gve_tx_start_rings(struct gve_priv *priv, int num_rings)
{
int i;
- for (i = start_id; i < start_id + num_rings; i++) {
+ for (i = 0; i < num_rings; i++) {
if (gve_is_gqi(priv))
gve_tx_start_ring_gqi(priv, i);
else
@@ -968,28 +993,6 @@ static void gve_tx_start_rings(struct gve_priv *priv, int start_id,
}
}
-static int gve_alloc_xdp_rings(struct gve_priv *priv)
-{
- struct gve_tx_alloc_rings_cfg cfg = {0};
- int err = 0;
-
- if (!priv->num_xdp_queues)
- return 0;
-
- gve_tx_get_curr_alloc_cfg(priv, &cfg);
- cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
- cfg.num_rings = priv->num_xdp_queues;
-
- err = gve_tx_alloc_rings_gqi(priv, &cfg);
- if (err)
- return err;
-
- gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings);
- init_xdp_sync_stats(priv);
-
- return 0;
-}
-
static int gve_queues_mem_alloc(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *tx_alloc_cfg,
struct gve_rx_alloc_rings_cfg *rx_alloc_cfg)
@@ -1020,26 +1023,6 @@ free_tx:
return err;
}
-static int gve_destroy_xdp_rings(struct gve_priv *priv)
-{
- int start_id;
- int err;
-
- start_id = gve_xdp_tx_start_queue_id(priv);
- err = gve_adminq_destroy_tx_queues(priv,
- start_id,
- priv->num_xdp_queues);
- if (err) {
- netif_err(priv, drv, priv->dev,
- "failed to destroy XDP queues\n");
- /* This failure will trigger a reset - no need to clean up */
- return err;
- }
- netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
-
- return 0;
-}
-
static int gve_destroy_rings(struct gve_priv *priv)
{
int num_tx_queues = gve_num_tx_queues(priv);
@@ -1064,20 +1047,6 @@ static int gve_destroy_rings(struct gve_priv *priv)
return 0;
}
-static void gve_free_xdp_rings(struct gve_priv *priv)
-{
- struct gve_tx_alloc_rings_cfg cfg = {0};
-
- gve_tx_get_curr_alloc_cfg(priv, &cfg);
- cfg.start_idx = gve_xdp_tx_start_queue_id(priv);
- cfg.num_rings = priv->num_xdp_queues;
-
- if (priv->tx) {
- gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings);
- gve_tx_free_rings_gqi(priv, &cfg);
- }
-}
-
static void gve_queues_mem_free(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *tx_cfg,
struct gve_rx_alloc_rings_cfg *rx_cfg)
@@ -1095,7 +1064,7 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
enum dma_data_direction dir, gfp_t gfp_flags)
{
- *page = alloc_page(gfp_flags);
+ *page = alloc_pages_node(priv->numa_node, gfp_flags, 0);
if (!*page) {
priv->page_alloc_fail++;
return -ENOMEM;
@@ -1196,18 +1165,84 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);
+static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid)
+{
+ struct gve_rx_ring *rx;
+
+ if (!priv->rx)
+ return;
+
+ rx = &priv->rx[qid];
+ rx->xsk_pool = NULL;
+ if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+ xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq);
+
+ if (!priv->tx)
+ return;
+ priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL;
+}
+
+static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev,
+ struct xsk_buff_pool *pool, u16 qid)
+{
+ struct gve_rx_ring *rx;
+ u16 tx_qid;
+ int err;
+
+ rx = &priv->rx[qid];
+ err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+ MEM_TYPE_XSK_BUFF_POOL, pool);
+ if (err) {
+ gve_unreg_xsk_pool(priv, qid);
+ return err;
+ }
+
+ rx->xsk_pool = pool;
+
+ tx_qid = gve_xdp_tx_queue_id(priv, qid);
+ priv->tx[tx_qid].xsk_pool = pool;
+
+ return 0;
+}
+
+static void gve_unreg_xdp_info(struct gve_priv *priv)
+{
+ int i;
+
+ if (!priv->tx_cfg.num_xdp_queues || !priv->rx)
+ return;
+
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ struct gve_rx_ring *rx = &priv->rx[i];
+
+ if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+ xdp_rxq_info_unreg(&rx->xdp_rxq);
+
+ gve_unreg_xsk_pool(priv, i);
+ }
+}
+
+static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid)
+{
+ if (!test_bit(qid, priv->xsk_pools))
+ return NULL;
+
+ return xsk_get_pool_from_qid(priv->dev, qid);
+}
+
static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
struct napi_struct *napi;
struct gve_rx_ring *rx;
int err = 0;
- int i, j;
- u32 tx_qid;
+ int i;
- if (!priv->num_xdp_queues)
+ if (!priv->tx_cfg.num_xdp_queues)
return 0;
for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ struct xsk_buff_pool *xsk_pool;
+
rx = &priv->rx[i];
napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
@@ -1215,64 +1250,28 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
napi->napi_id);
if (err)
goto err;
- err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
- MEM_TYPE_PAGE_SHARED, NULL);
+
+ xsk_pool = gve_get_xsk_pool(priv, i);
+ if (xsk_pool)
+ err = gve_reg_xsk_pool(priv, dev, xsk_pool, i);
+ else if (gve_is_qpl(priv))
+ err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ else
+ err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+ MEM_TYPE_PAGE_POOL,
+ rx->dqo.page_pool);
if (err)
goto err;
- rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
- if (rx->xsk_pool) {
- err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
- napi->napi_id);
- if (err)
- goto err;
- err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
- MEM_TYPE_XSK_BUFF_POOL, NULL);
- if (err)
- goto err;
- xsk_pool_set_rxq_info(rx->xsk_pool,
- &rx->xsk_rxq);
- }
- }
-
- for (i = 0; i < priv->num_xdp_queues; i++) {
- tx_qid = gve_xdp_tx_queue_id(priv, i);
- priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
}
return 0;
err:
- for (j = i; j >= 0; j--) {
- rx = &priv->rx[j];
- if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
- xdp_rxq_info_unreg(&rx->xdp_rxq);
- if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
- xdp_rxq_info_unreg(&rx->xsk_rxq);
- }
+ gve_unreg_xdp_info(priv);
return err;
}
-static void gve_unreg_xdp_info(struct gve_priv *priv)
-{
- int i, tx_qid;
-
- if (!priv->num_xdp_queues)
- return;
-
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- struct gve_rx_ring *rx = &priv->rx[i];
-
- xdp_rxq_info_unreg(&rx->xdp_rxq);
- if (rx->xsk_pool) {
- xdp_rxq_info_unreg(&rx->xsk_rxq);
- rx->xsk_pool = NULL;
- }
- }
-
- for (i = 0; i < priv->num_xdp_queues; i++) {
- tx_qid = gve_xdp_tx_queue_id(priv, i);
- priv->tx[tx_qid].xsk_pool = NULL;
- }
-}
static void gve_drain_page_cache(struct gve_priv *priv)
{
@@ -1285,15 +1284,14 @@ static void gve_drain_page_cache(struct gve_priv *priv)
static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv,
struct gve_rx_alloc_rings_cfg *cfg)
{
- cfg->qcfg = &priv->rx_cfg;
+ cfg->qcfg_rx = &priv->rx_cfg;
cfg->qcfg_tx = &priv->tx_cfg;
cfg->raw_addressing = !gve_is_qpl(priv);
cfg->enable_header_split = priv->header_split_enabled;
cfg->ring_size = priv->rx_desc_cnt;
- cfg->packet_buffer_size = gve_is_gqi(priv) ?
- GVE_DEFAULT_RX_BUFFER_SIZE :
- priv->data_buffer_size_dqo;
+ cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size;
cfg->rx = priv->rx;
+ cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues;
}
void gve_get_curr_alloc_cfgs(struct gve_priv *priv,
@@ -1366,17 +1364,13 @@ static int gve_queues_start(struct gve_priv *priv,
/* Record new configs into priv */
priv->tx_cfg = *tx_alloc_cfg->qcfg;
- priv->rx_cfg = *rx_alloc_cfg->qcfg;
+ priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings;
+ priv->rx_cfg = *rx_alloc_cfg->qcfg_rx;
priv->tx_desc_cnt = tx_alloc_cfg->ring_size;
priv->rx_desc_cnt = rx_alloc_cfg->ring_size;
- if (priv->xdp_prog)
- priv->num_xdp_queues = priv->rx_cfg.num_queues;
- else
- priv->num_xdp_queues = 0;
-
- gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings);
- gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues);
+ gve_tx_start_rings(priv, gve_num_tx_queues(priv));
+ gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues);
gve_init_sync_stats(priv);
err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
@@ -1390,12 +1384,18 @@ static int gve_queues_start(struct gve_priv *priv,
if (err)
goto stop_and_free_rings;
+ if (rx_alloc_cfg->reset_rss) {
+ err = gve_init_rss_config(priv, priv->rx_cfg.num_queues);
+ if (err)
+ goto reset;
+ }
+
err = gve_register_qpls(priv);
if (err)
goto reset;
priv->header_split_enabled = rx_alloc_cfg->enable_header_split;
- priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size;
+ priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size;
err = gve_create_rings(priv);
if (err)
@@ -1422,7 +1422,7 @@ reset:
/* return the original error */
return err;
stop_and_free_rings:
- gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
+ gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
gve_queues_mem_remove(priv);
return err;
@@ -1467,11 +1467,11 @@ static int gve_queues_stop(struct gve_priv *priv)
goto err;
gve_clear_device_rings_ok(priv);
}
- del_timer_sync(&priv->stats_report_timer);
+ timer_delete_sync(&priv->stats_report_timer);
gve_unreg_xdp_info(priv);
- gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv));
+ gve_tx_stop_rings(priv, gve_num_tx_queues(priv));
gve_rx_stop_rings(priv, priv->rx_cfg.num_queues);
priv->interface_down_cnt++;
@@ -1501,56 +1501,6 @@ static int gve_close(struct net_device *dev)
return 0;
}
-static int gve_remove_xdp_queues(struct gve_priv *priv)
-{
- int err;
-
- err = gve_destroy_xdp_rings(priv);
- if (err)
- return err;
-
- err = gve_unregister_xdp_qpls(priv);
- if (err)
- return err;
-
- gve_unreg_xdp_info(priv);
- gve_free_xdp_rings(priv);
-
- priv->num_xdp_queues = 0;
- return 0;
-}
-
-static int gve_add_xdp_queues(struct gve_priv *priv)
-{
- int err;
-
- priv->num_xdp_queues = priv->rx_cfg.num_queues;
-
- err = gve_alloc_xdp_rings(priv);
- if (err)
- goto err;
-
- err = gve_reg_xdp_info(priv, priv->dev);
- if (err)
- goto free_xdp_rings;
-
- err = gve_register_xdp_qpls(priv);
- if (err)
- goto free_xdp_rings;
-
- err = gve_create_xdp_rings(priv);
- if (err)
- goto free_xdp_rings;
-
- return 0;
-
-free_xdp_rings:
- gve_free_xdp_rings(priv);
-err:
- priv->num_xdp_queues = 0;
- return err;
-}
-
static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
if (!gve_get_napi_enabled(priv))
@@ -1568,6 +1518,19 @@ static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
}
}
+static int gve_configure_rings_xdp(struct gve_priv *priv,
+ u16 num_xdp_rings)
+{
+ struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
+ struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
+
+ gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+ tx_alloc_cfg.num_xdp_rings = num_xdp_rings;
+
+ rx_alloc_cfg.xdp = !!num_xdp_rings;
+ return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
+}
+
static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
@@ -1580,42 +1543,49 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
WRITE_ONCE(priv->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
+
+ /* Update priv XDP queue configuration */
+ priv->tx_cfg.num_xdp_queues = priv->xdp_prog ?
+ priv->rx_cfg.num_queues : 0;
return 0;
}
- gve_turndown(priv);
- if (!old_prog && prog) {
- // Allocate XDP TX queues if an XDP program is
- // being installed
- err = gve_add_xdp_queues(priv);
- if (err)
- goto out;
- } else if (old_prog && !prog) {
- // Remove XDP TX queues if an XDP program is
- // being uninstalled
- err = gve_remove_xdp_queues(priv);
- if (err)
- goto out;
- }
+ if (!old_prog && prog)
+ err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
+ else if (old_prog && !prog)
+ err = gve_configure_rings_xdp(priv, 0);
+
+ if (err)
+ goto out;
+
WRITE_ONCE(priv->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
out:
- gve_turnup(priv);
status = ioread32be(&priv->reg_bar0->device_status);
gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
return err;
}
+static int gve_xdp_xmit(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+
+ if (priv->queue_format == GVE_GQI_QPL_FORMAT)
+ return gve_xdp_xmit_gqi(dev, n, frames, flags);
+ else if (priv->queue_format == GVE_DQO_RDA_FORMAT)
+ return gve_xdp_xmit_dqo(dev, n, frames, flags);
+
+ return -EOPNOTSUPP;
+}
+
static int gve_xsk_pool_enable(struct net_device *dev,
struct xsk_buff_pool *pool,
u16 qid)
{
struct gve_priv *priv = netdev_priv(dev);
- struct napi_struct *napi;
- struct gve_rx_ring *rx;
- int tx_qid;
int err;
if (qid >= priv->rx_cfg.num_queues) {
@@ -1633,34 +1603,31 @@ static int gve_xsk_pool_enable(struct net_device *dev,
if (err)
return err;
+ set_bit(qid, priv->xsk_pools);
+
/* If XDP prog is not installed or interface is down, return. */
if (!priv->xdp_prog || !netif_running(dev))
return 0;
- rx = &priv->rx[qid];
- napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
- err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
- if (err)
- goto err;
-
- err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
- MEM_TYPE_XSK_BUFF_POOL, NULL);
+ err = gve_reg_xsk_pool(priv, dev, pool, qid);
if (err)
- goto err;
-
- xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
- rx->xsk_pool = pool;
-
- tx_qid = gve_xdp_tx_queue_id(priv, qid);
- priv->tx[tx_qid].xsk_pool = pool;
+ goto err_xsk_pool_dma_mapped;
+ /* Stop and start RDA queues to repost buffers. */
+ if (!gve_is_qpl(priv)) {
+ err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
+ if (err)
+ goto err_xsk_pool_registered;
+ }
return 0;
-err:
- if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
- xdp_rxq_info_unreg(&rx->xsk_rxq);
+err_xsk_pool_registered:
+ gve_unreg_xsk_pool(priv, qid);
+err_xsk_pool_dma_mapped:
+ clear_bit(qid, priv->xsk_pools);
xsk_pool_dma_unmap(pool,
- DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_WEAK_ORDERING);
return err;
}
@@ -1672,18 +1639,28 @@ static int gve_xsk_pool_disable(struct net_device *dev,
struct napi_struct *napi_tx;
struct xsk_buff_pool *pool;
int tx_qid;
+ int err;
- pool = xsk_get_pool_from_qid(dev, qid);
- if (!pool)
- return -EINVAL;
if (qid >= priv->rx_cfg.num_queues)
return -EINVAL;
- /* If XDP prog is not installed or interface is down, unmap DMA and
- * return.
- */
- if (!priv->xdp_prog || !netif_running(dev))
- goto done;
+ clear_bit(qid, priv->xsk_pools);
+
+ pool = xsk_get_pool_from_qid(dev, qid);
+ if (pool)
+ xsk_pool_dma_unmap(pool,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_WEAK_ORDERING);
+
+ if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues)
+ return 0;
+
+ /* Stop and start RDA queues to repost buffers. */
+ if (!gve_is_qpl(priv) && priv->xdp_prog) {
+ err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues);
+ if (err)
+ return err;
+ }
napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
napi_disable(napi_rx); /* make sure current rx poll is done */
@@ -1692,22 +1669,19 @@ static int gve_xsk_pool_disable(struct net_device *dev,
napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
napi_disable(napi_tx); /* make sure current tx poll is done */
- priv->rx[qid].xsk_pool = NULL;
- xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
- priv->tx[tx_qid].xsk_pool = NULL;
+ gve_unreg_xsk_pool(priv, qid);
smp_mb(); /* Make sure it is visible to the workers on datapath */
napi_enable(napi_rx);
- if (gve_rx_work_pending(&priv->rx[qid]))
- napi_schedule(napi_rx);
-
napi_enable(napi_tx);
- if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
- napi_schedule(napi_tx);
+ if (gve_is_gqi(priv)) {
+ if (gve_rx_work_pending(&priv->rx[qid]))
+ napi_schedule(napi_rx);
+
+ if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
+ napi_schedule(napi_tx);
+ }
-done:
- xsk_pool_dma_unmap(pool,
- DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
return 0;
}
@@ -1736,19 +1710,23 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
static int verify_xdp_configuration(struct net_device *dev)
{
struct gve_priv *priv = netdev_priv(dev);
+ u16 max_xdp_mtu;
if (dev->features & NETIF_F_LRO) {
netdev_warn(dev, "XDP is not supported when LRO is on.\n");
return -EOPNOTSUPP;
}
- if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
- netdev_warn(dev, "XDP is not supported in mode %d.\n",
- priv->queue_format);
+ if (priv->header_split_enabled) {
+ netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n");
return -EOPNOTSUPP;
}
- if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) {
+ max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr);
+ if (priv->queue_format == GVE_GQI_QPL_FORMAT)
+ max_xdp_mtu -= GVE_RX_PAD;
+
+ if (dev->mtu > max_xdp_mtu) {
netdev_warn(dev, "XDP is not supported for mtu %d.\n",
dev->mtu);
return -EOPNOTSUPP;
@@ -1786,6 +1764,26 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
+int gve_init_rss_config(struct gve_priv *priv, u16 num_queues)
+{
+ struct gve_rss_config *rss_config = &priv->rss_config;
+ struct ethtool_rxfh_param rxfh = {0};
+ u16 i;
+
+ if (!priv->cache_rss_config)
+ return 0;
+
+ for (i = 0; i < priv->rss_lut_size; i++)
+ rss_config->hash_lut[i] =
+ ethtool_rxfh_indir_default(i, num_queues);
+
+ netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size);
+
+ rxfh.hfunc = ETH_RSS_HASH_TOP;
+
+ return gve_adminq_configure_rss(priv, &rxfh);
+}
+
int gve_flow_rules_reset(struct gve_priv *priv)
{
if (!priv->max_flow_rules)
@@ -1800,7 +1798,7 @@ int gve_adjust_config(struct gve_priv *priv,
{
int err;
- /* Allocate resources for the new confiugration */
+ /* Allocate resources for the new configuration */
err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -1833,12 +1831,12 @@ int gve_adjust_config(struct gve_priv *priv,
}
int gve_adjust_queues(struct gve_priv *priv,
- struct gve_queue_config new_rx_config,
- struct gve_queue_config new_tx_config)
+ struct gve_rx_queue_config new_rx_config,
+ struct gve_tx_queue_config new_tx_config,
+ bool reset_rss)
{
struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0};
struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0};
- int num_xdp_queues;
int err;
gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg);
@@ -1846,18 +1844,19 @@ int gve_adjust_queues(struct gve_priv *priv,
/* Relay the new config from ethtool */
tx_alloc_cfg.qcfg = &new_tx_config;
rx_alloc_cfg.qcfg_tx = &new_tx_config;
- rx_alloc_cfg.qcfg = &new_rx_config;
- tx_alloc_cfg.num_rings = new_tx_config.num_queues;
-
- /* Add dedicated XDP TX queues if enabled. */
- num_xdp_queues = priv->xdp_prog ? new_rx_config.num_queues : 0;
- tx_alloc_cfg.num_rings += num_xdp_queues;
+ rx_alloc_cfg.qcfg_rx = &new_rx_config;
+ rx_alloc_cfg.reset_rss = reset_rss;
if (netif_running(priv->dev)) {
err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
return err;
}
/* Set the config for the next up. */
+ if (reset_rss) {
+ err = gve_init_rss_config(priv, new_rx_config.num_queues);
+ if (err)
+ return err;
+ }
priv->tx_cfg = new_tx_config;
priv->rx_cfg = new_rx_config;
@@ -1886,7 +1885,7 @@ static void gve_turndown(struct gve_priv *priv)
netif_queue_set_napi(priv->dev, idx,
NETDEV_QUEUE_TYPE_TX, NULL);
- napi_disable(&block->napi);
+ napi_disable_locked(&block->napi);
}
for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
@@ -1897,12 +1896,14 @@ static void gve_turndown(struct gve_priv *priv)
netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
NULL);
- napi_disable(&block->napi);
+ napi_disable_locked(&block->napi);
}
/* Stop tx queues */
netif_tx_disable(priv->dev);
+ xdp_features_clear_redirect_target_locked(priv->dev);
+
gve_clear_napi_enabled(priv);
gve_clear_report_stats(priv);
@@ -1925,7 +1926,7 @@ static void gve_turnup(struct gve_priv *priv)
if (!gve_tx_was_added_to_block(priv, idx))
continue;
- napi_enable(&block->napi);
+ napi_enable_locked(&block->napi);
if (idx < priv->tx_cfg.num_queues)
netif_queue_set_napi(priv->dev, idx,
@@ -1953,7 +1954,7 @@ static void gve_turnup(struct gve_priv *priv)
if (!gve_rx_was_added_to_block(priv, idx))
continue;
- napi_enable(&block->napi);
+ napi_enable_locked(&block->napi);
netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX,
&block->napi);
@@ -1972,6 +1973,9 @@ static void gve_turnup(struct gve_priv *priv)
napi_schedule(&block->napi);
}
+ if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv))
+ xdp_features_set_redirect_target_locked(priv->dev, false);
+
gve_set_napi_enabled(priv);
}
@@ -1984,49 +1988,56 @@ static void gve_turnup_and_check_status(struct gve_priv *priv)
gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}
-static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
+static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv,
+ unsigned int txqueue)
{
- struct gve_notify_block *block;
- struct gve_tx_ring *tx = NULL;
- struct gve_priv *priv;
- u32 last_nic_done;
- u32 current_time;
u32 ntfy_idx;
- netdev_info(dev, "Timeout on tx queue, %d", txqueue);
- priv = netdev_priv(dev);
if (txqueue > priv->tx_cfg.num_queues)
- goto reset;
+ return NULL;
ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
if (ntfy_idx >= priv->num_ntfy_blks)
- goto reset;
+ return NULL;
- block = &priv->ntfy_blocks[ntfy_idx];
- tx = block->tx;
+ return &priv->ntfy_blocks[ntfy_idx];
+}
+
+static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv,
+ unsigned int txqueue)
+{
+ struct gve_notify_block *block;
+ u32 current_time;
+
+ block = gve_get_tx_notify_block(priv, txqueue);
+
+ if (!block)
+ return false;
current_time = jiffies_to_msecs(jiffies);
- if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
- goto reset;
+ if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
+ return false;
- /* Check to see if there are missed completions, which will allow us to
- * kick the queue.
- */
- last_nic_done = gve_tx_load_event_counter(priv, tx);
- if (last_nic_done - tx->done) {
- netdev_info(dev, "Kicking queue %d", txqueue);
- iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
- napi_schedule(&block->napi);
- tx->last_kick_msec = current_time;
- goto out;
- } // Else reset.
+ netdev_info(priv->dev, "Kicking queue %d", txqueue);
+ napi_schedule(&block->napi);
+ block->tx->last_kick_msec = current_time;
+ return true;
+}
-reset:
- gve_schedule_reset(priv);
+static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+ struct gve_notify_block *block;
+ struct gve_priv *priv;
-out:
- if (tx)
- tx->queue_timeout++;
+ netdev_info(dev, "Timeout on tx queue, %d", txqueue);
+ priv = netdev_priv(dev);
+
+ if (!gve_tx_timeout_try_q_kick(priv, txqueue))
+ gve_schedule_reset(priv);
+
+ block = gve_get_tx_notify_block(priv, txqueue);
+ if (block)
+ block->tx->queue_timeout++;
priv->tx_timeo_cnt++;
}
@@ -2038,10 +2049,13 @@ u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit)
return GVE_DEFAULT_RX_BUFFER_SIZE;
}
-/* header-split is not supported on non-DQO_RDA yet even if device advertises it */
+/* Header split is only supported on DQ RDA queue format. If XDP is enabled,
+ * header split is not allowed.
+ */
bool gve_header_split_supported(const struct gve_priv *priv)
{
- return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT;
+ return priv->header_buf_size &&
+ priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog;
}
int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split)
@@ -2090,6 +2104,12 @@ static int gve_set_features(struct net_device *netdev,
if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
netdev->features ^= NETIF_F_LRO;
+ if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) {
+ netdev_warn(netdev,
+ "XDP is not supported when LRO is on.\n");
+ err = -EOPNOTSUPP;
+ goto revert_features;
+ }
if (netif_running(netdev)) {
err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg);
if (err)
@@ -2109,6 +2129,46 @@ revert_features:
return err;
}
+static int gve_get_ts_config(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_config)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+
+ *kernel_config = priv->ts_config;
+ return 0;
+}
+
+static int gve_set_ts_config(struct net_device *dev,
+ struct kernel_hwtstamp_config *kernel_config,
+ struct netlink_ext_ack *extack)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+
+ if (kernel_config->tx_type != HWTSTAMP_TX_OFF) {
+ NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported");
+ return -ERANGE;
+ }
+
+ if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) {
+ if (!priv->nic_ts_report) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "RX timestamping is not supported");
+ kernel_config->rx_filter = HWTSTAMP_FILTER_NONE;
+ return -EOPNOTSUPP;
+ }
+
+ kernel_config->rx_filter = HWTSTAMP_FILTER_ALL;
+ gve_clock_nic_ts_read(priv);
+ ptp_schedule_worker(priv->ptp->clock, 0);
+ } else {
+ ptp_cancel_worker_sync(priv->ptp->clock);
+ }
+
+ priv->ts_config.rx_filter = kernel_config->rx_filter;
+
+ return 0;
+}
+
static const struct net_device_ops gve_netdev_ops = {
.ndo_start_xmit = gve_start_xmit,
.ndo_features_check = gve_features_check,
@@ -2120,6 +2180,8 @@ static const struct net_device_ops gve_netdev_ops = {
.ndo_bpf = gve_xdp,
.ndo_xdp_xmit = gve_xdp_xmit,
.ndo_xsk_wakeup = gve_xsk_wakeup,
+ .ndo_hwtstamp_get = gve_get_ts_config,
+ .ndo_hwtstamp_set = gve_set_ts_config,
};
static void gve_handle_status(struct gve_priv *priv, u32 status)
@@ -2145,7 +2207,9 @@ static void gve_handle_reset(struct gve_priv *priv)
if (gve_get_do_reset(priv)) {
rtnl_lock();
+ netdev_lock(priv->dev);
gve_reset(priv, false);
+ netdev_unlock(priv->dev);
rtnl_unlock();
}
}
@@ -2219,7 +2283,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
};
stats[stats_idx++] = (struct stats) {
.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
- .value = cpu_to_be64(priv->rx[0].fill_cnt),
+ .value = cpu_to_be64(priv->rx[idx].fill_cnt),
.queue_id = cpu_to_be32(idx),
};
}
@@ -2246,13 +2310,16 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv)
if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
xdp_features = NETDEV_XDP_ACT_BASIC;
xdp_features |= NETDEV_XDP_ACT_REDIRECT;
- xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
+ xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+ xdp_features = NETDEV_XDP_ACT_BASIC;
+ xdp_features |= NETDEV_XDP_ACT_REDIRECT;
xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
} else {
xdp_features = 0;
}
- xdp_set_features_flag(priv->dev, xdp_features);
+ xdp_set_features_flag_locked(priv->dev, xdp_features);
}
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
@@ -2302,7 +2369,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
goto err;
}
- /* Big TCP is only supported on DQ*/
+ /* Big TCP is only supported on DQO */
if (!gve_is_gqi(priv))
netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
@@ -2312,6 +2379,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
*/
priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
priv->mgmt_msix_idx = priv->num_ntfy_blks;
+ priv->numa_node = dev_to_node(&priv->pdev->dev);
priv->tx_cfg.max_queues =
min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
@@ -2326,6 +2394,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
priv->rx_cfg.num_queues);
}
+ priv->tx_cfg.num_xdp_queues = 0;
dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
@@ -2337,11 +2406,26 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
}
+ priv->ts_config.tx_type = HWTSTAMP_TX_OFF;
+ priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE;
+
setup_device:
+ priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL);
+ if (!priv->xsk_pools) {
+ err = -ENOMEM;
+ goto err;
+ }
+
gve_set_netdev_xdp_features(priv);
err = gve_setup_device_resources(priv);
- if (!err)
- return 0;
+ if (err)
+ goto err_free_xsk_bitmap;
+
+ return 0;
+
+err_free_xsk_bitmap:
+ bitmap_free(priv->xsk_pools);
+ priv->xsk_pools = NULL;
err:
gve_adminq_free(&priv->pdev->dev, priv);
return err;
@@ -2351,6 +2435,8 @@ static void gve_teardown_priv_resources(struct gve_priv *priv)
{
gve_teardown_device_resources(priv);
gve_adminq_free(&priv->pdev->dev, priv);
+ bitmap_free(priv->xsk_pools);
+ priv->xsk_pools = NULL;
}
static void gve_trigger_reset(struct gve_priv *priv)
@@ -2706,7 +2792,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
priv->service_task_flags = 0x0;
priv->state_flags = 0x0;
priv->ethtool_flags = 0x0;
- priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE;
+ priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
gve_set_probe_in_progress(priv);
@@ -2725,6 +2811,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto abort_with_wq;
+ if (!gve_is_gqi(priv) && !gve_is_qpl(priv))
+ dev->netmem_tx = true;
+
err = register_netdev(dev);
if (err)
goto abort_with_gve_init;
@@ -2781,7 +2870,10 @@ static void gve_shutdown(struct pci_dev *pdev)
struct gve_priv *priv = netdev_priv(netdev);
bool was_up = netif_running(priv->dev);
+ netif_device_detach(netdev);
+
rtnl_lock();
+ netdev_lock(netdev);
if (was_up && gve_close(priv->dev)) {
/* If the dev was up, attempt to close, if close fails, reset */
gve_reset_and_teardown(priv, was_up);
@@ -2789,6 +2881,7 @@ static void gve_shutdown(struct pci_dev *pdev)
/* If the dev wasn't up or close worked, finish tearing down */
gve_teardown_priv_resources(priv);
}
+ netdev_unlock(netdev);
rtnl_unlock();
}
@@ -2801,6 +2894,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
priv->suspend_cnt++;
rtnl_lock();
+ netdev_lock(netdev);
if (was_up && gve_close(priv->dev)) {
/* If the dev was up, attempt to close, if close fails, reset */
gve_reset_and_teardown(priv, was_up);
@@ -2809,6 +2903,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
gve_teardown_priv_resources(priv);
}
priv->up_before_suspend = was_up;
+ netdev_unlock(netdev);
rtnl_unlock();
return 0;
}
@@ -2821,7 +2916,9 @@ static int gve_resume(struct pci_dev *pdev)
priv->resume_cnt++;
rtnl_lock();
+ netdev_lock(netdev);
err = gve_reset_recovery(priv, priv->up_before_suspend);
+ netdev_unlock(netdev);
rtnl_unlock();
return err;
}
diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c
new file mode 100644
index 000000000000..e96247c9d68d
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_ptp.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2025 Google LLC
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+
+/* Interval to schedule a nic timestamp calibration, 250ms. */
+#define GVE_NIC_TS_SYNC_INTERVAL_MS 250
+
+/* Read the nic timestamp from hardware via the admin queue. */
+int gve_clock_nic_ts_read(struct gve_priv *priv)
+{
+ u64 nic_raw;
+ int err;
+
+ err = gve_adminq_report_nic_ts(priv, priv->nic_ts_report_bus);
+ if (err)
+ return err;
+
+ nic_raw = be64_to_cpu(priv->nic_ts_report->nic_timestamp);
+ WRITE_ONCE(priv->last_sync_nic_counter, nic_raw);
+
+ return 0;
+}
+
+static long gve_ptp_do_aux_work(struct ptp_clock_info *info)
+{
+ const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info);
+ struct gve_priv *priv = ptp->priv;
+ int err;
+
+ if (gve_get_reset_in_progress(priv) || !gve_get_admin_queue_ok(priv))
+ goto out;
+
+ err = gve_clock_nic_ts_read(priv);
+ if (err && net_ratelimit())
+ dev_err(&priv->pdev->dev,
+ "%s read err %d\n", __func__, err);
+
+out:
+ return msecs_to_jiffies(GVE_NIC_TS_SYNC_INTERVAL_MS);
+}
+
+static const struct ptp_clock_info gve_ptp_caps = {
+ .owner = THIS_MODULE,
+ .name = "gve clock",
+ .do_aux_work = gve_ptp_do_aux_work,
+};
+
+static int gve_ptp_init(struct gve_priv *priv)
+{
+ struct gve_ptp *ptp;
+ int err;
+
+ if (!priv->nic_timestamp_supported) {
+ dev_dbg(&priv->pdev->dev, "Device does not support PTP\n");
+ return -EOPNOTSUPP;
+ }
+
+ priv->ptp = kzalloc(sizeof(*priv->ptp), GFP_KERNEL);
+ if (!priv->ptp)
+ return -ENOMEM;
+
+ ptp = priv->ptp;
+ ptp->info = gve_ptp_caps;
+ ptp->clock = ptp_clock_register(&ptp->info, &priv->pdev->dev);
+
+ if (IS_ERR(ptp->clock)) {
+ dev_err(&priv->pdev->dev, "PTP clock registration failed\n");
+ err = PTR_ERR(ptp->clock);
+ goto free_ptp;
+ }
+
+ ptp->priv = priv;
+ return 0;
+
+free_ptp:
+ kfree(ptp);
+ priv->ptp = NULL;
+ return err;
+}
+
+static void gve_ptp_release(struct gve_priv *priv)
+{
+ struct gve_ptp *ptp = priv->ptp;
+
+ if (!ptp)
+ return;
+
+ if (ptp->clock)
+ ptp_clock_unregister(ptp->clock);
+
+ kfree(ptp);
+ priv->ptp = NULL;
+}
+
+int gve_init_clock(struct gve_priv *priv)
+{
+ int err;
+
+ if (!priv->nic_timestamp_supported)
+ return 0;
+
+ err = gve_ptp_init(priv);
+ if (err)
+ return err;
+
+ priv->nic_ts_report =
+ dma_alloc_coherent(&priv->pdev->dev,
+ sizeof(struct gve_nic_ts_report),
+ &priv->nic_ts_report_bus,
+ GFP_KERNEL);
+ if (!priv->nic_ts_report) {
+ dev_err(&priv->pdev->dev, "%s dma alloc error\n", __func__);
+ err = -ENOMEM;
+ goto release_ptp;
+ }
+
+ return 0;
+
+release_ptp:
+ gve_ptp_release(priv);
+ return err;
+}
+
+void gve_teardown_clock(struct gve_priv *priv)
+{
+ gve_ptp_release(priv);
+
+ if (priv->nic_ts_report) {
+ dma_free_coherent(&priv->pdev->dev,
+ sizeof(struct gve_nic_ts_report),
+ priv->nic_ts_report, priv->nic_ts_report_bus);
+ priv->nic_ts_report = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index acb73d4d0de6..ec424d2f4f57 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -141,12 +141,15 @@ void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx,
netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}
-static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
- dma_addr_t addr, struct page *page, __be64 *slot_addr)
+static void gve_setup_rx_buffer(struct gve_rx_ring *rx,
+ struct gve_rx_slot_page_info *page_info,
+ dma_addr_t addr, struct page *page,
+ __be64 *slot_addr)
{
page_info->page = page;
page_info->page_offset = 0;
page_info->page_address = page_address(page);
+ page_info->buf_size = rx->packet_buffer_size;
*slot_addr = cpu_to_be64(addr);
/* The page already has 1 ref */
page_ref_add(page, INT_MAX - 1);
@@ -171,7 +174,7 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
return err;
}
- gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
+ gve_setup_rx_buffer(rx, page_info, dma, page, &data_slot->addr);
return 0;
}
@@ -189,8 +192,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
*/
slots = rx->mask + 1;
- rx->data.page_info = kvzalloc(slots *
- sizeof(*rx->data.page_info), GFP_KERNEL);
+ rx->data.page_info = kvcalloc_node(slots, sizeof(*rx->data.page_info),
+ GFP_KERNEL, priv->numa_node);
if (!rx->data.page_info)
return -ENOMEM;
@@ -199,7 +202,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
struct page *page = rx->data.qpl->pages[i];
dma_addr_t addr = i * PAGE_SIZE;
- gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
+ gve_setup_rx_buffer(rx, &rx->data.page_info[i], addr,
+ page,
&rx->data.data_ring[i].qpl_offset);
continue;
}
@@ -212,7 +216,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
if (!rx->data.raw_addressing) {
for (j = 0; j < rx->qpl_copy_pool_mask + 1; j++) {
- struct page *page = alloc_page(GFP_KERNEL);
+ struct page *page = alloc_pages_node(priv->numa_node,
+ GFP_KERNEL, 0);
if (!page) {
err = -ENOMEM;
@@ -222,6 +227,7 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx,
rx->qpl_copy_pool[j].page = page;
rx->qpl_copy_pool[j].page_offset = 0;
rx->qpl_copy_pool[j].page_address = page_address(page);
+ rx->qpl_copy_pool[j].buf_size = rx->packet_buffer_size;
/* The page already has 1 ref. */
page_ref_add(page, INT_MAX - 1);
@@ -283,6 +289,7 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
rx->gve = priv;
rx->q_num = idx;
+ rx->packet_buffer_size = cfg->packet_buffer_size;
rx->mask = slots - 1;
rx->data.raw_addressing = cfg->raw_addressing;
@@ -297,10 +304,9 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
rx->qpl_copy_pool_mask = min_t(u32, U32_MAX, slots * 2) - 1;
rx->qpl_copy_pool_head = 0;
- rx->qpl_copy_pool = kvcalloc(rx->qpl_copy_pool_mask + 1,
- sizeof(rx->qpl_copy_pool[0]),
- GFP_KERNEL);
-
+ rx->qpl_copy_pool = kvcalloc_node(rx->qpl_copy_pool_mask + 1,
+ sizeof(rx->qpl_copy_pool[0]),
+ GFP_KERNEL, priv->numa_node);
if (!rx->qpl_copy_pool) {
err = -ENOMEM;
goto abort_with_slots;
@@ -351,7 +357,6 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv,
rx->db_threshold = slots / 2;
gve_rx_init_ring_state_gqi(rx);
- rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE;
gve_rx_ctx_clear(&rx->ctx);
return 0;
@@ -385,12 +390,12 @@ int gve_rx_alloc_rings_gqi(struct gve_priv *priv,
int err = 0;
int i, j;
- rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
+ rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
GFP_KERNEL);
if (!rx)
return -ENOMEM;
- for (i = 0; i < cfg->qcfg->num_queues; i++) {
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
err = gve_rx_alloc_ring_gqi(priv, cfg, &rx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -419,7 +424,7 @@ void gve_rx_free_rings_gqi(struct gve_priv *priv,
if (!rx)
return;
- for (i = 0; i < cfg->qcfg->num_queues; i++)
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
gve_rx_free_ring_gqi(priv, &rx[i], cfg);
kvfree(rx);
@@ -590,7 +595,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx,
copy_page_info->pad = page_info->pad;
skb = gve_rx_add_frags(napi, copy_page_info,
- rx->packet_buffer_size, len, ctx);
+ copy_page_info->buf_size, len, ctx);
if (unlikely(!skb))
return NULL;
@@ -630,7 +635,8 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev,
* device.
*/
if (page_info->can_flip) {
- skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx);
+ skb = gve_rx_add_frags(napi, page_info, page_info->buf_size,
+ len, ctx);
/* No point in recycling if we didn't get the skb */
if (skb) {
/* Make sure that the page isn't freed. */
@@ -680,7 +686,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev,
page_info, len, napi,
data_slot,
- rx->packet_buffer_size, ctx);
+ page_info->buf_size, ctx);
} else {
skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx,
page_info, len, napi, data_slot);
@@ -855,7 +861,7 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
void *old_data;
int xdp_act;
- xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq);
+ xdp_init_buff(&xdp, page_info->buf_size, &rx->xdp_rxq);
xdp_prepare_buff(&xdp, page_info->page_address +
page_info->page_offset, GVE_RX_PAD,
len, false);
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 8ac0047f1ada..7380c2b7a2d8 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -8,6 +8,7 @@
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_utils.h"
+#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/skbuff.h>
@@ -15,6 +16,7 @@
#include <net/ip6_checksum.h>
#include <net/ipv6.h>
#include <net/tcp.h>
+#include <net/xdp_sock_drv.h>
static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx)
{
@@ -109,10 +111,13 @@ static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx)
void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx)
{
int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
+ struct gve_rx_ring *rx = &priv->rx[idx];
if (!gve_rx_was_added_to_block(priv, idx))
return;
+ if (rx->dqo.page_pool)
+ page_pool_disable_direct_recycling(rx->dqo.page_pool);
gve_remove_napi(priv, ntfy_idx);
gve_rx_remove_from_block(priv, idx);
gve_rx_reset_ring_dqo(priv, idx);
@@ -145,6 +150,10 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
gve_free_to_page_pool(rx, bs, false);
else
gve_free_qpl_page_dqo(bs);
+ if (gve_buf_state_is_allocated(rx, bs) && bs->xsk_buff) {
+ xsk_buff_free(bs->xsk_buff);
+ bs->xsk_buff = NULL;
+ }
}
if (rx->dqo.qpl) {
@@ -221,12 +230,21 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
memset(rx, 0, sizeof(*rx));
rx->gve = priv;
rx->q_num = idx;
+ rx->packet_buffer_size = cfg->packet_buffer_size;
+
+ if (cfg->xdp) {
+ rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO;
+ rx->rx_headroom = XDP_PACKET_HEADROOM;
+ } else {
+ rx->packet_buffer_truesize = rx->packet_buffer_size;
+ rx->rx_headroom = 0;
+ }
rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots :
gve_get_rx_pages_per_qpl_dqo(cfg->ring_size);
- rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
- sizeof(rx->dqo.buf_states[0]),
- GFP_KERNEL);
+ rx->dqo.buf_states = kvcalloc_node(rx->dqo.num_buf_states,
+ sizeof(rx->dqo.buf_states[0]),
+ GFP_KERNEL, priv->numa_node);
if (!rx->dqo.buf_states)
return -ENOMEM;
@@ -251,7 +269,7 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv,
goto err;
if (cfg->raw_addressing) {
- pool = gve_rx_create_page_pool(priv, rx);
+ pool = gve_rx_create_page_pool(priv, rx, cfg->xdp);
if (IS_ERR(pool))
goto err;
@@ -297,12 +315,12 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv,
int err;
int i;
- rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring),
+ rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring),
GFP_KERNEL);
if (!rx)
return -ENOMEM;
- for (i = 0; i < cfg->qcfg->num_queues; i++) {
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++) {
err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -331,7 +349,7 @@ void gve_rx_free_rings_dqo(struct gve_priv *priv,
if (!rx)
return;
- for (i = 0; i < cfg->qcfg->num_queues; i++)
+ for (i = 0; i < cfg->qcfg_rx->num_queues; i++)
gve_rx_free_ring_dqo(priv, &rx[i], cfg);
kvfree(rx);
@@ -425,6 +443,29 @@ static void gve_rx_skb_hash(struct sk_buff *skb,
skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type);
}
+/* Expand the hardware timestamp to the full 64 bits of width, and add it to the
+ * skb.
+ *
+ * This algorithm works by using the passed hardware timestamp to generate a
+ * diff relative to the last read of the nic clock. This diff can be positive or
+ * negative, as it is possible that we have read the clock more recently than
+ * the hardware has received this packet. To detect this, we use the high bit of
+ * the diff, and assume that the read is more recent if the high bit is set. In
+ * this case we invert the process.
+ *
+ * Note that this means if the time delta between packet reception and the last
+ * clock read is greater than ~2 seconds, this will provide invalid results.
+ */
+static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, u32 hwts)
+{
+ u64 last_read = READ_ONCE(rx->gve->last_sync_nic_counter);
+ struct sk_buff *skb = rx->ctx.skb_head;
+ u32 low = (u32)last_read;
+ s32 diff = hwts - low;
+
+ skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff);
+}
+
static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx)
{
if (!rx->ctx.skb_head)
@@ -452,7 +493,7 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
struct gve_rx_buf_state_dqo *buf_state,
u16 buf_len)
{
- struct page *page = alloc_page(GFP_ATOMIC);
+ struct page *page = alloc_pages_node(rx->gve->numa_node, GFP_ATOMIC, 0);
int num_frags;
if (!page)
@@ -474,6 +515,25 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
return 0;
}
+static void gve_skb_add_rx_frag(struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state,
+ int num_frags, u16 buf_len)
+{
+ if (rx->dqo.page_pool) {
+ skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags,
+ buf_state->page_info.netmem,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad, buf_len,
+ buf_state->page_info.buf_size);
+ } else {
+ skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
+ buf_state->page_info.page,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad, buf_len,
+ buf_state->page_info.buf_size);
+ }
+}
+
/* Chains multi skbs for single rx packet.
* Returns 0 if buffer is appended, -1 otherwise.
*/
@@ -511,14 +571,153 @@ static int gve_rx_append_frags(struct napi_struct *napi,
if (gve_rx_should_trigger_copy_ondemand(rx))
return gve_rx_copy_ondemand(rx, buf_state, buf_len);
- skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
- buf_state->page_info.page,
- buf_state->page_info.page_offset,
- buf_len, buf_state->page_info.buf_size);
+ gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len);
gve_reuse_buffer(rx, buf_state);
return 0;
}
+static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp)
+{
+ struct gve_tx_ring *tx;
+ struct xdp_frame *xdpf;
+ u32 tx_qid;
+ int err;
+
+ xdpf = xdp_convert_buff_to_frame(xdp);
+ if (unlikely(!xdpf)) {
+ if (rx->xsk_pool)
+ xsk_buff_free(xdp);
+ return -ENOSPC;
+ }
+
+ tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
+ tx = &priv->tx[tx_qid];
+ spin_lock(&tx->dqo_tx.xdp_lock);
+ err = gve_xdp_xmit_one_dqo(priv, tx, xdpf);
+ spin_unlock(&tx->dqo_tx.xdp_lock);
+
+ return err;
+}
+
+static void gve_xsk_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp, struct bpf_prog *xprog,
+ int xdp_act)
+{
+ switch (xdp_act) {
+ case XDP_ABORTED:
+ case XDP_DROP:
+ default:
+ xsk_buff_free(xdp);
+ break;
+ case XDP_TX:
+ if (unlikely(gve_xdp_tx_dqo(priv, rx, xdp)))
+ goto err;
+ break;
+ case XDP_REDIRECT:
+ if (unlikely(xdp_do_redirect(priv->dev, xdp, xprog)))
+ goto err;
+ break;
+ }
+
+ u64_stats_update_begin(&rx->statss);
+ if ((u32)xdp_act < GVE_XDP_ACTIONS)
+ rx->xdp_actions[xdp_act]++;
+ u64_stats_update_end(&rx->statss);
+ return;
+
+err:
+ u64_stats_update_begin(&rx->statss);
+ if (xdp_act == XDP_TX)
+ rx->xdp_tx_errors++;
+ if (xdp_act == XDP_REDIRECT)
+ rx->xdp_redirect_errors++;
+ u64_stats_update_end(&rx->statss);
+}
+
+static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp, struct bpf_prog *xprog,
+ int xdp_act,
+ struct gve_rx_buf_state_dqo *buf_state)
+{
+ int err;
+ switch (xdp_act) {
+ case XDP_ABORTED:
+ case XDP_DROP:
+ default:
+ gve_free_buffer(rx, buf_state);
+ break;
+ case XDP_TX:
+ err = gve_xdp_tx_dqo(priv, rx, xdp);
+ if (unlikely(err))
+ goto err;
+ gve_reuse_buffer(rx, buf_state);
+ break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(priv->dev, xdp, xprog);
+ if (unlikely(err))
+ goto err;
+ gve_reuse_buffer(rx, buf_state);
+ break;
+ }
+ u64_stats_update_begin(&rx->statss);
+ if ((u32)xdp_act < GVE_XDP_ACTIONS)
+ rx->xdp_actions[xdp_act]++;
+ u64_stats_update_end(&rx->statss);
+ return;
+err:
+ u64_stats_update_begin(&rx->statss);
+ if (xdp_act == XDP_TX)
+ rx->xdp_tx_errors++;
+ else if (xdp_act == XDP_REDIRECT)
+ rx->xdp_redirect_errors++;
+ u64_stats_update_end(&rx->statss);
+ gve_free_buffer(rx, buf_state);
+ return;
+}
+
+static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
+ struct gve_rx_buf_state_dqo *buf_state, int buf_len,
+ struct bpf_prog *xprog)
+{
+ struct xdp_buff *xdp = buf_state->xsk_buff;
+ struct gve_priv *priv = rx->gve;
+ int xdp_act;
+
+ xdp->data_end = xdp->data + buf_len;
+ xsk_buff_dma_sync_for_cpu(xdp);
+
+ if (xprog) {
+ xdp_act = bpf_prog_run_xdp(xprog, xdp);
+ buf_len = xdp->data_end - xdp->data;
+ if (xdp_act != XDP_PASS) {
+ gve_xsk_done_dqo(priv, rx, xdp, xprog, xdp_act);
+ gve_free_buf_state(rx, buf_state);
+ return 0;
+ }
+ }
+
+ /* Copy the data to skb */
+ rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi,
+ xdp->data, buf_len);
+ if (unlikely(!rx->ctx.skb_head)) {
+ xsk_buff_free(xdp);
+ gve_free_buf_state(rx, buf_state);
+ return -ENOMEM;
+ }
+ rx->ctx.skb_tail = rx->ctx.skb_head;
+
+ /* Free XSK buffer and Buffer state */
+ xsk_buff_free(xdp);
+ gve_free_buf_state(rx, buf_state);
+
+ /* Update Stats */
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_actions[XDP_PASS]++;
+ u64_stats_update_end(&rx->statss);
+ return 0;
+}
+
/* Returns 0 if descriptor is completed successfully.
* Returns -EINVAL if descriptor is invalid.
* Returns -ENOMEM if data cannot be copied to skb.
@@ -533,6 +732,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
const bool hsplit = compl_desc->split_header;
struct gve_rx_buf_state_dqo *buf_state;
struct gve_priv *priv = rx->gve;
+ struct bpf_prog *xprog;
u16 buf_len;
u16 hdr_len;
@@ -556,10 +756,19 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
buf_len = compl_desc->packet_len;
hdr_len = compl_desc->header_len;
+ xprog = READ_ONCE(priv->xdp_prog);
+ if (buf_state->xsk_buff)
+ return gve_rx_xsk_dqo(napi, rx, buf_state, buf_len, xprog);
+
/* Page might have not been used for awhile and was likely last written
* by a different thread.
*/
- prefetch(buf_state->page_info.page);
+ if (rx->dqo.page_pool) {
+ if (!netmem_is_net_iov(buf_state->page_info.netmem))
+ prefetch(netmem_to_page(buf_state->page_info.netmem));
+ } else {
+ prefetch(buf_state->page_info.page);
+ }
/* Copy the header into the skb in the case of header split */
if (hsplit) {
@@ -588,7 +797,8 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
/* Sync the portion of dma buffer for CPU to read. */
dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr,
- buf_state->page_info.page_offset,
+ buf_state->page_info.page_offset +
+ buf_state->page_info.pad,
buf_len, DMA_FROM_DEVICE);
/* Append to current skb if one exists. */
@@ -600,6 +810,33 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
return 0;
}
+ if (xprog) {
+ struct xdp_buff xdp;
+ void *old_data;
+ int xdp_act;
+
+ xdp_init_buff(&xdp, buf_state->page_info.buf_size,
+ &rx->xdp_rxq);
+ xdp_prepare_buff(&xdp,
+ buf_state->page_info.page_address +
+ buf_state->page_info.page_offset,
+ buf_state->page_info.pad,
+ buf_len, false);
+ old_data = xdp.data;
+ xdp_act = bpf_prog_run_xdp(xprog, &xdp);
+ buf_state->page_info.pad += xdp.data - old_data;
+ buf_len = xdp.data_end - xdp.data;
+ if (xdp_act != XDP_PASS) {
+ gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act,
+ buf_state);
+ return 0;
+ }
+
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_actions[XDP_PASS]++;
+ u64_stats_update_end(&rx->statss);
+ }
+
if (eop && buf_len <= priv->rx_copybreak) {
rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
&buf_state->page_info, buf_len);
@@ -630,9 +867,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
if (rx->dqo.page_pool)
skb_mark_for_recycle(rx->ctx.skb_head);
- skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
- buf_state->page_info.page_offset, buf_len,
- buf_state->page_info.buf_size);
+ gve_skb_add_rx_frag(rx, buf_state, 0, buf_len);
gve_reuse_buffer(rx, buf_state);
return 0;
@@ -683,6 +918,9 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
if (feat & NETIF_F_RXCSUM)
gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype);
+ if (rx->gve->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)
+ gve_rx_skb_hwtstamp(rx, le32_to_cpu(desc->ts));
+
/* RSC packets must set gso_size otherwise the TCP stack will complain
* that packets are larger than MTU.
*/
@@ -702,16 +940,27 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi,
int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
{
- struct napi_struct *napi = &block->napi;
- netdev_features_t feat = napi->dev->features;
-
- struct gve_rx_ring *rx = block->rx;
- struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq;
-
+ struct gve_rx_compl_queue_dqo *complq;
+ struct napi_struct *napi;
+ netdev_features_t feat;
+ struct gve_rx_ring *rx;
+ struct gve_priv *priv;
+ u64 xdp_redirects;
u32 work_done = 0;
u64 bytes = 0;
+ u64 xdp_txs;
int err;
+ napi = &block->napi;
+ feat = napi->dev->features;
+
+ rx = block->rx;
+ priv = rx->gve;
+ complq = &rx->dqo.complq;
+
+ xdp_redirects = rx->xdp_actions[XDP_REDIRECT];
+ xdp_txs = rx->xdp_actions[XDP_TX];
+
while (work_done < budget) {
struct gve_rx_compl_desc_dqo *compl_desc =
&complq->desc_ring[complq->head];
@@ -785,6 +1034,12 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
rx->ctx.skb_tail = NULL;
}
+ if (xdp_txs != rx->xdp_actions[XDP_TX])
+ gve_xdp_tx_flush_dqo(priv, rx->q_num);
+
+ if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT])
+ xdp_do_flush();
+
gve_rx_post_buffers_dqo(rx);
u64_stats_update_begin(&rx->statss);
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 4350ebd9c2bd..c6ff0968929d 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -334,27 +334,23 @@ int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg)
{
struct gve_tx_ring *tx = cfg->tx;
+ int total_queues;
int err = 0;
int i, j;
- if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) {
+ total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings;
+ if (total_queues > cfg->qcfg->max_queues) {
netif_err(priv, drv, priv->dev,
"Cannot alloc more than the max num of Tx rings\n");
return -EINVAL;
}
- if (cfg->start_idx == 0) {
- tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
- GFP_KERNEL);
- if (!tx)
- return -ENOMEM;
- } else if (!tx) {
- netif_err(priv, drv, priv->dev,
- "Cannot alloc tx rings from a nonzero start idx without tx array\n");
- return -EINVAL;
- }
+ tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
+ GFP_KERNEL);
+ if (!tx)
+ return -ENOMEM;
- for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) {
+ for (i = 0; i < total_queues; i++) {
err = gve_tx_alloc_ring_gqi(priv, cfg, &tx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -370,8 +366,7 @@ int gve_tx_alloc_rings_gqi(struct gve_priv *priv,
cleanup:
for (j = 0; j < i; j++)
gve_tx_free_ring_gqi(priv, &tx[j], cfg);
- if (cfg->start_idx == 0)
- kvfree(tx);
+ kvfree(tx);
return err;
}
@@ -384,13 +379,11 @@ void gve_tx_free_rings_gqi(struct gve_priv *priv,
if (!tx)
return;
- for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++)
+ for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++)
gve_tx_free_ring_gqi(priv, &tx[i], cfg);
- if (cfg->start_idx == 0) {
- kvfree(tx);
- cfg->tx = NULL;
- }
+ kvfree(tx);
+ cfg->tx = NULL;
}
/* gve_tx_avail - Calculates the number of slots available in the ring
@@ -830,8 +823,8 @@ static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx,
return ndescs;
}
-int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
- u32 flags)
+int gve_xdp_xmit_gqi(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
{
struct gve_priv *priv = netdev_priv(dev);
struct gve_tx_ring *tx;
@@ -844,7 +837,7 @@ int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
return -ENETDOWN;
qid = gve_xdp_tx_queue_id(priv,
- smp_processor_id() % priv->num_xdp_queues);
+ smp_processor_id() % priv->tx_cfg.num_xdp_queues);
tx = &priv->tx[qid];
@@ -959,13 +952,9 @@ static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
spin_lock(&tx->xdp_lock);
while (sent < budget) {
- if (!gve_can_tx(tx, GVE_TX_START_THRESH))
- goto out;
-
- if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) {
- tx->xdp_xsk_done = tx->xdp_xsk_wakeup;
+ if (!gve_can_tx(tx, GVE_TX_START_THRESH) ||
+ !xsk_tx_peek_desc(tx->xsk_pool, &desc))
goto out;
- }
data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr);
nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true);
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index 394debc62268..6f1d515673d2 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -9,9 +9,11 @@
#include "gve_utils.h"
#include "gve_dqo.h"
#include <net/ip.h>
+#include <linux/bpf.h>
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
+#include <net/xdp_sock_drv.h>
/* Returns true if tx_bufs are available. */
static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count)
@@ -110,6 +112,14 @@ static bool gve_has_pending_packet(struct gve_tx_ring *tx)
return false;
}
+void gve_xdp_tx_flush_dqo(struct gve_priv *priv, u32 xdp_qid)
+{
+ u32 tx_qid = gve_xdp_tx_queue_id(priv, xdp_qid);
+ struct gve_tx_ring *tx = &priv->tx[tx_qid];
+
+ gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
+}
+
static struct gve_tx_pending_packet_dqo *
gve_alloc_pending_packet(struct gve_tx_ring *tx)
{
@@ -198,7 +208,8 @@ void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx)
gve_remove_napi(priv, ntfy_idx);
gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL);
- netdev_tx_reset_queue(tx->netdev_txq);
+ if (tx->netdev_txq)
+ netdev_tx_reset_queue(tx->netdev_txq);
gve_tx_clean_pending_packets(tx);
gve_tx_remove_from_block(priv, idx);
}
@@ -231,6 +242,9 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
tx->dqo.tx_ring = NULL;
}
+ kvfree(tx->dqo.xsk_reorder_queue);
+ tx->dqo.xsk_reorder_queue = NULL;
+
kvfree(tx->dqo.pending_packets);
tx->dqo.pending_packets = NULL;
@@ -276,7 +290,8 @@ void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx)
gve_tx_add_to_block(priv, idx);
- tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ if (idx < priv->tx_cfg.num_queues)
+ tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo);
}
@@ -295,6 +310,7 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
memset(tx, 0, sizeof(*tx));
tx->q_num = idx;
tx->dev = hdev;
+ spin_lock_init(&tx->dqo_tx.xdp_lock);
atomic_set_release(&tx->dqo_compl.hw_tx_head, 0);
/* Queue sizes must be a power of 2 */
@@ -333,6 +349,17 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv,
tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1;
atomic_set_release(&tx->dqo_compl.free_pending_packets, -1);
+
+ /* Only alloc xsk pool for XDP queues */
+ if (idx >= cfg->qcfg->num_queues && cfg->num_xdp_rings) {
+ tx->dqo.xsk_reorder_queue =
+ kvcalloc(tx->dqo.complq_mask + 1,
+ sizeof(tx->dqo.xsk_reorder_queue[0]),
+ GFP_KERNEL);
+ if (!tx->dqo.xsk_reorder_queue)
+ goto err;
+ }
+
tx->dqo_compl.miss_completions.head = -1;
tx->dqo_compl.miss_completions.tail = -1;
tx->dqo_compl.timed_out_completions.head = -1;
@@ -379,27 +406,23 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
struct gve_tx_alloc_rings_cfg *cfg)
{
struct gve_tx_ring *tx = cfg->tx;
+ int total_queues;
int err = 0;
int i, j;
- if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) {
+ total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings;
+ if (total_queues > cfg->qcfg->max_queues) {
netif_err(priv, drv, priv->dev,
"Cannot alloc more than the max num of Tx rings\n");
return -EINVAL;
}
- if (cfg->start_idx == 0) {
- tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
- GFP_KERNEL);
- if (!tx)
- return -ENOMEM;
- } else if (!tx) {
- netif_err(priv, drv, priv->dev,
- "Cannot alloc tx rings from a nonzero start idx without tx array\n");
- return -EINVAL;
- }
+ tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring),
+ GFP_KERNEL);
+ if (!tx)
+ return -ENOMEM;
- for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) {
+ for (i = 0; i < total_queues; i++) {
err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -415,8 +438,7 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv,
err:
for (j = 0; j < i; j++)
gve_tx_free_ring_dqo(priv, &tx[j], cfg);
- if (cfg->start_idx == 0)
- kvfree(tx);
+ kvfree(tx);
return err;
}
@@ -429,13 +451,11 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv,
if (!tx)
return;
- for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++)
+ for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++)
gve_tx_free_ring_dqo(priv, &tx[i], cfg);
- if (cfg->start_idx == 0) {
- kvfree(tx);
- cfg->tx = NULL;
- }
+ kvfree(tx);
+ cfg->tx = NULL;
}
/* Returns the number of slots available in the ring */
@@ -446,12 +466,28 @@ static u32 num_avail_tx_slots(const struct gve_tx_ring *tx)
return tx->mask - num_used;
}
+/* Checks if the requested number of slots are available in the ring */
+static bool gve_has_tx_slots_available(struct gve_tx_ring *tx, u32 slots_req)
+{
+ u32 num_avail = num_avail_tx_slots(tx);
+
+ slots_req += GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP;
+
+ if (num_avail >= slots_req)
+ return true;
+
+ /* Update cached TX head pointer */
+ tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);
+
+ return num_avail_tx_slots(tx) >= slots_req;
+}
+
static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx,
int desc_count, int buf_count)
{
return gve_has_pending_packet(tx) &&
- num_avail_tx_slots(tx) >= desc_count &&
- gve_has_free_tx_qpl_bufs(tx, buf_count);
+ gve_has_tx_slots_available(tx, desc_count) &&
+ gve_has_free_tx_qpl_bufs(tx, buf_count);
}
/* Stops the queue if available descriptors is less than 'count'.
@@ -463,12 +499,6 @@ static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx,
if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
return 0;
- /* Update cached TX head pointer */
- tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);
-
- if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
- return 0;
-
/* No space, so stop the queue */
tx->stop_queue++;
netif_tx_stop_queue(tx->netdev_txq);
@@ -479,8 +509,6 @@ static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx,
/* After stopping queue, check if we can transmit again in order to
* avoid TOCTOU bug.
*/
- tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);
-
if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
return -EBUSY;
@@ -507,11 +535,9 @@ static void gve_extract_tx_metadata_dqo(const struct sk_buff *skb,
}
static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx,
- struct sk_buff *skb, u32 len, u64 addr,
+ bool enable_csum, u32 len, u64 addr,
s16 compl_tag, bool eop, bool is_gso)
{
- const bool checksum_offload_en = skb->ip_summed == CHECKSUM_PARTIAL;
-
while (len > 0) {
struct gve_tx_pkt_desc_dqo *desc =
&tx->dqo.tx_ring[*desc_idx].pkt;
@@ -522,7 +548,7 @@ static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx,
.buf_addr = cpu_to_le64(addr),
.dtype = GVE_TX_PKT_DESC_DTYPE_DQO,
.end_of_packet = cur_eop,
- .checksum_offload_enable = checksum_offload_en,
+ .checksum_offload_enable = enable_csum,
.compl_tag = cpu_to_le16(compl_tag),
.buf_size = cur_len,
};
@@ -619,6 +645,25 @@ gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
};
}
+static void gve_tx_update_tail(struct gve_tx_ring *tx, u32 desc_idx)
+{
+ u32 last_desc_idx = (desc_idx - 1) & tx->mask;
+ u32 last_report_event_interval =
+ (last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask;
+
+ /* Commit the changes to our state */
+ tx->dqo_tx.tail = desc_idx;
+
+ /* Request a descriptor completion on the last descriptor of the
+ * packet if we are allowed to by the HW enforced interval.
+ */
+
+ if (unlikely(last_report_event_interval >= GVE_TX_MIN_RE_INTERVAL)) {
+ tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true;
+ tx->dqo_tx.last_re_idx = last_desc_idx;
+ }
+}
+
static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
struct sk_buff *skb,
struct gve_tx_pending_packet_dqo *pkt,
@@ -626,6 +671,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
u32 *desc_idx,
bool is_gso)
{
+ bool enable_csum = skb->ip_summed == CHECKSUM_PARTIAL;
const struct skb_shared_info *shinfo = skb_shinfo(skb);
int i;
@@ -651,7 +697,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
++pkt->num_bufs;
- gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
+ gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, len, addr,
completion_tag,
/*eop=*/shinfo->nr_frags == 0, is_gso);
}
@@ -667,10 +713,11 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
goto err;
dma_unmap_len_set(pkt, len[pkt->num_bufs], len);
- dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
+ netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt,
+ dma[pkt->num_bufs], addr);
++pkt->num_bufs;
- gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
+ gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, len, addr,
completion_tag, is_eop, is_gso);
}
@@ -715,6 +762,7 @@ static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx,
u32 *desc_idx,
bool is_gso)
{
+ bool enable_csum = skb->ip_summed == CHECKSUM_PARTIAL;
u32 copy_offset = 0;
dma_addr_t dma_addr;
u32 copy_len;
@@ -736,7 +784,7 @@ static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx,
copy_offset += copy_len;
dma_sync_single_for_device(tx->dev, dma_addr,
copy_len, DMA_TO_DEVICE);
- gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb,
+ gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum,
copy_len,
dma_addr,
completion_tag,
@@ -770,7 +818,11 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx,
s16 completion_tag;
pkt = gve_alloc_pending_packet(tx);
+ if (!pkt)
+ return -ENOMEM;
+
pkt->skb = skb;
+ pkt->type = GVE_TX_PENDING_PACKET_DQO_SKB;
completion_tag = pkt - tx->dqo.pending_packets;
gve_extract_tx_metadata_dqo(skb, &metadata);
@@ -803,24 +855,7 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx,
tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs;
- /* Commit the changes to our state */
- tx->dqo_tx.tail = desc_idx;
-
- /* Request a descriptor completion on the last descriptor of the
- * packet if we are allowed to by the HW enforced interval.
- */
- {
- u32 last_desc_idx = (desc_idx - 1) & tx->mask;
- u32 last_report_event_interval =
- (last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask;
-
- if (unlikely(last_report_event_interval >=
- GVE_TX_MIN_RE_INTERVAL)) {
- tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true;
- tx->dqo_tx.last_re_idx = last_desc_idx;
- }
- }
-
+ gve_tx_update_tail(tx, desc_idx);
return 0;
err:
@@ -954,9 +989,8 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
/* Metadata + (optional TSO) + data descriptors. */
total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs;
- if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs +
- GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP,
- num_buffer_descs))) {
+ if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs,
+ num_buffer_descs))) {
return -1;
}
@@ -973,6 +1007,38 @@ drop:
return 0;
}
+static void gve_xsk_reorder_queue_push_dqo(struct gve_tx_ring *tx,
+ u16 completion_tag)
+{
+ u32 tail = atomic_read(&tx->dqo_tx.xsk_reorder_queue_tail);
+
+ tx->dqo.xsk_reorder_queue[tail] = completion_tag;
+ tail = (tail + 1) & tx->dqo.complq_mask;
+ atomic_set_release(&tx->dqo_tx.xsk_reorder_queue_tail, tail);
+}
+
+static struct gve_tx_pending_packet_dqo *
+gve_xsk_reorder_queue_head(struct gve_tx_ring *tx)
+{
+ u32 head = tx->dqo_compl.xsk_reorder_queue_head;
+
+ if (head == tx->dqo_compl.xsk_reorder_queue_tail) {
+ tx->dqo_compl.xsk_reorder_queue_tail =
+ atomic_read_acquire(&tx->dqo_tx.xsk_reorder_queue_tail);
+
+ if (head == tx->dqo_compl.xsk_reorder_queue_tail)
+ return NULL;
+ }
+
+ return &tx->dqo.pending_packets[tx->dqo.xsk_reorder_queue[head]];
+}
+
+static void gve_xsk_reorder_queue_pop_dqo(struct gve_tx_ring *tx)
+{
+ tx->dqo_compl.xsk_reorder_queue_head++;
+ tx->dqo_compl.xsk_reorder_queue_head &= tx->dqo.complq_mask;
+}
+
/* Transmit a given skb and ring the doorbell. */
netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
{
@@ -996,6 +1062,62 @@ netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+static bool gve_xsk_tx_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+ int budget)
+{
+ struct xsk_buff_pool *pool = tx->xsk_pool;
+ struct xdp_desc desc;
+ bool repoll = false;
+ int sent = 0;
+
+ spin_lock(&tx->dqo_tx.xdp_lock);
+ for (; sent < budget; sent++) {
+ struct gve_tx_pending_packet_dqo *pkt;
+ s16 completion_tag;
+ dma_addr_t addr;
+ u32 desc_idx;
+
+ if (unlikely(!gve_has_avail_slots_tx_dqo(tx, 1, 1))) {
+ repoll = true;
+ break;
+ }
+
+ if (!xsk_tx_peek_desc(pool, &desc))
+ break;
+
+ pkt = gve_alloc_pending_packet(tx);
+ pkt->type = GVE_TX_PENDING_PACKET_DQO_XSK;
+ pkt->num_bufs = 0;
+ completion_tag = pkt - tx->dqo.pending_packets;
+
+ addr = xsk_buff_raw_get_dma(pool, desc.addr);
+ xsk_buff_raw_dma_sync_for_device(pool, addr, desc.len);
+
+ desc_idx = tx->dqo_tx.tail;
+ gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
+ true, desc.len,
+ addr, completion_tag, true,
+ false);
+ ++pkt->num_bufs;
+ gve_tx_update_tail(tx, desc_idx);
+ tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs;
+ gve_xsk_reorder_queue_push_dqo(tx, completion_tag);
+ }
+
+ if (sent) {
+ gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
+ xsk_tx_release(pool);
+ }
+
+ spin_unlock(&tx->dqo_tx.xdp_lock);
+
+ u64_stats_update_begin(&tx->statss);
+ tx->xdp_xsk_sent += sent;
+ u64_stats_update_end(&tx->statss);
+
+ return (sent == budget) || repoll;
+}
+
static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list,
struct gve_tx_pending_packet_dqo *pending_packet)
{
@@ -1045,8 +1167,9 @@ static void gve_unmap_packet(struct device *dev,
dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]),
dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE);
for (i = 1; i < pkt->num_bufs; i++) {
- dma_unmap_page(dev, dma_unmap_addr(pkt, dma[i]),
- dma_unmap_len(pkt, len[i]), DMA_TO_DEVICE);
+ netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]),
+ dma_unmap_len(pkt, len[i]),
+ DMA_TO_DEVICE, 0);
}
pkt->num_bufs = 0;
}
@@ -1109,16 +1232,35 @@ static void gve_handle_packet_completion(struct gve_priv *priv,
}
}
tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs;
- if (tx->dqo.qpl)
- gve_free_tx_qpl_bufs(tx, pending_packet);
- else
+
+ switch (pending_packet->type) {
+ case GVE_TX_PENDING_PACKET_DQO_SKB:
+ if (tx->dqo.qpl)
+ gve_free_tx_qpl_bufs(tx, pending_packet);
+ else
+ gve_unmap_packet(tx->dev, pending_packet);
+ (*pkts)++;
+ *bytes += pending_packet->skb->len;
+
+ napi_consume_skb(pending_packet->skb, is_napi);
+ pending_packet->skb = NULL;
+ gve_free_pending_packet(tx, pending_packet);
+ break;
+ case GVE_TX_PENDING_PACKET_DQO_XDP_FRAME:
gve_unmap_packet(tx->dev, pending_packet);
+ (*pkts)++;
+ *bytes += pending_packet->xdpf->len;
- *bytes += pending_packet->skb->len;
- (*pkts)++;
- napi_consume_skb(pending_packet->skb, is_napi);
- pending_packet->skb = NULL;
- gve_free_pending_packet(tx, pending_packet);
+ xdp_return_frame(pending_packet->xdpf);
+ pending_packet->xdpf = NULL;
+ gve_free_pending_packet(tx, pending_packet);
+ break;
+ case GVE_TX_PENDING_PACKET_DQO_XSK:
+ pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
}
static void gve_handle_miss_completion(struct gve_priv *priv,
@@ -1215,8 +1357,34 @@ static void remove_timed_out_completions(struct gve_priv *priv,
remove_from_list(tx, &tx->dqo_compl.timed_out_completions,
pending_packet);
+
+ /* Need to count XSK packets in xsk_tx_completed. */
+ if (pending_packet->type == GVE_TX_PENDING_PACKET_DQO_XSK)
+ pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE;
+ else
+ gve_free_pending_packet(tx, pending_packet);
+ }
+}
+
+static void gve_tx_process_xsk_completions(struct gve_tx_ring *tx)
+{
+ u32 num_xsks = 0;
+
+ while (true) {
+ struct gve_tx_pending_packet_dqo *pending_packet =
+ gve_xsk_reorder_queue_head(tx);
+
+ if (!pending_packet ||
+ pending_packet->state != GVE_PACKET_STATE_XSK_COMPLETE)
+ break;
+
+ num_xsks++;
+ gve_xsk_reorder_queue_pop_dqo(tx);
gve_free_pending_packet(tx, pending_packet);
}
+
+ if (num_xsks)
+ xsk_tx_completed(tx->xsk_pool, num_xsks);
}
int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
@@ -1289,13 +1457,17 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
num_descs_cleaned++;
}
- netdev_tx_completed_queue(tx->netdev_txq,
- pkt_compl_pkts + miss_compl_pkts,
- pkt_compl_bytes + miss_compl_bytes);
+ if (tx->netdev_txq)
+ netdev_tx_completed_queue(tx->netdev_txq,
+ pkt_compl_pkts + miss_compl_pkts,
+ pkt_compl_bytes + miss_compl_bytes);
remove_miss_completions(priv, tx);
remove_timed_out_completions(priv, tx);
+ if (tx->xsk_pool)
+ gve_tx_process_xsk_completions(tx);
+
u64_stats_update_begin(&tx->statss);
tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes;
tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts;
@@ -1327,3 +1499,111 @@ bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head];
return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
}
+
+bool gve_xsk_tx_poll_dqo(struct gve_notify_block *rx_block, int budget)
+{
+ struct gve_rx_ring *rx = rx_block->rx;
+ struct gve_priv *priv = rx->gve;
+ struct gve_tx_ring *tx;
+
+ tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)];
+ if (tx->xsk_pool)
+ return gve_xsk_tx_dqo(priv, tx, budget);
+
+ return 0;
+}
+
+bool gve_xdp_poll_dqo(struct gve_notify_block *block)
+{
+ struct gve_tx_compl_desc *compl_desc;
+ struct gve_tx_ring *tx = block->tx;
+ struct gve_priv *priv = block->priv;
+
+ gve_clean_tx_done_dqo(priv, tx, &block->napi);
+
+ /* Return true if we still have work. */
+ compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head];
+ return compl_desc->generation != tx->dqo_compl.cur_gen_bit;
+}
+
+int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx,
+ struct xdp_frame *xdpf)
+{
+ struct gve_tx_pending_packet_dqo *pkt;
+ u32 desc_idx = tx->dqo_tx.tail;
+ s16 completion_tag;
+ int num_descs = 1;
+ dma_addr_t addr;
+ int err;
+
+ if (unlikely(!gve_has_tx_slots_available(tx, num_descs)))
+ return -EBUSY;
+
+ pkt = gve_alloc_pending_packet(tx);
+ if (unlikely(!pkt))
+ return -EBUSY;
+
+ pkt->type = GVE_TX_PENDING_PACKET_DQO_XDP_FRAME;
+ pkt->num_bufs = 0;
+ pkt->xdpf = xdpf;
+ completion_tag = pkt - tx->dqo.pending_packets;
+
+ /* Generate Packet Descriptor */
+ addr = dma_map_single(tx->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
+ err = dma_mapping_error(tx->dev, addr);
+ if (unlikely(err))
+ goto err;
+
+ dma_unmap_len_set(pkt, len[pkt->num_bufs], xdpf->len);
+ dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
+ pkt->num_bufs++;
+
+ gve_tx_fill_pkt_desc_dqo(tx, &desc_idx,
+ false, xdpf->len,
+ addr, completion_tag, true,
+ false);
+
+ gve_tx_update_tail(tx, desc_idx);
+ return 0;
+
+err:
+ pkt->xdpf = NULL;
+ pkt->num_bufs = 0;
+ gve_free_pending_packet(tx, pkt);
+ return err;
+}
+
+int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_tx_ring *tx;
+ int i, err = 0, qid;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ qid = gve_xdp_tx_queue_id(priv,
+ smp_processor_id() % priv->tx_cfg.num_xdp_queues);
+
+ tx = &priv->tx[qid];
+
+ spin_lock(&tx->dqo_tx.xdp_lock);
+ for (i = 0; i < n; i++) {
+ err = gve_xdp_xmit_one_dqo(priv, tx, frames[i]);
+ if (err)
+ break;
+ }
+
+ if (flags & XDP_XMIT_FLUSH)
+ gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail);
+
+ spin_unlock(&tx->dqo_tx.xdp_lock);
+
+ u64_stats_update_begin(&tx->statss);
+ tx->xdp_xmit += n;
+ tx->xdp_xmit_errors += n - i;
+ u64_stats_update_end(&tx->statss);
+
+ return i ? i : err;
+}
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 30fef100257e..ace9b8698021 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -110,13 +110,13 @@ void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
{
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- netif_napi_add(priv->dev, &block->napi, gve_poll);
- netif_napi_set_irq(&block->napi, block->irq);
+ netif_napi_add_locked(priv->dev, &block->napi, gve_poll);
+ netif_napi_set_irq_locked(&block->napi, block->irq);
}
void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- netif_napi_del(&block->napi);
+ netif_napi_del_locked(&block->napi);
}