diff options
Diffstat (limited to 'drivers/net/ethernet/google/gve')
-rw-r--r-- | drivers/net/ethernet/google/gve/Makefile | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve.h | 187 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.c | 171 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_adminq.h | 30 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c | 70 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_desc_dqo.h | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_dqo.h | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_ethtool.c | 134 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_main.c | 829 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_ptp.c | 139 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx.c | 44 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_rx_dqo.c | 301 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx.c | 45 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_tx_dqo.c | 426 | ||||
-rw-r--r-- | drivers/net/ethernet/google/gve/gve_utils.c | 6 |
15 files changed, 1752 insertions, 640 deletions
diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile index 4520f1c07a63..e0ec227a50f7 100644 --- a/drivers/net/ethernet/google/gve/Makefile +++ b/drivers/net/ethernet/google/gve/Makefile @@ -1,5 +1,7 @@ # Makefile for the Google virtual Ethernet (gve) driver obj-$(CONFIG_GVE) += gve.o -gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \ +gve-y := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o gve_flow_rule.o \ gve_buffer_mgmt_dqo.o + +gve-$(CONFIG_PTP_1588_CLOCK) += gve_ptp.o diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 8167cc5fb0df..bceaf9b05cb4 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -11,7 +11,9 @@ #include <linux/dmapool.h> #include <linux/ethtool_netlink.h> #include <linux/netdevice.h> +#include <linux/net_tstamp.h> #include <linux/pci.h> +#include <linux/ptp_clock_kernel.h> #include <linux/u64_stats_sync.h> #include <net/page_pool/helpers.h> #include <net/xdp.h> @@ -59,6 +61,8 @@ #define GVE_MAX_RX_BUFFER_SIZE 4096 +#define GVE_XDP_RX_BUFFER_SIZE_DQO 4096 + #define GVE_DEFAULT_RX_BUFFER_OFFSET 2048 #define GVE_PAGE_POOL_SIZE_MULTIPLIER 4 @@ -68,6 +72,9 @@ #define GVE_FLOW_RULE_IDS_CACHE_SIZE \ (GVE_ADMINQ_BUFFER_SIZE / sizeof(((struct gve_adminq_queried_flow_rule *)0)->location)) +#define GVE_RSS_KEY_SIZE 40 +#define GVE_RSS_INDIR_SIZE 128 + #define GVE_XDP_ACTIONS 5 #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 @@ -102,7 +109,13 @@ struct gve_rx_desc_queue { /* The page info for a single slot in the RX data queue */ struct gve_rx_slot_page_info { - struct page *page; + /* netmem is used for DQO RDA mode + * page is used in all other modes + */ + union { + struct page *page; + netmem_ref netmem; + }; void *page_address; u32 page_offset; /* offset to write to in page */ unsigned int buf_size; @@ -177,6 +190,9 @@ struct gve_rx_buf_state_dqo { /* The page posted to HW. */ struct gve_rx_slot_page_info page_info; + /* XSK buffer */ + struct xdp_buff *xsk_buff; + /* The DMA address corresponding to `page_info`. */ dma_addr_t addr; @@ -218,6 +234,11 @@ struct gve_rx_cnts { /* Contains datapath state used to represent an RX queue. */ struct gve_rx_ring { struct gve_priv *gve; + + u16 packet_buffer_size; /* Size of buffer posted to NIC */ + u16 packet_buffer_truesize; /* Total size of RX buffer */ + u16 rx_headroom; + union { /* GQI fields */ struct { @@ -226,7 +247,6 @@ struct gve_rx_ring { /* threshold for posting new buffs and descs */ u32 db_threshold; - u16 packet_buffer_size; u32 qpl_copy_pool_mask; u32 qpl_copy_pool_head; @@ -314,7 +334,6 @@ struct gve_rx_ring { /* XDP stuff */ struct xdp_rxq_info xdp_rxq; - struct xdp_rxq_info xsk_rxq; struct xsk_buff_pool *xsk_pool; struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */ }; @@ -383,10 +402,24 @@ enum gve_packet_state { GVE_PACKET_STATE_PENDING_REINJECT_COMPL, /* No valid completion received within the specified timeout. */ GVE_PACKET_STATE_TIMED_OUT_COMPL, + /* XSK pending packet has received a packet/reinjection completion, or + * has timed out. At this point, the pending packet can be counted by + * xsk_tx_complete and freed. + */ + GVE_PACKET_STATE_XSK_COMPLETE, +}; + +enum gve_tx_pending_packet_dqo_type { + GVE_TX_PENDING_PACKET_DQO_SKB, + GVE_TX_PENDING_PACKET_DQO_XDP_FRAME, + GVE_TX_PENDING_PACKET_DQO_XSK, }; struct gve_tx_pending_packet_dqo { - struct sk_buff *skb; /* skb for this packet */ + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; /* 0th element corresponds to the linear portion of `skb`, should be * unmapped with `dma_unmap_single`. @@ -416,7 +449,10 @@ struct gve_tx_pending_packet_dqo { /* Identifies the current state of the packet as defined in * `enum gve_packet_state`. */ - u8 state; + u8 state : 3; + + /* gve_tx_pending_packet_dqo_type */ + u8 type : 2; /* If packet is an outstanding miss completion, then the packet is * freed if the corresponding re-injection completion is not received @@ -438,6 +474,9 @@ struct gve_tx_ring { /* DQO fields. */ struct { + /* Spinlock for XDP tx traffic */ + spinlock_t xdp_lock; + /* Linked list of gve_tx_pending_packet_dqo. Index into * pending_packets, or -1 if empty. * @@ -482,6 +521,8 @@ struct gve_tx_ring { /* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */ u32 free_tx_qpl_buf_cnt; }; + + atomic_t xsk_reorder_queue_tail; } dqo_tx; }; @@ -515,6 +556,9 @@ struct gve_tx_ring { /* Last TX ring index fetched by HW */ atomic_t hw_tx_head; + u16 xsk_reorder_queue_head; + u16 xsk_reorder_queue_tail; + /* List to track pending packets which received a miss * completion but not a corresponding reinjection. */ @@ -568,6 +612,8 @@ struct gve_tx_ring { struct gve_tx_pending_packet_dqo *pending_packets; s16 num_pending_packets; + u16 *xsk_reorder_queue; + u32 complq_mask; /* complq size is complq_mask + 1 */ /* QPL fields */ @@ -604,8 +650,6 @@ struct gve_tx_ring { dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */ struct u64_stats_sync statss; /* sync stats for 32bit archs */ struct xsk_buff_pool *xsk_pool; - u32 xdp_xsk_wakeup; - u32 xdp_xsk_done; u64 xdp_xsk_sent; u64 xdp_xmit; u64 xdp_xmit_errors; @@ -624,10 +668,18 @@ struct gve_notify_block { u32 irq; }; -/* Tracks allowed and current queue settings */ -struct gve_queue_config { +/* Tracks allowed and current rx queue settings */ +struct gve_rx_queue_config { u16 max_queues; - u16 num_queues; /* current */ + u16 num_queues; + u16 packet_buffer_size; +}; + +/* Tracks allowed and current tx queue settings */ +struct gve_tx_queue_config { + u16 max_queues; + u16 num_queues; /* number of TX queues, excluding XDP queues */ + u16 num_xdp_queues; }; /* Tracks the available and used qpl IDs */ @@ -651,11 +703,11 @@ struct gve_ptype_lut { /* Parameters for allocating resources for tx queues */ struct gve_tx_alloc_rings_cfg { - struct gve_queue_config *qcfg; + struct gve_tx_queue_config *qcfg; + + u16 num_xdp_rings; u16 ring_size; - u16 start_idx; - u16 num_rings; bool raw_addressing; /* Allocated resources are returned here */ @@ -665,13 +717,15 @@ struct gve_tx_alloc_rings_cfg { /* Parameters for allocating resources for rx queues */ struct gve_rx_alloc_rings_cfg { /* tx config is also needed to determine QPL ids */ - struct gve_queue_config *qcfg; - struct gve_queue_config *qcfg_tx; + struct gve_rx_queue_config *qcfg_rx; + struct gve_tx_queue_config *qcfg_tx; u16 ring_size; u16 packet_buffer_size; bool raw_addressing; bool enable_header_split; + bool reset_rss; + bool xdp; /* Allocated resources are returned here */ struct gve_rx_ring *rx; @@ -722,6 +776,17 @@ struct gve_flow_rules_cache { u32 rule_ids_cache_num; }; +struct gve_rss_config { + u8 *hash_key; + u32 *hash_lut; +}; + +struct gve_ptp { + struct ptp_clock_info info; + struct ptp_clock *clock; + struct gve_priv *priv; +}; + struct gve_priv { struct net_device *dev; struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ @@ -751,10 +816,11 @@ struct gve_priv { u32 rx_copybreak; /* copy packets smaller than this */ u16 default_num_queues; /* default num queues to set up */ - u16 num_xdp_queues; - struct gve_queue_config tx_cfg; - struct gve_queue_config rx_cfg; - u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ + struct gve_tx_queue_config tx_cfg; + struct gve_rx_queue_config rx_cfg; + unsigned long *xsk_pools; /* bitmap of RX queues with XSK pools */ + u32 num_ntfy_blks; /* split between TX and RX so must be even */ + int numa_node; struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ __be32 __iomem *db_bar2; /* "array" of doorbells */ @@ -786,6 +852,7 @@ struct gve_priv { u32 adminq_set_driver_parameter_cnt; u32 adminq_report_stats_cnt; u32 adminq_report_link_speed_cnt; + u32 adminq_report_nic_timestamp_cnt; u32 adminq_get_ptype_map_cnt; u32 adminq_verify_driver_compatibility_cnt; u32 adminq_query_flow_rules_cnt; @@ -823,7 +890,6 @@ struct gve_priv { struct gve_ptype_lut *ptype_lut_dqo; /* Must be a power of two. */ - u16 data_buffer_size_dqo; u16 max_rx_buffer_size; /* device limit */ enum gve_queue_format queue_format; @@ -842,6 +908,16 @@ struct gve_priv { u16 rss_key_size; u16 rss_lut_size; + bool cache_rss_config; + struct gve_rss_config rss_config; + + /* True if the device supports reading the nic clock */ + bool nic_timestamp_supported; + struct gve_ptp *ptp; + struct kernel_hwtstamp_config ts_config; + struct gve_nic_ts_report *nic_ts_report; + dma_addr_t nic_ts_report_bus; + u64 last_sync_nic_counter; /* Clock counter from last NIC TS report */ }; enum gve_service_task_flags_bit { @@ -1024,27 +1100,16 @@ static inline bool gve_is_qpl(struct gve_priv *priv) } /* Returns the number of tx queue page lists */ -static inline u32 gve_num_tx_qpls(const struct gve_queue_config *tx_cfg, - int num_xdp_queues, +static inline u32 gve_num_tx_qpls(const struct gve_tx_queue_config *tx_cfg, bool is_qpl) { if (!is_qpl) return 0; - return tx_cfg->num_queues + num_xdp_queues; -} - -/* Returns the number of XDP tx queue page lists - */ -static inline u32 gve_num_xdp_qpls(struct gve_priv *priv) -{ - if (priv->queue_format != GVE_GQI_QPL_FORMAT) - return 0; - - return priv->num_xdp_queues; + return tx_cfg->num_queues + tx_cfg->num_xdp_queues; } /* Returns the number of rx queue page lists */ -static inline u32 gve_num_rx_qpls(const struct gve_queue_config *rx_cfg, +static inline u32 gve_num_rx_qpls(const struct gve_rx_queue_config *rx_cfg, bool is_qpl) { if (!is_qpl) @@ -1062,7 +1127,8 @@ static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid) return priv->tx_cfg.max_queues + rx_qid; } -static inline u32 gve_get_rx_qpl_id(const struct gve_queue_config *tx_cfg, int rx_qid) +static inline u32 gve_get_rx_qpl_id(const struct gve_tx_queue_config *tx_cfg, + int rx_qid) { return tx_cfg->max_queues + rx_qid; } @@ -1072,7 +1138,7 @@ static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv) return gve_tx_qpl_id(priv, 0); } -static inline u32 gve_rx_start_qpl_id(const struct gve_queue_config *tx_cfg) +static inline u32 gve_rx_start_qpl_id(const struct gve_tx_queue_config *tx_cfg) { return gve_get_rx_qpl_id(tx_cfg, 0); } @@ -1103,7 +1169,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv) static inline u32 gve_num_tx_queues(struct gve_priv *priv) { - return priv->tx_cfg.num_queues + priv->num_xdp_queues; + return priv->tx_cfg.num_queues + priv->tx_cfg.num_xdp_queues; } static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id) @@ -1116,6 +1182,17 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv) return gve_xdp_tx_queue_id(priv, 0); } +static inline bool gve_supports_xdp_xmit(struct gve_priv *priv) +{ + switch (priv->queue_format) { + case GVE_GQI_QPL_FORMAT: + case GVE_DQO_RDA_FORMAT: + return true; + default: + return false; + } +} + /* gqi napi handler defined in gve_main.c */ int gve_napi_poll(struct napi_struct *napi, int budget); @@ -1133,11 +1210,15 @@ void gve_free_queue_page_list(struct gve_priv *priv, u32 id); /* tx handling */ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); -int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, - u32 flags); +int gve_xdp_xmit_gqi(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); +int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx, void *data, int len, void *frame_p); void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid); +int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + struct xdp_frame *xdpf); bool gve_tx_poll(struct gve_notify_block *block, int budget); bool gve_xdp_poll(struct gve_notify_block *block, int budget); int gve_xsk_tx_poll(struct gve_notify_block *block, int budget); @@ -1197,7 +1278,8 @@ void gve_free_buffer(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state); int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc); struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, - struct gve_rx_ring *rx); + struct gve_rx_ring *rx, + bool xdp); /* Reset */ void gve_schedule_reset(struct gve_priv *priv); @@ -1209,14 +1291,35 @@ int gve_adjust_config(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg); int gve_adjust_queues(struct gve_priv *priv, - struct gve_queue_config new_rx_config, - struct gve_queue_config new_tx_config); + struct gve_rx_queue_config new_rx_config, + struct gve_tx_queue_config new_tx_config, + bool reset_rss); /* flow steering rule */ int gve_get_flow_rule_entry(struct gve_priv *priv, struct ethtool_rxnfc *cmd); int gve_get_flow_rule_ids(struct gve_priv *priv, struct ethtool_rxnfc *cmd, u32 *rule_locs); int gve_add_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd); int gve_del_flow_rule(struct gve_priv *priv, struct ethtool_rxnfc *cmd); int gve_flow_rules_reset(struct gve_priv *priv); +/* RSS config */ +int gve_init_rss_config(struct gve_priv *priv, u16 num_queues); +/* PTP and timestamping */ +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +int gve_clock_nic_ts_read(struct gve_priv *priv); +int gve_init_clock(struct gve_priv *priv); +void gve_teardown_clock(struct gve_priv *priv); +#else /* CONFIG_PTP_1588_CLOCK */ +static inline int gve_clock_nic_ts_read(struct gve_priv *priv) +{ + return -EOPNOTSUPP; +} + +static inline int gve_init_clock(struct gve_priv *priv) +{ + return 0; +} + +static inline void gve_teardown_clock(struct gve_priv *priv) { } +#endif /* CONFIG_PTP_1588_CLOCK */ /* report stats handling */ void gve_handle_report_stats(struct gve_priv *priv); /* exported by ethtool.c */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index aa7d723011d0..4f33d094a2ef 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -46,6 +46,7 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, struct gve_device_option_flow_steering **dev_op_flow_steering, struct gve_device_option_rss_config **dev_op_rss_config, + struct gve_device_option_nic_timestamp **dev_op_nic_timestamp, struct gve_device_option_modify_ring **dev_op_modify_ring) { u32 req_feat_mask = be32_to_cpu(option->required_features_mask); @@ -225,6 +226,23 @@ void gve_parse_device_option(struct gve_priv *priv, "RSS config"); *dev_op_rss_config = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_NIC_TIMESTAMP: + if (option_length < sizeof(**dev_op_nic_timestamp) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_NIC_TIMESTAMP) { + dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Nic Timestamp", + (int)sizeof(**dev_op_nic_timestamp), + GVE_DEV_OPT_REQ_FEAT_MASK_NIC_TIMESTAMP, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_nic_timestamp)) + dev_warn(&priv->pdev->dev, + GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Nic Timestamp"); + *dev_op_nic_timestamp = (void *)(option + 1); + break; default: /* If we don't recognize the option just continue * without doing anything. @@ -246,6 +264,7 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_buffer_sizes **dev_op_buffer_sizes, struct gve_device_option_flow_steering **dev_op_flow_steering, struct gve_device_option_rss_config **dev_op_rss_config, + struct gve_device_option_nic_timestamp **dev_op_nic_timestamp, struct gve_device_option_modify_ring **dev_op_modify_ring) { const int num_options = be16_to_cpu(descriptor->num_device_options); @@ -269,6 +288,7 @@ gve_process_device_options(struct gve_priv *priv, dev_op_dqo_rda, dev_op_jumbo_frames, dev_op_dqo_qpl, dev_op_buffer_sizes, dev_op_flow_steering, dev_op_rss_config, + dev_op_nic_timestamp, dev_op_modify_ring); dev_opt = next_opt; } @@ -306,6 +326,7 @@ int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) priv->adminq_set_driver_parameter_cnt = 0; priv->adminq_report_stats_cnt = 0; priv->adminq_report_link_speed_cnt = 0; + priv->adminq_report_nic_timestamp_cnt = 0; priv->adminq_get_ptype_map_cnt = 0; priv->adminq_query_flow_rules_cnt = 0; priv->adminq_cfg_flow_rule_cnt = 0; @@ -442,6 +463,8 @@ static int gve_adminq_kick_and_wait(struct gve_priv *priv) int tail, head; int i; + lockdep_assert_held(&priv->adminq_lock); + tail = ioread32be(&priv->reg_bar0->adminq_event_counter); head = priv->adminq_prod_cnt; @@ -467,9 +490,6 @@ static int gve_adminq_kick_and_wait(struct gve_priv *priv) return 0; } -/* This function is not threadsafe - the caller is responsible for any - * necessary locks. - */ static int gve_adminq_issue_cmd(struct gve_priv *priv, union gve_adminq_command *cmd_orig) { @@ -477,6 +497,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, u32 opcode; u32 tail; + lockdep_assert_held(&priv->adminq_lock); + tail = ioread32be(&priv->reg_bar0->adminq_event_counter); // Check if next command will overflow the buffer. @@ -544,6 +566,9 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, case GVE_ADMINQ_REPORT_LINK_SPEED: priv->adminq_report_link_speed_cnt++; break; + case GVE_ADMINQ_REPORT_NIC_TIMESTAMP: + priv->adminq_report_nic_timestamp_cnt++; + break; case GVE_ADMINQ_GET_PTYPE_MAP: priv->adminq_get_ptype_map_cnt++; break; @@ -564,6 +589,7 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv, break; default: dev_err(&priv->pdev->dev, "unknown AQ command opcode %d\n", opcode); + return -EINVAL; } return 0; @@ -625,7 +651,7 @@ static int gve_adminq_execute_extended_cmd(struct gve_priv *priv, u32 opcode, /* The device specifies that the management vector can either be the first irq * or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to - * the ntfy blks. It if is 0 then the management vector is last, if it is 1 then + * the ntfy blks. If it is 0 then the management vector is last, if it is 1 then * the management vector is first. * * gve arranges the msix vectors so that the management vector is last. @@ -709,13 +735,19 @@ int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_que int err; int i; + mutex_lock(&priv->adminq_lock); + for (i = start_id; i < start_id + num_queues; i++) { err = gve_adminq_create_tx_queue(priv, i); if (err) - return err; + goto out; } - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; } static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, @@ -731,6 +763,7 @@ static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, .ntfy_id = cpu_to_be32(rx->ntfy_id), .queue_resources_addr = cpu_to_be64(rx->q_resources_bus), .rx_ring_size = cpu_to_be16(priv->rx_desc_cnt), + .packet_buffer_size = cpu_to_be16(rx->packet_buffer_size), }; if (gve_is_gqi(priv)) { @@ -743,7 +776,6 @@ static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, cpu_to_be64(rx->data.data_bus); cmd->create_rx_queue.index = cpu_to_be32(queue_index); cmd->create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id); - cmd->create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size); } else { u32 qpl_id = 0; @@ -756,8 +788,6 @@ static void gve_adminq_get_create_rx_queue_cmd(struct gve_priv *priv, cpu_to_be64(rx->dqo.complq.bus); cmd->create_rx_queue.rx_data_ring_addr = cpu_to_be64(rx->dqo.bufq.bus); - cmd->create_rx_queue.packet_buffer_size = - cpu_to_be16(priv->data_buffer_size_dqo); cmd->create_rx_queue.rx_buff_ring_size = cpu_to_be16(priv->rx_desc_cnt); cmd->create_rx_queue.enable_rsc = @@ -790,13 +820,19 @@ int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues) int err; int i; + mutex_lock(&priv->adminq_lock); + for (i = 0; i < num_queues; i++) { err = gve_adminq_create_rx_queue(priv, i); if (err) - return err; + goto out; } - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; } static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index) @@ -822,13 +858,19 @@ int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_qu int err; int i; + mutex_lock(&priv->adminq_lock); + for (i = start_id; i < start_id + num_queues; i++) { err = gve_adminq_destroy_tx_queue(priv, i); if (err) - return err; + goto out; } - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; } static void gve_adminq_make_destroy_rx_queue_cmd(union gve_adminq_command *cmd, @@ -863,13 +905,19 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 num_queues) int err; int i; + mutex_lock(&priv->adminq_lock); + for (i = 0; i < num_queues; i++) { err = gve_adminq_destroy_rx_queue(priv, i); if (err) - return err; + goto out; } - return gve_adminq_kick_and_wait(priv); + err = gve_adminq_kick_and_wait(priv); + +out: + mutex_unlock(&priv->adminq_lock); + return err; } static void gve_set_default_desc_cnt(struct gve_priv *priv, @@ -885,6 +933,15 @@ static void gve_set_default_desc_cnt(struct gve_priv *priv, priv->min_rx_desc_cnt = priv->rx_desc_cnt; } +static void gve_set_default_rss_sizes(struct gve_priv *priv) +{ + if (!gve_is_gqi(priv)) { + priv->rss_key_size = GVE_RSS_KEY_SIZE; + priv->rss_lut_size = GVE_RSS_INDIR_SIZE; + priv->cache_rss_config = true; + } +} + static void gve_enable_supported_features(struct gve_priv *priv, u32 supported_features_mask, const struct gve_device_option_jumbo_frames @@ -897,6 +954,8 @@ static void gve_enable_supported_features(struct gve_priv *priv, *dev_op_flow_steering, const struct gve_device_option_rss_config *dev_op_rss_config, + const struct gve_device_option_nic_timestamp + *dev_op_nic_timestamp, const struct gve_device_option_modify_ring *dev_op_modify_ring) { @@ -968,11 +1027,20 @@ static void gve_enable_supported_features(struct gve_priv *priv, be16_to_cpu(dev_op_rss_config->hash_key_size); priv->rss_lut_size = be16_to_cpu(dev_op_rss_config->hash_lut_size); + priv->cache_rss_config = false; + dev_dbg(&priv->pdev->dev, + "RSS device option enabled with key size of %u, lut size of %u.\n", + priv->rss_key_size, priv->rss_lut_size); } + + if (dev_op_nic_timestamp && + (supported_features_mask & GVE_SUP_NIC_TIMESTAMP_MASK)) + priv->nic_timestamp_supported = true; } int gve_adminq_describe_device(struct gve_priv *priv) { + struct gve_device_option_nic_timestamp *dev_op_nic_timestamp = NULL; struct gve_device_option_flow_steering *dev_op_flow_steering = NULL; struct gve_device_option_buffer_sizes *dev_op_buffer_sizes = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; @@ -1013,6 +1081,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) &dev_op_buffer_sizes, &dev_op_flow_steering, &dev_op_rss_config, + &dev_op_nic_timestamp, &dev_op_modify_ring); if (err) goto free_device_descriptor; @@ -1052,6 +1121,8 @@ int gve_adminq_describe_device(struct gve_priv *priv) /* set default descriptor counts */ gve_set_default_desc_cnt(priv, descriptor); + gve_set_default_rss_sizes(priv); + /* DQO supports LRO. */ if (!gve_is_gqi(priv)) priv->dev->hw_features |= NETIF_F_LRO; @@ -1075,7 +1146,8 @@ int gve_adminq_describe_device(struct gve_priv *priv) gve_enable_supported_features(priv, supported_features_mask, dev_op_jumbo_frames, dev_op_dqo_qpl, dev_op_buffer_sizes, dev_op_flow_steering, - dev_op_rss_config, dev_op_modify_ring); + dev_op_rss_config, dev_op_nic_timestamp, + dev_op_modify_ring); free_device_descriptor: dma_pool_free(priv->adminq_pool, descriptor, descriptor_bus); @@ -1187,6 +1259,22 @@ int gve_adminq_report_link_speed(struct gve_priv *priv) return err; } +int gve_adminq_report_nic_ts(struct gve_priv *priv, + dma_addr_t nic_ts_report_addr) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_REPORT_NIC_TIMESTAMP); + cmd.report_nic_ts = (struct gve_adminq_report_nic_ts) { + .nic_ts_report_len = + cpu_to_be64(sizeof(struct gve_nic_ts_report)), + .nic_ts_report_addr = cpu_to_be64(nic_ts_report_addr), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, struct gve_ptype_lut *ptype_lut) { @@ -1276,8 +1364,9 @@ int gve_adminq_reset_flow_rules(struct gve_priv *priv) int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh) { + const u32 *hash_lut_to_config = NULL; + const u8 *hash_key_to_config = NULL; dma_addr_t lut_bus = 0, key_bus = 0; - u16 key_size = 0, lut_size = 0; union gve_adminq_command cmd; __be32 *lut = NULL; u8 hash_alg = 0; @@ -1287,7 +1376,7 @@ int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *r switch (rxfh->hfunc) { case ETH_RSS_HASH_NO_CHANGE: - break; + fallthrough; case ETH_RSS_HASH_TOP: hash_alg = ETH_RSS_HASH_TOP; break; @@ -1296,27 +1385,46 @@ int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *r } if (rxfh->indir) { - lut_size = priv->rss_lut_size; + if (rxfh->indir_size != priv->rss_lut_size) + return -EINVAL; + + hash_lut_to_config = rxfh->indir; + } else if (priv->cache_rss_config) { + hash_lut_to_config = priv->rss_config.hash_lut; + } + + if (hash_lut_to_config) { lut = dma_alloc_coherent(&priv->pdev->dev, - lut_size * sizeof(*lut), + priv->rss_lut_size * sizeof(*lut), &lut_bus, GFP_KERNEL); if (!lut) return -ENOMEM; for (i = 0; i < priv->rss_lut_size; i++) - lut[i] = cpu_to_be32(rxfh->indir[i]); + lut[i] = cpu_to_be32(hash_lut_to_config[i]); } if (rxfh->key) { - key_size = priv->rss_key_size; + if (rxfh->key_size != priv->rss_key_size) { + err = -EINVAL; + goto out; + } + + hash_key_to_config = rxfh->key; + } else if (priv->cache_rss_config) { + hash_key_to_config = priv->rss_config.hash_key; + } + + if (hash_key_to_config) { key = dma_alloc_coherent(&priv->pdev->dev, - key_size, &key_bus, GFP_KERNEL); + priv->rss_key_size, + &key_bus, GFP_KERNEL); if (!key) { err = -ENOMEM; goto out; } - memcpy(key, rxfh->key, key_size); + memcpy(key, hash_key_to_config, priv->rss_key_size); } /* Zero-valued fields in the cmd.configure_rss instruct the device to @@ -1330,8 +1438,10 @@ int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *r BIT(GVE_RSS_HASH_TCPV6) | BIT(GVE_RSS_HASH_UDPV6)), .hash_alg = hash_alg, - .hash_key_size = cpu_to_be16(key_size), - .hash_lut_size = cpu_to_be16(lut_size), + .hash_key_size = + cpu_to_be16((key_bus) ? priv->rss_key_size : 0), + .hash_lut_size = + cpu_to_be16((lut_bus) ? priv->rss_lut_size : 0), .hash_key_addr = cpu_to_be64(key_bus), .hash_lut_addr = cpu_to_be64(lut_bus), }; @@ -1341,11 +1451,11 @@ int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *r out: if (lut) dma_free_coherent(&priv->pdev->dev, - lut_size * sizeof(*lut), + priv->rss_lut_size * sizeof(*lut), lut, lut_bus); if (key) dma_free_coherent(&priv->pdev->dev, - key_size, key, key_bus); + priv->rss_key_size, key, key_bus); return err; } @@ -1449,12 +1559,15 @@ static int gve_adminq_process_rss_query(struct gve_priv *priv, rxfh->hfunc = descriptor->hash_alg; rss_info_addr = (void *)(descriptor + 1); - if (rxfh->key) + if (rxfh->key) { + rxfh->key_size = priv->rss_key_size; memcpy(rxfh->key, rss_info_addr, priv->rss_key_size); + } rss_info_addr += priv->rss_key_size; lut = (__be32 *)rss_info_addr; if (rxfh->indir) { + rxfh->indir_size = priv->rss_lut_size; for (i = 0; i < priv->rss_lut_size; i++) rxfh->indir[i] = be32_to_cpu(lut[i]); } diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 228217458275..22a74b6aa17e 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -27,6 +27,7 @@ enum gve_adminq_opcodes { GVE_ADMINQ_GET_PTYPE_MAP = 0xE, GVE_ADMINQ_VERIFY_DRIVER_COMPATIBILITY = 0xF, GVE_ADMINQ_QUERY_FLOW_RULES = 0x10, + GVE_ADMINQ_REPORT_NIC_TIMESTAMP = 0x11, GVE_ADMINQ_QUERY_RSS = 0x12, /* For commands that are larger than 56 bytes */ @@ -174,6 +175,12 @@ struct gve_device_option_rss_config { static_assert(sizeof(struct gve_device_option_rss_config) == 8); +struct gve_device_option_nic_timestamp { + __be32 supported_features_mask; +}; + +static_assert(sizeof(struct gve_device_option_nic_timestamp) == 4); + /* Terminology: * * RDA - Raw DMA Addressing - Buffers associated with SKBs are directly DMA @@ -192,6 +199,7 @@ enum gve_dev_opt_id { GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8, GVE_DEV_OPT_ID_BUFFER_SIZES = 0xa, GVE_DEV_OPT_ID_FLOW_STEERING = 0xb, + GVE_DEV_OPT_ID_NIC_TIMESTAMP = 0xd, GVE_DEV_OPT_ID_RSS_CONFIG = 0xe, }; @@ -206,6 +214,7 @@ enum gve_dev_opt_req_feat_mask { GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_FLOW_STEERING = 0x0, GVE_DEV_OPT_REQ_FEAT_MASK_RSS_CONFIG = 0x0, + GVE_DEV_OPT_REQ_FEAT_MASK_NIC_TIMESTAMP = 0x0, }; enum gve_sup_feature_mask { @@ -214,6 +223,7 @@ enum gve_sup_feature_mask { GVE_SUP_BUFFER_SIZES_MASK = 1 << 4, GVE_SUP_FLOW_STEERING_MASK = 1 << 5, GVE_SUP_RSS_CONFIG_MASK = 1 << 7, + GVE_SUP_NIC_TIMESTAMP_MASK = 1 << 8, }; #define GVE_DEV_OPT_LEN_GQI_RAW_ADDRESSING 0x0 @@ -392,6 +402,21 @@ struct gve_adminq_report_link_speed { static_assert(sizeof(struct gve_adminq_report_link_speed) == 8); +struct gve_adminq_report_nic_ts { + __be64 nic_ts_report_len; + __be64 nic_ts_report_addr; +}; + +static_assert(sizeof(struct gve_adminq_report_nic_ts) == 16); + +struct gve_nic_ts_report { + __be64 nic_timestamp; /* NIC clock in nanoseconds */ + __be64 reserved1; + __be64 reserved2; + __be64 reserved3; + __be64 reserved4; +}; + struct stats { __be32 stat_name; __be32 queue_id; @@ -451,7 +476,7 @@ struct gve_ptype_entry { }; struct gve_ptype_map { - struct gve_ptype_entry ptypes[1 << 10]; /* PTYPES are always 10 bits. */ + struct gve_ptype_entry ptypes[GVE_NUM_PTYPES]; /* PTYPES are always 10 bits. */ }; struct gve_adminq_get_ptype_map { @@ -585,6 +610,7 @@ union gve_adminq_command { struct gve_adminq_query_flow_rules query_flow_rules; struct gve_adminq_configure_rss configure_rss; struct gve_adminq_query_rss query_rss; + struct gve_adminq_report_nic_ts report_nic_ts; struct gve_adminq_extended_command extended_command; }; }; @@ -624,6 +650,8 @@ int gve_adminq_reset_flow_rules(struct gve_priv *priv); int gve_adminq_query_flow_rules(struct gve_priv *priv, u16 query_opcode, u32 starting_loc); int gve_adminq_configure_rss(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh); int gve_adminq_query_rss_config(struct gve_priv *priv, struct ethtool_rxfh_param *rxfh); +int gve_adminq_report_nic_ts(struct gve_priv *priv, + dma_addr_t nic_ts_report_addr); struct gve_ptype_lut; int gve_adminq_get_ptype_map_dqo(struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index 403f0f335ba6..8f5021e59e0a 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -4,6 +4,7 @@ * Copyright (C) 2015-2024 Google, Inc. */ +#include <net/xdp_sock_drv.h> #include "gve.h" #include "gve_utils.h" @@ -29,6 +30,10 @@ struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) /* Point buf_state to itself to mark it as allocated */ buf_state->next = buffer_id; + /* Clear the buffer pointers */ + buf_state->page_info.page = NULL; + buf_state->xsk_buff = NULL; + return buf_state; } @@ -139,7 +144,8 @@ int gve_alloc_qpl_page_dqo(struct gve_rx_ring *rx, buf_state->page_info.page_offset = 0; buf_state->page_info.page_address = page_address(buf_state->page_info.page); - buf_state->page_info.buf_size = priv->data_buffer_size_dqo; + buf_state->page_info.buf_size = rx->packet_buffer_truesize; + buf_state->page_info.pad = rx->rx_headroom; buf_state->last_single_ref_offset = 0; /* The page already has 1 ref. */ @@ -162,7 +168,7 @@ void gve_free_qpl_page_dqo(struct gve_rx_buf_state_dqo *buf_state) void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { - const u16 data_buffer_size = priv->data_buffer_size_dqo; + const u16 data_buffer_size = rx->packet_buffer_truesize; int pagecount; /* Can't reuse if we only fit one buffer per page */ @@ -205,49 +211,53 @@ void gve_free_to_page_pool(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state, bool allow_direct) { - struct page *page = buf_state->page_info.page; + netmem_ref netmem = buf_state->page_info.netmem; - if (!page) + if (!netmem) return; - page_pool_put_full_page(page->pp, page, allow_direct); - buf_state->page_info.page = NULL; + page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, allow_direct); + buf_state->page_info.netmem = 0; } static int gve_alloc_from_page_pool(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { - struct gve_priv *priv = rx->gve; - struct page *page; + netmem_ref netmem; - buf_state->page_info.buf_size = priv->data_buffer_size_dqo; - page = page_pool_alloc(rx->dqo.page_pool, - &buf_state->page_info.page_offset, - &buf_state->page_info.buf_size, GFP_ATOMIC); + buf_state->page_info.buf_size = rx->packet_buffer_truesize; + netmem = page_pool_alloc_netmem(rx->dqo.page_pool, + &buf_state->page_info.page_offset, + &buf_state->page_info.buf_size, + GFP_ATOMIC); - if (!page) + if (!netmem) return -ENOMEM; - buf_state->page_info.page = page; - buf_state->page_info.page_address = page_address(page); - buf_state->addr = page_pool_get_dma_addr(page); + buf_state->page_info.netmem = netmem; + buf_state->page_info.page_address = netmem_address(netmem); + buf_state->addr = page_pool_get_dma_addr_netmem(netmem); + buf_state->page_info.pad = rx->dqo.page_pool->p.offset; return 0; } struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, - struct gve_rx_ring *rx) + struct gve_rx_ring *rx, + bool xdp) { u32 ntfy_id = gve_rx_idx_to_ntfy(priv, rx->q_num); struct page_pool_params pp = { .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .order = 0, .pool_size = GVE_PAGE_POOL_SIZE_MULTIPLIER * priv->rx_desc_cnt, + .nid = priv->numa_node, .dev = &priv->pdev->dev, .netdev = priv->dev, .napi = &priv->ntfy_blocks[ntfy_id].napi, .max_len = PAGE_SIZE, - .dma_dir = DMA_FROM_DEVICE, + .dma_dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, + .offset = xdp ? XDP_PACKET_HEADROOM : 0, }; return page_pool_create(&pp); @@ -269,7 +279,7 @@ void gve_reuse_buffer(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state) { if (rx->dqo.page_pool) { - buf_state->page_info.page = NULL; + buf_state->page_info.netmem = 0; gve_free_buf_state(rx, buf_state); } else { gve_dec_pagecnt_bias(&buf_state->page_info); @@ -281,7 +291,24 @@ int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc) { struct gve_rx_buf_state_dqo *buf_state; - if (rx->dqo.page_pool) { + if (rx->xsk_pool) { + buf_state = gve_alloc_buf_state(rx); + if (unlikely(!buf_state)) + return -ENOMEM; + + buf_state->xsk_buff = xsk_buff_alloc(rx->xsk_pool); + if (unlikely(!buf_state->xsk_buff)) { + xsk_set_rx_need_wakeup(rx->xsk_pool); + gve_free_buf_state(rx, buf_state); + return -ENOMEM; + } + /* Allocated xsk buffer. Clear wakeup in case it was set. */ + xsk_clear_rx_need_wakeup(rx->xsk_pool); + desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); + desc->buf_addr = + cpu_to_le64(xsk_buff_xdp_get_dma(buf_state->xsk_buff)); + return 0; + } else if (rx->dqo.page_pool) { buf_state = gve_alloc_buf_state(rx); if (WARN_ON_ONCE(!buf_state)) return -ENOMEM; @@ -301,7 +328,8 @@ int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc) } desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); desc->buf_addr = cpu_to_le64(buf_state->addr + - buf_state->page_info.page_offset); + buf_state->page_info.page_offset + + buf_state->page_info.pad); return 0; diff --git a/drivers/net/ethernet/google/gve/gve_desc_dqo.h b/drivers/net/ethernet/google/gve/gve_desc_dqo.h index f79cd0591110..d17da841b5a0 100644 --- a/drivers/net/ethernet/google/gve/gve_desc_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_desc_dqo.h @@ -247,7 +247,8 @@ struct gve_rx_compl_desc_dqo { }; __le32 hash; __le32 reserved6; - __le64 reserved7; + __le32 reserved7; + __le32 ts; /* timestamp in nanosecs */ } __packed; static_assert(sizeof(struct gve_rx_compl_desc_dqo) == 32); diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index e83773fb891f..6eb442096e02 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -37,6 +37,8 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean); +bool gve_xdp_poll_dqo(struct gve_notify_block *block); +bool gve_xsk_tx_poll_dqo(struct gve_notify_block *block, int budget); int gve_rx_poll_dqo(struct gve_notify_block *block, int budget); int gve_tx_alloc_rings_dqo(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg); @@ -60,6 +62,7 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, struct napi_struct *napi); void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx); void gve_rx_write_doorbell_dqo(const struct gve_priv *priv, int queue_idx); +void gve_xdp_tx_flush_dqo(struct gve_priv *priv, u32 xdp_qid); static inline void gve_tx_put_doorbell_dqo(const struct gve_priv *priv, diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index bdfc6e77b2af..d0a223250845 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -63,11 +63,11 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = { "tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]", "tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]", - "tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]", - "tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" + "tx_dma_mapping_error[%u]", + "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]" }; -static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { +static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] __nonstring_array = { "adminq_prod_cnt", "adminq_cmd_fail", "adminq_timeouts", "adminq_describe_device_cnt", "adminq_cfg_device_resources_cnt", "adminq_register_page_list_cnt", "adminq_unregister_page_list_cnt", @@ -76,7 +76,7 @@ static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = { "adminq_dcfg_device_resources_cnt", "adminq_set_driver_parameter_cnt", "adminq_report_stats_cnt", "adminq_report_link_speed_cnt", "adminq_get_ptype_map_cnt", "adminq_query_flow_rules", "adminq_cfg_flow_rule", "adminq_cfg_rss_cnt", - "adminq_query_rss_cnt", + "adminq_query_rss_cnt", "adminq_report_nic_timestamp_cnt", }; static const char gve_gstrings_priv_flags[][ETH_GSTRING_LEN] = { @@ -113,7 +113,7 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) i); for (i = 0; i < ARRAY_SIZE(gve_gstrings_adminq_stats); i++) - ethtool_puts(&s, gve_gstrings_adminq_stats[i]); + ethtool_cpy(&s, gve_gstrings_adminq_stats[i]); break; @@ -392,7 +392,9 @@ gve_get_ethtool_stats(struct net_device *netdev, */ data[i++] = 0; data[i++] = 0; - data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head; + data[i++] = + (tx->dqo_tx.tail - tx->dqo_tx.head) & + tx->mask; } do { start = @@ -417,9 +419,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = value; } } - /* XDP xsk counters */ - data[i++] = tx->xdp_xsk_wakeup; - data[i++] = tx->xdp_xsk_done; + /* XDP counters */ do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); data[i] = tx->xdp_xsk_sent; @@ -456,6 +456,7 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->adminq_cfg_flow_rule_cnt; data[i++] = priv->adminq_cfg_rss_cnt; data[i++] = priv->adminq_query_rss_cnt; + data[i++] = priv->adminq_report_nic_timestamp_cnt; } static void gve_get_channels(struct net_device *netdev, @@ -477,11 +478,12 @@ static int gve_set_channels(struct net_device *netdev, struct ethtool_channels *cmd) { struct gve_priv *priv = netdev_priv(netdev); - struct gve_queue_config new_tx_cfg = priv->tx_cfg; - struct gve_queue_config new_rx_cfg = priv->rx_cfg; + struct gve_tx_queue_config new_tx_cfg = priv->tx_cfg; + struct gve_rx_queue_config new_rx_cfg = priv->rx_cfg; struct ethtool_channels old_settings; int new_tx = cmd->tx_count; int new_rx = cmd->rx_count; + bool reset_rss = false; gve_get_channels(netdev, &old_settings); @@ -492,22 +494,27 @@ static int gve_set_channels(struct net_device *netdev, if (!new_rx || !new_tx) return -EINVAL; - if (priv->num_xdp_queues && - (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) { - dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues"); - return -EINVAL; - } + if (priv->xdp_prog) { + if (new_tx != new_rx || + (2 * new_tx > priv->tx_cfg.max_queues)) { + dev_err(&priv->pdev->dev, "The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues when XDP program is installed"); + return -EINVAL; + } - if (!netif_running(netdev)) { - priv->tx_cfg.num_queues = new_tx; - priv->rx_cfg.num_queues = new_rx; - return 0; + /* One XDP TX queue per RX queue. */ + new_tx_cfg.num_xdp_queues = new_rx; + } else { + new_tx_cfg.num_xdp_queues = 0; } + if (new_rx != priv->rx_cfg.num_queues && + priv->cache_rss_config && !netif_is_rxfh_configured(netdev)) + reset_rss = true; + new_tx_cfg.num_queues = new_tx; new_rx_cfg.num_queues = new_rx; - return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg); + return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg, reset_rss); } static void gve_get_ringparam(struct net_device *netdev, @@ -643,8 +650,7 @@ static int gve_set_tunable(struct net_device *netdev, switch (etuna->id) { case ETHTOOL_RX_COPYBREAK: { - u32 max_copybreak = gve_is_gqi(priv) ? - GVE_DEFAULT_RX_BUFFER_SIZE : priv->data_buffer_size_dqo; + u32 max_copybreak = priv->rx_cfg.packet_buffer_size; len = *(u32 *)value; if (len > max_copybreak) @@ -662,7 +668,7 @@ static u32 gve_get_priv_flags(struct net_device *netdev) struct gve_priv *priv = netdev_priv(netdev); u32 ret_flags = 0; - /* Only 1 flag exists currently: report-stats (BIT(O)), so set that flag. */ + /* Only 1 flag exists currently: report-stats (BIT(0)), so set that flag. */ if (priv->ethtool_flags & BIT(0)) ret_flags |= BIT(0); return ret_flags; @@ -700,7 +706,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags) memset(priv->stats_report->stats, 0, (tx_stats_num + rx_stats_num) * sizeof(struct stats)); - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); } return 0; } @@ -793,9 +799,6 @@ static int gve_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXCLSRLDEL: err = gve_del_flow_rule(priv, cmd); break; - case ETHTOOL_SRXFH: - err = -EOPNOTSUPP; - break; default: err = -EOPNOTSUPP; break; @@ -830,9 +833,6 @@ static int gve_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u case ETHTOOL_GRXCLSRLALL: err = gve_get_flow_rule_ids(priv, cmd, (u32 *)rule_locs); break; - case ETHTOOL_GRXFH: - err = -EOPNOTSUPP; - break; default: err = -EOPNOTSUPP; break; @@ -855,6 +855,25 @@ static u32 gve_get_rxfh_indir_size(struct net_device *netdev) return priv->rss_lut_size; } +static void gve_get_rss_config_cache(struct gve_priv *priv, + struct ethtool_rxfh_param *rxfh) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + rxfh->hfunc = ETH_RSS_HASH_TOP; + + if (rxfh->key) { + rxfh->key_size = priv->rss_key_size; + memcpy(rxfh->key, rss_config->hash_key, priv->rss_key_size); + } + + if (rxfh->indir) { + rxfh->indir_size = priv->rss_lut_size; + memcpy(rxfh->indir, rss_config->hash_lut, + priv->rss_lut_size * sizeof(*rxfh->indir)); + } +} + static int gve_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) { struct gve_priv *priv = netdev_priv(netdev); @@ -862,18 +881,67 @@ static int gve_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rx if (!priv->rss_key_size || !priv->rss_lut_size) return -EOPNOTSUPP; + if (priv->cache_rss_config) { + gve_get_rss_config_cache(priv, rxfh); + return 0; + } + return gve_adminq_query_rss_config(priv, rxfh); } +static void gve_set_rss_config_cache(struct gve_priv *priv, + struct ethtool_rxfh_param *rxfh) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + if (rxfh->key) + memcpy(rss_config->hash_key, rxfh->key, priv->rss_key_size); + + if (rxfh->indir) + memcpy(rss_config->hash_lut, rxfh->indir, + priv->rss_lut_size * sizeof(*rxfh->indir)); +} + static int gve_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { struct gve_priv *priv = netdev_priv(netdev); + int err; if (!priv->rss_key_size || !priv->rss_lut_size) return -EOPNOTSUPP; - return gve_adminq_configure_rss(priv, rxfh); + err = gve_adminq_configure_rss(priv, rxfh); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Fail to configure RSS config"); + return err; + } + + if (priv->cache_rss_config) + gve_set_rss_config_cache(priv, rxfh); + + return 0; +} + +static int gve_get_ts_info(struct net_device *netdev, + struct kernel_ethtool_ts_info *info) +{ + struct gve_priv *priv = netdev_priv(netdev); + + ethtool_op_get_ts_info(netdev, info); + + if (priv->nic_timestamp_supported) { + info->so_timestamping |= SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->rx_filters |= BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + if (priv->ptp) + info->phc_index = ptp_clock_index(priv->ptp->clock); + } + + return 0; } const struct ethtool_ops gve_ethtool_ops = { @@ -904,5 +972,5 @@ const struct ethtool_ops gve_ethtool_ops = { .get_priv_flags = gve_get_priv_flags, .set_priv_flags = gve_set_priv_flags, .get_link_ksettings = gve_get_link_ksettings, - .get_ts_info = ethtool_op_get_ts_info, + .get_ts_info = gve_get_ts_info, }; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 533e659b15b3..1be1b1ef31ee 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -4,6 +4,7 @@ * Copyright (C) 2015-2024 Google LLC */ +#include <linux/bitmap.h> #include <linux/bpf.h> #include <linux/cpumask.h> #include <linux/etherdevice.h> @@ -184,6 +185,43 @@ static void gve_free_flow_rule_caches(struct gve_priv *priv) flow_rules_cache->rules_cache = NULL; } +static int gve_alloc_rss_config_cache(struct gve_priv *priv) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + if (!priv->cache_rss_config) + return 0; + + rss_config->hash_key = kcalloc(priv->rss_key_size, + sizeof(rss_config->hash_key[0]), + GFP_KERNEL); + if (!rss_config->hash_key) + return -ENOMEM; + + rss_config->hash_lut = kcalloc(priv->rss_lut_size, + sizeof(rss_config->hash_lut[0]), + GFP_KERNEL); + if (!rss_config->hash_lut) + goto free_rss_key_cache; + + return 0; + +free_rss_key_cache: + kfree(rss_config->hash_key); + rss_config->hash_key = NULL; + return -ENOMEM; +} + +static void gve_free_rss_config_cache(struct gve_priv *priv) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + + kfree(rss_config->hash_key); + kfree(rss_config->hash_lut); + + memset(rss_config, 0, sizeof(*rss_config)); +} + static int gve_alloc_counter_array(struct gve_priv *priv) { priv->counter_array = @@ -231,7 +269,8 @@ static void gve_stats_report_schedule(struct gve_priv *priv) static void gve_stats_report_timer(struct timer_list *t) { - struct gve_priv *priv = from_timer(priv, t, stats_report_timer); + struct gve_priv *priv = timer_container_of(priv, t, + stats_report_timer); mod_timer(&priv->stats_report_timer, round_jiffies(jiffies + @@ -265,7 +304,7 @@ static void gve_free_stats_report(struct gve_priv *priv) if (!priv->stats_report) return; - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, priv->stats_report, priv->stats_report_bus); priv->stats_report = NULL; @@ -376,14 +415,24 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget) bool reschedule = false; int work_done = 0; - if (block->tx) - reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + if (block->tx) { + if (block->tx->q_num < priv->tx_cfg.num_queues) + reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true); + else + reschedule |= gve_xdp_poll_dqo(block); + } if (!budget) return 0; if (block->rx) { work_done = gve_rx_poll_dqo(block, budget); + + /* Poll XSK TX as part of RX NAPI. Setup re-poll based on if + * either datapath has more work to do. + */ + if (priv->xdp_prog) + reschedule |= gve_xsk_tx_poll_dqo(block, budget); reschedule |= work_done == budget; } @@ -419,10 +468,19 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget) return work_done; } +static const struct cpumask *gve_get_node_mask(struct gve_priv *priv) +{ + if (priv->numa_node == NUMA_NO_NODE) + return cpu_all_mask; + else + return cpumask_of_node(priv->numa_node); +} + static int gve_alloc_notify_blocks(struct gve_priv *priv) { int num_vecs_requested = priv->num_ntfy_blks + 1; - unsigned int active_cpus; + const struct cpumask *node_mask; + unsigned int cur_cpu; int vecs_enabled; int i, j; int err; @@ -461,8 +519,6 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; } - /* Half the notification blocks go to TX and half to RX */ - active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); /* Setup Management Vector - the last vector */ snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s", @@ -491,6 +547,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) } /* Setup the other blocks - the first n-1 vectors */ + node_mask = gve_get_node_mask(priv); + cur_cpu = cpumask_first(node_mask); for (i = 0; i < priv->num_ntfy_blks; i++) { struct gve_notify_block *block = &priv->ntfy_blocks[i]; int msix_idx = i; @@ -507,9 +565,17 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv) goto abort_with_some_ntfy_blocks; } block->irq = priv->msix_vectors[msix_idx].vector; - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - get_cpu_mask(i % active_cpus)); + irq_set_affinity_and_hint(block->irq, + cpumask_of(cur_cpu)); block->irq_db_index = &priv->irq_db_indices[i].index; + + cur_cpu = cpumask_next(cur_cpu, node_mask); + /* Wrap once CPUs in the node have been exhausted, or when + * starting RX queue affinities. TX and RX queues of the same + * index share affinity. + */ + if (cur_cpu >= nr_cpu_ids || (i + 1) == priv->tx_cfg.max_queues) + cur_cpu = cpumask_first(node_mask); } return 0; abort_with_some_ntfy_blocks: @@ -575,12 +641,18 @@ static int gve_setup_device_resources(struct gve_priv *priv) err = gve_alloc_flow_rule_caches(priv); if (err) return err; - err = gve_alloc_counter_array(priv); + err = gve_alloc_rss_config_cache(priv); if (err) goto abort_with_flow_rule_caches; - err = gve_alloc_notify_blocks(priv); + err = gve_alloc_counter_array(priv); + if (err) + goto abort_with_rss_config_cache; + err = gve_init_clock(priv); if (err) goto abort_with_counter; + err = gve_alloc_notify_blocks(priv); + if (err) + goto abort_with_clock; err = gve_alloc_stats_report(priv); if (err) goto abort_with_ntfy_blocks; @@ -611,6 +683,12 @@ static int gve_setup_device_resources(struct gve_priv *priv) } } + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); + if (err) { + dev_err(&priv->pdev->dev, "Failed to init RSS config"); + goto abort_with_ptype_lut; + } + err = gve_adminq_report_stats(priv, priv->stats_report_len, priv->stats_report_bus, GVE_STATS_REPORT_TIMER_PERIOD); @@ -627,8 +705,12 @@ abort_with_stats_report: gve_free_stats_report(priv); abort_with_ntfy_blocks: gve_free_notify_blocks(priv); +abort_with_clock: + gve_teardown_clock(priv); abort_with_counter: gve_free_counter_array(priv); +abort_with_rss_config_cache: + gve_free_rss_config_cache(priv); abort_with_flow_rule_caches: gve_free_flow_rule_caches(priv); @@ -669,9 +751,11 @@ static void gve_teardown_device_resources(struct gve_priv *priv) priv->ptype_lut_dqo = NULL; gve_free_flow_rule_caches(priv); + gve_free_rss_config_cache(priv); gve_free_counter_array(priv); gve_free_notify_blocks(priv); gve_free_stats_report(priv); + gve_teardown_clock(priv); gve_clear_device_resources_ok(priv); } @@ -746,30 +830,13 @@ static struct gve_queue_page_list *gve_rx_get_qpl(struct gve_priv *priv, int idx return rx->dqo.qpl; } -static int gve_register_xdp_qpls(struct gve_priv *priv) -{ - int start_id; - int err; - int i; - - start_id = gve_xdp_tx_start_queue_id(priv); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_register_qpl(priv, gve_tx_get_qpl(priv, i)); - /* This failure will trigger a reset - no need to clean up */ - if (err) - return err; - } - return 0; -} - static int gve_register_qpls(struct gve_priv *priv) { int num_tx_qpls, num_rx_qpls; int err; int i; - num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), - gve_is_qpl(priv)); + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); for (i = 0; i < num_tx_qpls; i++) { @@ -787,30 +854,13 @@ static int gve_register_qpls(struct gve_priv *priv) return 0; } -static int gve_unregister_xdp_qpls(struct gve_priv *priv) -{ - int start_id; - int err; - int i; - - start_id = gve_xdp_tx_start_queue_id(priv); - for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) { - err = gve_unregister_qpl(priv, gve_tx_get_qpl(priv, i)); - /* This failure will trigger a reset - no need to clean */ - if (err) - return err; - } - return 0; -} - static int gve_unregister_qpls(struct gve_priv *priv) { int num_tx_qpls, num_rx_qpls; int err; int i; - num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_num_xdp_qpls(priv), - gve_is_qpl(priv)); + num_tx_qpls = gve_num_tx_qpls(&priv->tx_cfg, gve_is_qpl(priv)); num_rx_qpls = gve_num_rx_qpls(&priv->rx_cfg, gve_is_qpl(priv)); for (i = 0; i < num_tx_qpls; i++) { @@ -829,27 +879,6 @@ static int gve_unregister_qpls(struct gve_priv *priv) return 0; } -static int gve_create_xdp_rings(struct gve_priv *priv) -{ - int err; - - err = gve_adminq_create_tx_queues(priv, - gve_xdp_tx_start_queue_id(priv), - priv->num_xdp_queues); - if (err) { - netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n", - priv->num_xdp_queues); - /* This failure will trigger a reset - no need to clean - * up - */ - return err; - } - netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n", - priv->num_xdp_queues); - - return 0; -} - static int gve_create_rings(struct gve_priv *priv) { int num_tx_queues = gve_num_tx_queues(priv); @@ -905,7 +934,7 @@ static void init_xdp_sync_stats(struct gve_priv *priv) int i; /* Init stats */ - for (i = start_id; i < start_id + priv->num_xdp_queues; i++) { + for (i = start_id; i < start_id + priv->tx_cfg.num_xdp_queues; i++) { int ntfy_idx = gve_tx_idx_to_ntfy(priv, i); u64_stats_init(&priv->tx[i].statss); @@ -930,24 +959,21 @@ static void gve_init_sync_stats(struct gve_priv *priv) static void gve_tx_get_curr_alloc_cfg(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg) { - int num_xdp_queues = priv->xdp_prog ? priv->rx_cfg.num_queues : 0; - cfg->qcfg = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); cfg->ring_size = priv->tx_desc_cnt; - cfg->start_idx = 0; - cfg->num_rings = priv->tx_cfg.num_queues + num_xdp_queues; + cfg->num_xdp_rings = cfg->qcfg->num_xdp_queues; cfg->tx = priv->tx; } -static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings) +static void gve_tx_stop_rings(struct gve_priv *priv, int num_rings) { int i; if (!priv->tx) return; - for (i = start_id; i < start_id + num_rings; i++) { + for (i = 0; i < num_rings; i++) { if (gve_is_gqi(priv)) gve_tx_stop_ring_gqi(priv, i); else @@ -955,12 +981,11 @@ static void gve_tx_stop_rings(struct gve_priv *priv, int start_id, int num_rings } } -static void gve_tx_start_rings(struct gve_priv *priv, int start_id, - int num_rings) +static void gve_tx_start_rings(struct gve_priv *priv, int num_rings) { int i; - for (i = start_id; i < start_id + num_rings; i++) { + for (i = 0; i < num_rings; i++) { if (gve_is_gqi(priv)) gve_tx_start_ring_gqi(priv, i); else @@ -968,28 +993,6 @@ static void gve_tx_start_rings(struct gve_priv *priv, int start_id, } } -static int gve_alloc_xdp_rings(struct gve_priv *priv) -{ - struct gve_tx_alloc_rings_cfg cfg = {0}; - int err = 0; - - if (!priv->num_xdp_queues) - return 0; - - gve_tx_get_curr_alloc_cfg(priv, &cfg); - cfg.start_idx = gve_xdp_tx_start_queue_id(priv); - cfg.num_rings = priv->num_xdp_queues; - - err = gve_tx_alloc_rings_gqi(priv, &cfg); - if (err) - return err; - - gve_tx_start_rings(priv, cfg.start_idx, cfg.num_rings); - init_xdp_sync_stats(priv); - - return 0; -} - static int gve_queues_mem_alloc(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *tx_alloc_cfg, struct gve_rx_alloc_rings_cfg *rx_alloc_cfg) @@ -1020,26 +1023,6 @@ free_tx: return err; } -static int gve_destroy_xdp_rings(struct gve_priv *priv) -{ - int start_id; - int err; - - start_id = gve_xdp_tx_start_queue_id(priv); - err = gve_adminq_destroy_tx_queues(priv, - start_id, - priv->num_xdp_queues); - if (err) { - netif_err(priv, drv, priv->dev, - "failed to destroy XDP queues\n"); - /* This failure will trigger a reset - no need to clean up */ - return err; - } - netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n"); - - return 0; -} - static int gve_destroy_rings(struct gve_priv *priv) { int num_tx_queues = gve_num_tx_queues(priv); @@ -1064,20 +1047,6 @@ static int gve_destroy_rings(struct gve_priv *priv) return 0; } -static void gve_free_xdp_rings(struct gve_priv *priv) -{ - struct gve_tx_alloc_rings_cfg cfg = {0}; - - gve_tx_get_curr_alloc_cfg(priv, &cfg); - cfg.start_idx = gve_xdp_tx_start_queue_id(priv); - cfg.num_rings = priv->num_xdp_queues; - - if (priv->tx) { - gve_tx_stop_rings(priv, cfg.start_idx, cfg.num_rings); - gve_tx_free_rings_gqi(priv, &cfg); - } -} - static void gve_queues_mem_free(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *tx_cfg, struct gve_rx_alloc_rings_cfg *rx_cfg) @@ -1095,7 +1064,7 @@ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, enum dma_data_direction dir, gfp_t gfp_flags) { - *page = alloc_page(gfp_flags); + *page = alloc_pages_node(priv->numa_node, gfp_flags, 0); if (!*page) { priv->page_alloc_fail++; return -ENOMEM; @@ -1196,18 +1165,84 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up); static void gve_turndown(struct gve_priv *priv); static void gve_turnup(struct gve_priv *priv); +static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid) +{ + struct gve_rx_ring *rx; + + if (!priv->rx) + return; + + rx = &priv->rx[qid]; + rx->xsk_pool = NULL; + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) + xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq); + + if (!priv->tx) + return; + priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL; +} + +static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev, + struct xsk_buff_pool *pool, u16 qid) +{ + struct gve_rx_ring *rx; + u16 tx_qid; + int err; + + rx = &priv->rx[qid]; + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, pool); + if (err) { + gve_unreg_xsk_pool(priv, qid); + return err; + } + + rx->xsk_pool = pool; + + tx_qid = gve_xdp_tx_queue_id(priv, qid); + priv->tx[tx_qid].xsk_pool = pool; + + return 0; +} + +static void gve_unreg_xdp_info(struct gve_priv *priv) +{ + int i; + + if (!priv->tx_cfg.num_xdp_queues || !priv->rx) + return; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + struct gve_rx_ring *rx = &priv->rx[i]; + + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) + xdp_rxq_info_unreg(&rx->xdp_rxq); + + gve_unreg_xsk_pool(priv, i); + } +} + +static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid) +{ + if (!test_bit(qid, priv->xsk_pools)) + return NULL; + + return xsk_get_pool_from_qid(priv->dev, qid); +} + static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) { struct napi_struct *napi; struct gve_rx_ring *rx; int err = 0; - int i, j; - u32 tx_qid; + int i; - if (!priv->num_xdp_queues) + if (!priv->tx_cfg.num_xdp_queues) return 0; for (i = 0; i < priv->rx_cfg.num_queues; i++) { + struct xsk_buff_pool *xsk_pool; + rx = &priv->rx[i]; napi = &priv->ntfy_blocks[rx->ntfy_id].napi; @@ -1215,64 +1250,28 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) napi->napi_id); if (err) goto err; - err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, - MEM_TYPE_PAGE_SHARED, NULL); + + xsk_pool = gve_get_xsk_pool(priv, i); + if (xsk_pool) + err = gve_reg_xsk_pool(priv, dev, xsk_pool, i); + else if (gve_is_qpl(priv)) + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + else + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rx->dqo.page_pool); if (err) goto err; - rx->xsk_pool = xsk_get_pool_from_qid(dev, i); - if (rx->xsk_pool) { - err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, - napi->napi_id); - if (err) - goto err; - err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, - MEM_TYPE_XSK_BUFF_POOL, NULL); - if (err) - goto err; - xsk_pool_set_rxq_info(rx->xsk_pool, - &rx->xsk_rxq); - } - } - - for (i = 0; i < priv->num_xdp_queues; i++) { - tx_qid = gve_xdp_tx_queue_id(priv, i); - priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); } return 0; err: - for (j = i; j >= 0; j--) { - rx = &priv->rx[j]; - if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) - xdp_rxq_info_unreg(&rx->xdp_rxq); - if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) - xdp_rxq_info_unreg(&rx->xsk_rxq); - } + gve_unreg_xdp_info(priv); return err; } -static void gve_unreg_xdp_info(struct gve_priv *priv) -{ - int i, tx_qid; - - if (!priv->num_xdp_queues) - return; - - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - struct gve_rx_ring *rx = &priv->rx[i]; - - xdp_rxq_info_unreg(&rx->xdp_rxq); - if (rx->xsk_pool) { - xdp_rxq_info_unreg(&rx->xsk_rxq); - rx->xsk_pool = NULL; - } - } - - for (i = 0; i < priv->num_xdp_queues; i++) { - tx_qid = gve_xdp_tx_queue_id(priv, i); - priv->tx[tx_qid].xsk_pool = NULL; - } -} static void gve_drain_page_cache(struct gve_priv *priv) { @@ -1285,15 +1284,14 @@ static void gve_drain_page_cache(struct gve_priv *priv) static void gve_rx_get_curr_alloc_cfg(struct gve_priv *priv, struct gve_rx_alloc_rings_cfg *cfg) { - cfg->qcfg = &priv->rx_cfg; + cfg->qcfg_rx = &priv->rx_cfg; cfg->qcfg_tx = &priv->tx_cfg; cfg->raw_addressing = !gve_is_qpl(priv); cfg->enable_header_split = priv->header_split_enabled; cfg->ring_size = priv->rx_desc_cnt; - cfg->packet_buffer_size = gve_is_gqi(priv) ? - GVE_DEFAULT_RX_BUFFER_SIZE : - priv->data_buffer_size_dqo; + cfg->packet_buffer_size = priv->rx_cfg.packet_buffer_size; cfg->rx = priv->rx; + cfg->xdp = !!cfg->qcfg_tx->num_xdp_queues; } void gve_get_curr_alloc_cfgs(struct gve_priv *priv, @@ -1366,17 +1364,13 @@ static int gve_queues_start(struct gve_priv *priv, /* Record new configs into priv */ priv->tx_cfg = *tx_alloc_cfg->qcfg; - priv->rx_cfg = *rx_alloc_cfg->qcfg; + priv->tx_cfg.num_xdp_queues = tx_alloc_cfg->num_xdp_rings; + priv->rx_cfg = *rx_alloc_cfg->qcfg_rx; priv->tx_desc_cnt = tx_alloc_cfg->ring_size; priv->rx_desc_cnt = rx_alloc_cfg->ring_size; - if (priv->xdp_prog) - priv->num_xdp_queues = priv->rx_cfg.num_queues; - else - priv->num_xdp_queues = 0; - - gve_tx_start_rings(priv, 0, tx_alloc_cfg->num_rings); - gve_rx_start_rings(priv, rx_alloc_cfg->qcfg->num_queues); + gve_tx_start_rings(priv, gve_num_tx_queues(priv)); + gve_rx_start_rings(priv, rx_alloc_cfg->qcfg_rx->num_queues); gve_init_sync_stats(priv); err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); @@ -1390,12 +1384,18 @@ static int gve_queues_start(struct gve_priv *priv, if (err) goto stop_and_free_rings; + if (rx_alloc_cfg->reset_rss) { + err = gve_init_rss_config(priv, priv->rx_cfg.num_queues); + if (err) + goto reset; + } + err = gve_register_qpls(priv); if (err) goto reset; priv->header_split_enabled = rx_alloc_cfg->enable_header_split; - priv->data_buffer_size_dqo = rx_alloc_cfg->packet_buffer_size; + priv->rx_cfg.packet_buffer_size = rx_alloc_cfg->packet_buffer_size; err = gve_create_rings(priv); if (err) @@ -1422,7 +1422,7 @@ reset: /* return the original error */ return err; stop_and_free_rings: - gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); + gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); gve_queues_mem_remove(priv); return err; @@ -1467,11 +1467,11 @@ static int gve_queues_stop(struct gve_priv *priv) goto err; gve_clear_device_rings_ok(priv); } - del_timer_sync(&priv->stats_report_timer); + timer_delete_sync(&priv->stats_report_timer); gve_unreg_xdp_info(priv); - gve_tx_stop_rings(priv, 0, gve_num_tx_queues(priv)); + gve_tx_stop_rings(priv, gve_num_tx_queues(priv)); gve_rx_stop_rings(priv, priv->rx_cfg.num_queues); priv->interface_down_cnt++; @@ -1501,56 +1501,6 @@ static int gve_close(struct net_device *dev) return 0; } -static int gve_remove_xdp_queues(struct gve_priv *priv) -{ - int err; - - err = gve_destroy_xdp_rings(priv); - if (err) - return err; - - err = gve_unregister_xdp_qpls(priv); - if (err) - return err; - - gve_unreg_xdp_info(priv); - gve_free_xdp_rings(priv); - - priv->num_xdp_queues = 0; - return 0; -} - -static int gve_add_xdp_queues(struct gve_priv *priv) -{ - int err; - - priv->num_xdp_queues = priv->rx_cfg.num_queues; - - err = gve_alloc_xdp_rings(priv); - if (err) - goto err; - - err = gve_reg_xdp_info(priv, priv->dev); - if (err) - goto free_xdp_rings; - - err = gve_register_xdp_qpls(priv); - if (err) - goto free_xdp_rings; - - err = gve_create_xdp_rings(priv); - if (err) - goto free_xdp_rings; - - return 0; - -free_xdp_rings: - gve_free_xdp_rings(priv); -err: - priv->num_xdp_queues = 0; - return err; -} - static void gve_handle_link_status(struct gve_priv *priv, bool link_status) { if (!gve_get_napi_enabled(priv)) @@ -1568,6 +1518,19 @@ static void gve_handle_link_status(struct gve_priv *priv, bool link_status) } } +static int gve_configure_rings_xdp(struct gve_priv *priv, + u16 num_xdp_rings) +{ + struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; + struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; + + gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); + tx_alloc_cfg.num_xdp_rings = num_xdp_rings; + + rx_alloc_cfg.xdp = !!num_xdp_rings; + return gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); +} + static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, struct netlink_ext_ack *extack) { @@ -1580,42 +1543,49 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); + + /* Update priv XDP queue configuration */ + priv->tx_cfg.num_xdp_queues = priv->xdp_prog ? + priv->rx_cfg.num_queues : 0; return 0; } - gve_turndown(priv); - if (!old_prog && prog) { - // Allocate XDP TX queues if an XDP program is - // being installed - err = gve_add_xdp_queues(priv); - if (err) - goto out; - } else if (old_prog && !prog) { - // Remove XDP TX queues if an XDP program is - // being uninstalled - err = gve_remove_xdp_queues(priv); - if (err) - goto out; - } + if (!old_prog && prog) + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + else if (old_prog && !prog) + err = gve_configure_rings_xdp(priv, 0); + + if (err) + goto out; + WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); out: - gve_turnup(priv); status = ioread32be(&priv->reg_bar0->device_status); gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); return err; } +static int gve_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (priv->queue_format == GVE_GQI_QPL_FORMAT) + return gve_xdp_xmit_gqi(dev, n, frames, flags); + else if (priv->queue_format == GVE_DQO_RDA_FORMAT) + return gve_xdp_xmit_dqo(dev, n, frames, flags); + + return -EOPNOTSUPP; +} + static int gve_xsk_pool_enable(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) { struct gve_priv *priv = netdev_priv(dev); - struct napi_struct *napi; - struct gve_rx_ring *rx; - int tx_qid; int err; if (qid >= priv->rx_cfg.num_queues) { @@ -1633,34 +1603,31 @@ static int gve_xsk_pool_enable(struct net_device *dev, if (err) return err; + set_bit(qid, priv->xsk_pools); + /* If XDP prog is not installed or interface is down, return. */ if (!priv->xdp_prog || !netif_running(dev)) return 0; - rx = &priv->rx[qid]; - napi = &priv->ntfy_blocks[rx->ntfy_id].napi; - err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); - if (err) - goto err; - - err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, - MEM_TYPE_XSK_BUFF_POOL, NULL); + err = gve_reg_xsk_pool(priv, dev, pool, qid); if (err) - goto err; - - xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); - rx->xsk_pool = pool; - - tx_qid = gve_xdp_tx_queue_id(priv, qid); - priv->tx[tx_qid].xsk_pool = pool; + goto err_xsk_pool_dma_mapped; + /* Stop and start RDA queues to repost buffers. */ + if (!gve_is_qpl(priv)) { + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + if (err) + goto err_xsk_pool_registered; + } return 0; -err: - if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) - xdp_rxq_info_unreg(&rx->xsk_rxq); +err_xsk_pool_registered: + gve_unreg_xsk_pool(priv, qid); +err_xsk_pool_dma_mapped: + clear_bit(qid, priv->xsk_pools); xsk_pool_dma_unmap(pool, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); return err; } @@ -1672,18 +1639,28 @@ static int gve_xsk_pool_disable(struct net_device *dev, struct napi_struct *napi_tx; struct xsk_buff_pool *pool; int tx_qid; + int err; - pool = xsk_get_pool_from_qid(dev, qid); - if (!pool) - return -EINVAL; if (qid >= priv->rx_cfg.num_queues) return -EINVAL; - /* If XDP prog is not installed or interface is down, unmap DMA and - * return. - */ - if (!priv->xdp_prog || !netif_running(dev)) - goto done; + clear_bit(qid, priv->xsk_pools); + + pool = xsk_get_pool_from_qid(dev, qid); + if (pool) + xsk_pool_dma_unmap(pool, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); + + if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues) + return 0; + + /* Stop and start RDA queues to repost buffers. */ + if (!gve_is_qpl(priv) && priv->xdp_prog) { + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + if (err) + return err; + } napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; napi_disable(napi_rx); /* make sure current rx poll is done */ @@ -1692,22 +1669,19 @@ static int gve_xsk_pool_disable(struct net_device *dev, napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; napi_disable(napi_tx); /* make sure current tx poll is done */ - priv->rx[qid].xsk_pool = NULL; - xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); - priv->tx[tx_qid].xsk_pool = NULL; + gve_unreg_xsk_pool(priv, qid); smp_mb(); /* Make sure it is visible to the workers on datapath */ napi_enable(napi_rx); - if (gve_rx_work_pending(&priv->rx[qid])) - napi_schedule(napi_rx); - napi_enable(napi_tx); - if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) - napi_schedule(napi_tx); + if (gve_is_gqi(priv)) { + if (gve_rx_work_pending(&priv->rx[qid])) + napi_schedule(napi_rx); + + if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) + napi_schedule(napi_tx); + } -done: - xsk_pool_dma_unmap(pool, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); return 0; } @@ -1736,19 +1710,23 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) static int verify_xdp_configuration(struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); + u16 max_xdp_mtu; if (dev->features & NETIF_F_LRO) { netdev_warn(dev, "XDP is not supported when LRO is on.\n"); return -EOPNOTSUPP; } - if (priv->queue_format != GVE_GQI_QPL_FORMAT) { - netdev_warn(dev, "XDP is not supported in mode %d.\n", - priv->queue_format); + if (priv->header_split_enabled) { + netdev_warn(dev, "XDP is not supported when header-data split is enabled.\n"); return -EOPNOTSUPP; } - if (dev->mtu > GVE_DEFAULT_RX_BUFFER_SIZE - sizeof(struct ethhdr) - GVE_RX_PAD) { + max_xdp_mtu = priv->rx_cfg.packet_buffer_size - sizeof(struct ethhdr); + if (priv->queue_format == GVE_GQI_QPL_FORMAT) + max_xdp_mtu -= GVE_RX_PAD; + + if (dev->mtu > max_xdp_mtu) { netdev_warn(dev, "XDP is not supported for mtu %d.\n", dev->mtu); return -EOPNOTSUPP; @@ -1786,6 +1764,26 @@ static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +int gve_init_rss_config(struct gve_priv *priv, u16 num_queues) +{ + struct gve_rss_config *rss_config = &priv->rss_config; + struct ethtool_rxfh_param rxfh = {0}; + u16 i; + + if (!priv->cache_rss_config) + return 0; + + for (i = 0; i < priv->rss_lut_size; i++) + rss_config->hash_lut[i] = + ethtool_rxfh_indir_default(i, num_queues); + + netdev_rss_key_fill(rss_config->hash_key, priv->rss_key_size); + + rxfh.hfunc = ETH_RSS_HASH_TOP; + + return gve_adminq_configure_rss(priv, &rxfh); +} + int gve_flow_rules_reset(struct gve_priv *priv) { if (!priv->max_flow_rules) @@ -1800,7 +1798,7 @@ int gve_adjust_config(struct gve_priv *priv, { int err; - /* Allocate resources for the new confiugration */ + /* Allocate resources for the new configuration */ err = gve_queues_mem_alloc(priv, tx_alloc_cfg, rx_alloc_cfg); if (err) { netif_err(priv, drv, priv->dev, @@ -1833,12 +1831,12 @@ int gve_adjust_config(struct gve_priv *priv, } int gve_adjust_queues(struct gve_priv *priv, - struct gve_queue_config new_rx_config, - struct gve_queue_config new_tx_config) + struct gve_rx_queue_config new_rx_config, + struct gve_tx_queue_config new_tx_config, + bool reset_rss) { struct gve_tx_alloc_rings_cfg tx_alloc_cfg = {0}; struct gve_rx_alloc_rings_cfg rx_alloc_cfg = {0}; - int num_xdp_queues; int err; gve_get_curr_alloc_cfgs(priv, &tx_alloc_cfg, &rx_alloc_cfg); @@ -1846,18 +1844,19 @@ int gve_adjust_queues(struct gve_priv *priv, /* Relay the new config from ethtool */ tx_alloc_cfg.qcfg = &new_tx_config; rx_alloc_cfg.qcfg_tx = &new_tx_config; - rx_alloc_cfg.qcfg = &new_rx_config; - tx_alloc_cfg.num_rings = new_tx_config.num_queues; - - /* Add dedicated XDP TX queues if enabled. */ - num_xdp_queues = priv->xdp_prog ? new_rx_config.num_queues : 0; - tx_alloc_cfg.num_rings += num_xdp_queues; + rx_alloc_cfg.qcfg_rx = &new_rx_config; + rx_alloc_cfg.reset_rss = reset_rss; if (netif_running(priv->dev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } /* Set the config for the next up. */ + if (reset_rss) { + err = gve_init_rss_config(priv, new_rx_config.num_queues); + if (err) + return err; + } priv->tx_cfg = new_tx_config; priv->rx_cfg = new_rx_config; @@ -1886,7 +1885,7 @@ static void gve_turndown(struct gve_priv *priv) netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_TX, NULL); - napi_disable(&block->napi); + napi_disable_locked(&block->napi); } for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); @@ -1897,12 +1896,14 @@ static void gve_turndown(struct gve_priv *priv) netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, NULL); - napi_disable(&block->napi); + napi_disable_locked(&block->napi); } /* Stop tx queues */ netif_tx_disable(priv->dev); + xdp_features_clear_redirect_target_locked(priv->dev); + gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); @@ -1925,7 +1926,7 @@ static void gve_turnup(struct gve_priv *priv) if (!gve_tx_was_added_to_block(priv, idx)) continue; - napi_enable(&block->napi); + napi_enable_locked(&block->napi); if (idx < priv->tx_cfg.num_queues) netif_queue_set_napi(priv->dev, idx, @@ -1953,7 +1954,7 @@ static void gve_turnup(struct gve_priv *priv) if (!gve_rx_was_added_to_block(priv, idx)) continue; - napi_enable(&block->napi); + napi_enable_locked(&block->napi); netif_queue_set_napi(priv->dev, idx, NETDEV_QUEUE_TYPE_RX, &block->napi); @@ -1972,6 +1973,9 @@ static void gve_turnup(struct gve_priv *priv) napi_schedule(&block->napi); } + if (priv->tx_cfg.num_xdp_queues && gve_supports_xdp_xmit(priv)) + xdp_features_set_redirect_target_locked(priv->dev, false); + gve_set_napi_enabled(priv); } @@ -1984,49 +1988,56 @@ static void gve_turnup_and_check_status(struct gve_priv *priv) gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status); } -static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) +static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv, + unsigned int txqueue) { - struct gve_notify_block *block; - struct gve_tx_ring *tx = NULL; - struct gve_priv *priv; - u32 last_nic_done; - u32 current_time; u32 ntfy_idx; - netdev_info(dev, "Timeout on tx queue, %d", txqueue); - priv = netdev_priv(dev); if (txqueue > priv->tx_cfg.num_queues) - goto reset; + return NULL; ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); if (ntfy_idx >= priv->num_ntfy_blks) - goto reset; + return NULL; - block = &priv->ntfy_blocks[ntfy_idx]; - tx = block->tx; + return &priv->ntfy_blocks[ntfy_idx]; +} + +static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv, + unsigned int txqueue) +{ + struct gve_notify_block *block; + u32 current_time; + + block = gve_get_tx_notify_block(priv, txqueue); + + if (!block) + return false; current_time = jiffies_to_msecs(jiffies); - if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) - goto reset; + if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) + return false; - /* Check to see if there are missed completions, which will allow us to - * kick the queue. - */ - last_nic_done = gve_tx_load_event_counter(priv, tx); - if (last_nic_done - tx->done) { - netdev_info(dev, "Kicking queue %d", txqueue); - iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); - napi_schedule(&block->napi); - tx->last_kick_msec = current_time; - goto out; - } // Else reset. + netdev_info(priv->dev, "Kicking queue %d", txqueue); + napi_schedule(&block->napi); + block->tx->last_kick_msec = current_time; + return true; +} -reset: - gve_schedule_reset(priv); +static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct gve_notify_block *block; + struct gve_priv *priv; -out: - if (tx) - tx->queue_timeout++; + netdev_info(dev, "Timeout on tx queue, %d", txqueue); + priv = netdev_priv(dev); + + if (!gve_tx_timeout_try_q_kick(priv, txqueue)) + gve_schedule_reset(priv); + + block = gve_get_tx_notify_block(priv, txqueue); + if (block) + block->tx->queue_timeout++; priv->tx_timeo_cnt++; } @@ -2038,10 +2049,13 @@ u16 gve_get_pkt_buf_size(const struct gve_priv *priv, bool enable_hsplit) return GVE_DEFAULT_RX_BUFFER_SIZE; } -/* header-split is not supported on non-DQO_RDA yet even if device advertises it */ +/* Header split is only supported on DQ RDA queue format. If XDP is enabled, + * header split is not allowed. + */ bool gve_header_split_supported(const struct gve_priv *priv) { - return priv->header_buf_size && priv->queue_format == GVE_DQO_RDA_FORMAT; + return priv->header_buf_size && + priv->queue_format == GVE_DQO_RDA_FORMAT && !priv->xdp_prog; } int gve_set_hsplit_config(struct gve_priv *priv, u8 tcp_data_split) @@ -2090,6 +2104,12 @@ static int gve_set_features(struct net_device *netdev, if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; + if (priv->xdp_prog && (netdev->features & NETIF_F_LRO)) { + netdev_warn(netdev, + "XDP is not supported when LRO is on.\n"); + err = -EOPNOTSUPP; + goto revert_features; + } if (netif_running(netdev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) @@ -2109,6 +2129,46 @@ revert_features: return err; } +static int gve_get_ts_config(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config) +{ + struct gve_priv *priv = netdev_priv(dev); + + *kernel_config = priv->ts_config; + return 0; +} + +static int gve_set_ts_config(struct net_device *dev, + struct kernel_hwtstamp_config *kernel_config, + struct netlink_ext_ack *extack) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (kernel_config->tx_type != HWTSTAMP_TX_OFF) { + NL_SET_ERR_MSG_MOD(extack, "TX timestamping is not supported"); + return -ERANGE; + } + + if (kernel_config->rx_filter != HWTSTAMP_FILTER_NONE) { + if (!priv->nic_ts_report) { + NL_SET_ERR_MSG_MOD(extack, + "RX timestamping is not supported"); + kernel_config->rx_filter = HWTSTAMP_FILTER_NONE; + return -EOPNOTSUPP; + } + + kernel_config->rx_filter = HWTSTAMP_FILTER_ALL; + gve_clock_nic_ts_read(priv); + ptp_schedule_worker(priv->ptp->clock, 0); + } else { + ptp_cancel_worker_sync(priv->ptp->clock); + } + + priv->ts_config.rx_filter = kernel_config->rx_filter; + + return 0; +} + static const struct net_device_ops gve_netdev_ops = { .ndo_start_xmit = gve_start_xmit, .ndo_features_check = gve_features_check, @@ -2120,6 +2180,8 @@ static const struct net_device_ops gve_netdev_ops = { .ndo_bpf = gve_xdp, .ndo_xdp_xmit = gve_xdp_xmit, .ndo_xsk_wakeup = gve_xsk_wakeup, + .ndo_hwtstamp_get = gve_get_ts_config, + .ndo_hwtstamp_set = gve_set_ts_config, }; static void gve_handle_status(struct gve_priv *priv, u32 status) @@ -2145,7 +2207,9 @@ static void gve_handle_reset(struct gve_priv *priv) if (gve_get_do_reset(priv)) { rtnl_lock(); + netdev_lock(priv->dev); gve_reset(priv, false); + netdev_unlock(priv->dev); rtnl_unlock(); } } @@ -2219,7 +2283,7 @@ void gve_handle_report_stats(struct gve_priv *priv) }; stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), - .value = cpu_to_be64(priv->rx[0].fill_cnt), + .value = cpu_to_be64(priv->rx[idx].fill_cnt), .queue_id = cpu_to_be32(idx), }; } @@ -2246,13 +2310,16 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv) if (priv->queue_format == GVE_GQI_QPL_FORMAT) { xdp_features = NETDEV_XDP_ACT_BASIC; xdp_features |= NETDEV_XDP_ACT_REDIRECT; - xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; + xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; + } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) { + xdp_features = NETDEV_XDP_ACT_BASIC; + xdp_features |= NETDEV_XDP_ACT_REDIRECT; xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; } else { xdp_features = 0; } - xdp_set_features_flag(priv->dev, xdp_features); + xdp_set_features_flag_locked(priv->dev, xdp_features); } static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) @@ -2302,7 +2369,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) goto err; } - /* Big TCP is only supported on DQ*/ + /* Big TCP is only supported on DQO */ if (!gve_is_gqi(priv)) netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX); @@ -2312,6 +2379,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) */ priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; priv->mgmt_msix_idx = priv->num_ntfy_blks; + priv->numa_node = dev_to_node(&priv->pdev->dev); priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); @@ -2326,6 +2394,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, priv->rx_cfg.num_queues); } + priv->tx_cfg.num_xdp_queues = 0; dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n", priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); @@ -2337,11 +2406,26 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO; } + priv->ts_config.tx_type = HWTSTAMP_TX_OFF; + priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE; + setup_device: + priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL); + if (!priv->xsk_pools) { + err = -ENOMEM; + goto err; + } + gve_set_netdev_xdp_features(priv); err = gve_setup_device_resources(priv); - if (!err) - return 0; + if (err) + goto err_free_xsk_bitmap; + + return 0; + +err_free_xsk_bitmap: + bitmap_free(priv->xsk_pools); + priv->xsk_pools = NULL; err: gve_adminq_free(&priv->pdev->dev, priv); return err; @@ -2351,6 +2435,8 @@ static void gve_teardown_priv_resources(struct gve_priv *priv) { gve_teardown_device_resources(priv); gve_adminq_free(&priv->pdev->dev, priv); + bitmap_free(priv->xsk_pools); + priv->xsk_pools = NULL; } static void gve_trigger_reset(struct gve_priv *priv) @@ -2706,7 +2792,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) priv->service_task_flags = 0x0; priv->state_flags = 0x0; priv->ethtool_flags = 0x0; - priv->data_buffer_size_dqo = GVE_DEFAULT_RX_BUFFER_SIZE; + priv->rx_cfg.packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; priv->max_rx_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_set_probe_in_progress(priv); @@ -2725,6 +2811,9 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto abort_with_wq; + if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) + dev->netmem_tx = true; + err = register_netdev(dev); if (err) goto abort_with_gve_init; @@ -2781,7 +2870,10 @@ static void gve_shutdown(struct pci_dev *pdev) struct gve_priv *priv = netdev_priv(netdev); bool was_up = netif_running(priv->dev); + netif_device_detach(netdev); + rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2789,6 +2881,7 @@ static void gve_shutdown(struct pci_dev *pdev) /* If the dev wasn't up or close worked, finish tearing down */ gve_teardown_priv_resources(priv); } + netdev_unlock(netdev); rtnl_unlock(); } @@ -2801,6 +2894,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) priv->suspend_cnt++; rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2809,6 +2903,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) gve_teardown_priv_resources(priv); } priv->up_before_suspend = was_up; + netdev_unlock(netdev); rtnl_unlock(); return 0; } @@ -2821,7 +2916,9 @@ static int gve_resume(struct pci_dev *pdev) priv->resume_cnt++; rtnl_lock(); + netdev_lock(netdev); err = gve_reset_recovery(priv, priv->up_before_suspend); + netdev_unlock(netdev); rtnl_unlock(); return err; } diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c new file mode 100644 index 000000000000..e96247c9d68d --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2025 Google LLC + */ + +#include "gve.h" +#include "gve_adminq.h" + +/* Interval to schedule a nic timestamp calibration, 250ms. */ +#define GVE_NIC_TS_SYNC_INTERVAL_MS 250 + +/* Read the nic timestamp from hardware via the admin queue. */ +int gve_clock_nic_ts_read(struct gve_priv *priv) +{ + u64 nic_raw; + int err; + + err = gve_adminq_report_nic_ts(priv, priv->nic_ts_report_bus); + if (err) + return err; + + nic_raw = be64_to_cpu(priv->nic_ts_report->nic_timestamp); + WRITE_ONCE(priv->last_sync_nic_counter, nic_raw); + + return 0; +} + +static long gve_ptp_do_aux_work(struct ptp_clock_info *info) +{ + const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); + struct gve_priv *priv = ptp->priv; + int err; + + if (gve_get_reset_in_progress(priv) || !gve_get_admin_queue_ok(priv)) + goto out; + + err = gve_clock_nic_ts_read(priv); + if (err && net_ratelimit()) + dev_err(&priv->pdev->dev, + "%s read err %d\n", __func__, err); + +out: + return msecs_to_jiffies(GVE_NIC_TS_SYNC_INTERVAL_MS); +} + +static const struct ptp_clock_info gve_ptp_caps = { + .owner = THIS_MODULE, + .name = "gve clock", + .do_aux_work = gve_ptp_do_aux_work, +}; + +static int gve_ptp_init(struct gve_priv *priv) +{ + struct gve_ptp *ptp; + int err; + + if (!priv->nic_timestamp_supported) { + dev_dbg(&priv->pdev->dev, "Device does not support PTP\n"); + return -EOPNOTSUPP; + } + + priv->ptp = kzalloc(sizeof(*priv->ptp), GFP_KERNEL); + if (!priv->ptp) + return -ENOMEM; + + ptp = priv->ptp; + ptp->info = gve_ptp_caps; + ptp->clock = ptp_clock_register(&ptp->info, &priv->pdev->dev); + + if (IS_ERR(ptp->clock)) { + dev_err(&priv->pdev->dev, "PTP clock registration failed\n"); + err = PTR_ERR(ptp->clock); + goto free_ptp; + } + + ptp->priv = priv; + return 0; + +free_ptp: + kfree(ptp); + priv->ptp = NULL; + return err; +} + +static void gve_ptp_release(struct gve_priv *priv) +{ + struct gve_ptp *ptp = priv->ptp; + + if (!ptp) + return; + + if (ptp->clock) + ptp_clock_unregister(ptp->clock); + + kfree(ptp); + priv->ptp = NULL; +} + +int gve_init_clock(struct gve_priv *priv) +{ + int err; + + if (!priv->nic_timestamp_supported) + return 0; + + err = gve_ptp_init(priv); + if (err) + return err; + + priv->nic_ts_report = + dma_alloc_coherent(&priv->pdev->dev, + sizeof(struct gve_nic_ts_report), + &priv->nic_ts_report_bus, + GFP_KERNEL); + if (!priv->nic_ts_report) { + dev_err(&priv->pdev->dev, "%s dma alloc error\n", __func__); + err = -ENOMEM; + goto release_ptp; + } + + return 0; + +release_ptp: + gve_ptp_release(priv); + return err; +} + +void gve_teardown_clock(struct gve_priv *priv) +{ + gve_ptp_release(priv); + + if (priv->nic_ts_report) { + dma_free_coherent(&priv->pdev->dev, + sizeof(struct gve_nic_ts_report), + priv->nic_ts_report, priv->nic_ts_report_bus); + priv->nic_ts_report = NULL; + } +} diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index acb73d4d0de6..ec424d2f4f57 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -141,12 +141,15 @@ void gve_rx_free_ring_gqi(struct gve_priv *priv, struct gve_rx_ring *rx, netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); } -static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, - dma_addr_t addr, struct page *page, __be64 *slot_addr) +static void gve_setup_rx_buffer(struct gve_rx_ring *rx, + struct gve_rx_slot_page_info *page_info, + dma_addr_t addr, struct page *page, + __be64 *slot_addr) { page_info->page = page; page_info->page_offset = 0; page_info->page_address = page_address(page); + page_info->buf_size = rx->packet_buffer_size; *slot_addr = cpu_to_be64(addr); /* The page already has 1 ref */ page_ref_add(page, INT_MAX - 1); @@ -171,7 +174,7 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, return err; } - gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr); + gve_setup_rx_buffer(rx, page_info, dma, page, &data_slot->addr); return 0; } @@ -189,8 +192,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, */ slots = rx->mask + 1; - rx->data.page_info = kvzalloc(slots * - sizeof(*rx->data.page_info), GFP_KERNEL); + rx->data.page_info = kvcalloc_node(slots, sizeof(*rx->data.page_info), + GFP_KERNEL, priv->numa_node); if (!rx->data.page_info) return -ENOMEM; @@ -199,7 +202,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, struct page *page = rx->data.qpl->pages[i]; dma_addr_t addr = i * PAGE_SIZE; - gve_setup_rx_buffer(&rx->data.page_info[i], addr, page, + gve_setup_rx_buffer(rx, &rx->data.page_info[i], addr, + page, &rx->data.data_ring[i].qpl_offset); continue; } @@ -212,7 +216,8 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, if (!rx->data.raw_addressing) { for (j = 0; j < rx->qpl_copy_pool_mask + 1; j++) { - struct page *page = alloc_page(GFP_KERNEL); + struct page *page = alloc_pages_node(priv->numa_node, + GFP_KERNEL, 0); if (!page) { err = -ENOMEM; @@ -222,6 +227,7 @@ static int gve_rx_prefill_pages(struct gve_rx_ring *rx, rx->qpl_copy_pool[j].page = page; rx->qpl_copy_pool[j].page_offset = 0; rx->qpl_copy_pool[j].page_address = page_address(page); + rx->qpl_copy_pool[j].buf_size = rx->packet_buffer_size; /* The page already has 1 ref. */ page_ref_add(page, INT_MAX - 1); @@ -283,6 +289,7 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv, rx->gve = priv; rx->q_num = idx; + rx->packet_buffer_size = cfg->packet_buffer_size; rx->mask = slots - 1; rx->data.raw_addressing = cfg->raw_addressing; @@ -297,10 +304,9 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv, rx->qpl_copy_pool_mask = min_t(u32, U32_MAX, slots * 2) - 1; rx->qpl_copy_pool_head = 0; - rx->qpl_copy_pool = kvcalloc(rx->qpl_copy_pool_mask + 1, - sizeof(rx->qpl_copy_pool[0]), - GFP_KERNEL); - + rx->qpl_copy_pool = kvcalloc_node(rx->qpl_copy_pool_mask + 1, + sizeof(rx->qpl_copy_pool[0]), + GFP_KERNEL, priv->numa_node); if (!rx->qpl_copy_pool) { err = -ENOMEM; goto abort_with_slots; @@ -351,7 +357,6 @@ int gve_rx_alloc_ring_gqi(struct gve_priv *priv, rx->db_threshold = slots / 2; gve_rx_init_ring_state_gqi(rx); - rx->packet_buffer_size = GVE_DEFAULT_RX_BUFFER_SIZE; gve_rx_ctx_clear(&rx->ctx); return 0; @@ -385,12 +390,12 @@ int gve_rx_alloc_rings_gqi(struct gve_priv *priv, int err = 0; int i, j; - rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring), + rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring), GFP_KERNEL); if (!rx) return -ENOMEM; - for (i = 0; i < cfg->qcfg->num_queues; i++) { + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) { err = gve_rx_alloc_ring_gqi(priv, cfg, &rx[i], i); if (err) { netif_err(priv, drv, priv->dev, @@ -419,7 +424,7 @@ void gve_rx_free_rings_gqi(struct gve_priv *priv, if (!rx) return; - for (i = 0; i < cfg->qcfg->num_queues; i++) + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) gve_rx_free_ring_gqi(priv, &rx[i], cfg); kvfree(rx); @@ -590,7 +595,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx, copy_page_info->pad = page_info->pad; skb = gve_rx_add_frags(napi, copy_page_info, - rx->packet_buffer_size, len, ctx); + copy_page_info->buf_size, len, ctx); if (unlikely(!skb)) return NULL; @@ -630,7 +635,8 @@ gve_rx_qpl(struct device *dev, struct net_device *netdev, * device. */ if (page_info->can_flip) { - skb = gve_rx_add_frags(napi, page_info, rx->packet_buffer_size, len, ctx); + skb = gve_rx_add_frags(napi, page_info, page_info->buf_size, + len, ctx); /* No point in recycling if we didn't get the skb */ if (skb) { /* Make sure that the page isn't freed. */ @@ -680,7 +686,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx, skb = gve_rx_raw_addressing(&priv->pdev->dev, netdev, page_info, len, napi, data_slot, - rx->packet_buffer_size, ctx); + page_info->buf_size, ctx); } else { skb = gve_rx_qpl(&priv->pdev->dev, netdev, rx, page_info, len, napi, data_slot); @@ -855,7 +861,7 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, void *old_data; int xdp_act; - xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq); + xdp_init_buff(&xdp, page_info->buf_size, &rx->xdp_rxq); xdp_prepare_buff(&xdp, page_info->page_address + page_info->page_offset, GVE_RX_PAD, len, false); diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 8ac0047f1ada..7380c2b7a2d8 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -8,6 +8,7 @@ #include "gve_dqo.h" #include "gve_adminq.h" #include "gve_utils.h" +#include <linux/bpf.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/skbuff.h> @@ -15,6 +16,7 @@ #include <net/ip6_checksum.h> #include <net/ipv6.h> #include <net/tcp.h> +#include <net/xdp_sock_drv.h> static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) { @@ -109,10 +111,13 @@ static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx) void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_rx_ring *rx = &priv->rx[idx]; if (!gve_rx_was_added_to_block(priv, idx)) return; + if (rx->dqo.page_pool) + page_pool_disable_direct_recycling(rx->dqo.page_pool); gve_remove_napi(priv, ntfy_idx); gve_rx_remove_from_block(priv, idx); gve_rx_reset_ring_dqo(priv, idx); @@ -145,6 +150,10 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, gve_free_to_page_pool(rx, bs, false); else gve_free_qpl_page_dqo(bs); + if (gve_buf_state_is_allocated(rx, bs) && bs->xsk_buff) { + xsk_buff_free(bs->xsk_buff); + bs->xsk_buff = NULL; + } } if (rx->dqo.qpl) { @@ -221,12 +230,21 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, memset(rx, 0, sizeof(*rx)); rx->gve = priv; rx->q_num = idx; + rx->packet_buffer_size = cfg->packet_buffer_size; + + if (cfg->xdp) { + rx->packet_buffer_truesize = GVE_XDP_RX_BUFFER_SIZE_DQO; + rx->rx_headroom = XDP_PACKET_HEADROOM; + } else { + rx->packet_buffer_truesize = rx->packet_buffer_size; + rx->rx_headroom = 0; + } rx->dqo.num_buf_states = cfg->raw_addressing ? buffer_queue_slots : gve_get_rx_pages_per_qpl_dqo(cfg->ring_size); - rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states, - sizeof(rx->dqo.buf_states[0]), - GFP_KERNEL); + rx->dqo.buf_states = kvcalloc_node(rx->dqo.num_buf_states, + sizeof(rx->dqo.buf_states[0]), + GFP_KERNEL, priv->numa_node); if (!rx->dqo.buf_states) return -ENOMEM; @@ -251,7 +269,7 @@ int gve_rx_alloc_ring_dqo(struct gve_priv *priv, goto err; if (cfg->raw_addressing) { - pool = gve_rx_create_page_pool(priv, rx); + pool = gve_rx_create_page_pool(priv, rx, cfg->xdp); if (IS_ERR(pool)) goto err; @@ -297,12 +315,12 @@ int gve_rx_alloc_rings_dqo(struct gve_priv *priv, int err; int i; - rx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_rx_ring), + rx = kvcalloc(cfg->qcfg_rx->max_queues, sizeof(struct gve_rx_ring), GFP_KERNEL); if (!rx) return -ENOMEM; - for (i = 0; i < cfg->qcfg->num_queues; i++) { + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) { err = gve_rx_alloc_ring_dqo(priv, cfg, &rx[i], i); if (err) { netif_err(priv, drv, priv->dev, @@ -331,7 +349,7 @@ void gve_rx_free_rings_dqo(struct gve_priv *priv, if (!rx) return; - for (i = 0; i < cfg->qcfg->num_queues; i++) + for (i = 0; i < cfg->qcfg_rx->num_queues; i++) gve_rx_free_ring_dqo(priv, &rx[i], cfg); kvfree(rx); @@ -425,6 +443,29 @@ static void gve_rx_skb_hash(struct sk_buff *skb, skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type); } +/* Expand the hardware timestamp to the full 64 bits of width, and add it to the + * skb. + * + * This algorithm works by using the passed hardware timestamp to generate a + * diff relative to the last read of the nic clock. This diff can be positive or + * negative, as it is possible that we have read the clock more recently than + * the hardware has received this packet. To detect this, we use the high bit of + * the diff, and assume that the read is more recent if the high bit is set. In + * this case we invert the process. + * + * Note that this means if the time delta between packet reception and the last + * clock read is greater than ~2 seconds, this will provide invalid results. + */ +static void gve_rx_skb_hwtstamp(struct gve_rx_ring *rx, u32 hwts) +{ + u64 last_read = READ_ONCE(rx->gve->last_sync_nic_counter); + struct sk_buff *skb = rx->ctx.skb_head; + u32 low = (u32)last_read; + s32 diff = hwts - low; + + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(last_read + diff); +} + static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) { if (!rx->ctx.skb_head) @@ -452,7 +493,7 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx, struct gve_rx_buf_state_dqo *buf_state, u16 buf_len) { - struct page *page = alloc_page(GFP_ATOMIC); + struct page *page = alloc_pages_node(rx->gve->numa_node, GFP_ATOMIC, 0); int num_frags; if (!page) @@ -474,6 +515,25 @@ static int gve_rx_copy_ondemand(struct gve_rx_ring *rx, return 0; } +static void gve_skb_add_rx_frag(struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, + int num_frags, u16 buf_len) +{ + if (rx->dqo.page_pool) { + skb_add_rx_frag_netmem(rx->ctx.skb_tail, num_frags, + buf_state->page_info.netmem, + buf_state->page_info.page_offset + + buf_state->page_info.pad, buf_len, + buf_state->page_info.buf_size); + } else { + skb_add_rx_frag(rx->ctx.skb_tail, num_frags, + buf_state->page_info.page, + buf_state->page_info.page_offset + + buf_state->page_info.pad, buf_len, + buf_state->page_info.buf_size); + } +} + /* Chains multi skbs for single rx packet. * Returns 0 if buffer is appended, -1 otherwise. */ @@ -511,14 +571,153 @@ static int gve_rx_append_frags(struct napi_struct *napi, if (gve_rx_should_trigger_copy_ondemand(rx)) return gve_rx_copy_ondemand(rx, buf_state, buf_len); - skb_add_rx_frag(rx->ctx.skb_tail, num_frags, - buf_state->page_info.page, - buf_state->page_info.page_offset, - buf_len, buf_state->page_info.buf_size); + gve_skb_add_rx_frag(rx, buf_state, num_frags, buf_len); gve_reuse_buffer(rx, buf_state); return 0; } +static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct xdp_buff *xdp) +{ + struct gve_tx_ring *tx; + struct xdp_frame *xdpf; + u32 tx_qid; + int err; + + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) { + if (rx->xsk_pool) + xsk_buff_free(xdp); + return -ENOSPC; + } + + tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num); + tx = &priv->tx[tx_qid]; + spin_lock(&tx->dqo_tx.xdp_lock); + err = gve_xdp_xmit_one_dqo(priv, tx, xdpf); + spin_unlock(&tx->dqo_tx.xdp_lock); + + return err; +} + +static void gve_xsk_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct xdp_buff *xdp, struct bpf_prog *xprog, + int xdp_act) +{ + switch (xdp_act) { + case XDP_ABORTED: + case XDP_DROP: + default: + xsk_buff_free(xdp); + break; + case XDP_TX: + if (unlikely(gve_xdp_tx_dqo(priv, rx, xdp))) + goto err; + break; + case XDP_REDIRECT: + if (unlikely(xdp_do_redirect(priv->dev, xdp, xprog))) + goto err; + break; + } + + u64_stats_update_begin(&rx->statss); + if ((u32)xdp_act < GVE_XDP_ACTIONS) + rx->xdp_actions[xdp_act]++; + u64_stats_update_end(&rx->statss); + return; + +err: + u64_stats_update_begin(&rx->statss); + if (xdp_act == XDP_TX) + rx->xdp_tx_errors++; + if (xdp_act == XDP_REDIRECT) + rx->xdp_redirect_errors++; + u64_stats_update_end(&rx->statss); +} + +static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, + struct xdp_buff *xdp, struct bpf_prog *xprog, + int xdp_act, + struct gve_rx_buf_state_dqo *buf_state) +{ + int err; + switch (xdp_act) { + case XDP_ABORTED: + case XDP_DROP: + default: + gve_free_buffer(rx, buf_state); + break; + case XDP_TX: + err = gve_xdp_tx_dqo(priv, rx, xdp); + if (unlikely(err)) + goto err; + gve_reuse_buffer(rx, buf_state); + break; + case XDP_REDIRECT: + err = xdp_do_redirect(priv->dev, xdp, xprog); + if (unlikely(err)) + goto err; + gve_reuse_buffer(rx, buf_state); + break; + } + u64_stats_update_begin(&rx->statss); + if ((u32)xdp_act < GVE_XDP_ACTIONS) + rx->xdp_actions[xdp_act]++; + u64_stats_update_end(&rx->statss); + return; +err: + u64_stats_update_begin(&rx->statss); + if (xdp_act == XDP_TX) + rx->xdp_tx_errors++; + else if (xdp_act == XDP_REDIRECT) + rx->xdp_redirect_errors++; + u64_stats_update_end(&rx->statss); + gve_free_buffer(rx, buf_state); + return; +} + +static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, int buf_len, + struct bpf_prog *xprog) +{ + struct xdp_buff *xdp = buf_state->xsk_buff; + struct gve_priv *priv = rx->gve; + int xdp_act; + + xdp->data_end = xdp->data + buf_len; + xsk_buff_dma_sync_for_cpu(xdp); + + if (xprog) { + xdp_act = bpf_prog_run_xdp(xprog, xdp); + buf_len = xdp->data_end - xdp->data; + if (xdp_act != XDP_PASS) { + gve_xsk_done_dqo(priv, rx, xdp, xprog, xdp_act); + gve_free_buf_state(rx, buf_state); + return 0; + } + } + + /* Copy the data to skb */ + rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi, + xdp->data, buf_len); + if (unlikely(!rx->ctx.skb_head)) { + xsk_buff_free(xdp); + gve_free_buf_state(rx, buf_state); + return -ENOMEM; + } + rx->ctx.skb_tail = rx->ctx.skb_head; + + /* Free XSK buffer and Buffer state */ + xsk_buff_free(xdp); + gve_free_buf_state(rx, buf_state); + + /* Update Stats */ + u64_stats_update_begin(&rx->statss); + rx->xdp_actions[XDP_PASS]++; + u64_stats_update_end(&rx->statss); + return 0; +} + /* Returns 0 if descriptor is completed successfully. * Returns -EINVAL if descriptor is invalid. * Returns -ENOMEM if data cannot be copied to skb. @@ -533,6 +732,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, const bool hsplit = compl_desc->split_header; struct gve_rx_buf_state_dqo *buf_state; struct gve_priv *priv = rx->gve; + struct bpf_prog *xprog; u16 buf_len; u16 hdr_len; @@ -556,10 +756,19 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, buf_len = compl_desc->packet_len; hdr_len = compl_desc->header_len; + xprog = READ_ONCE(priv->xdp_prog); + if (buf_state->xsk_buff) + return gve_rx_xsk_dqo(napi, rx, buf_state, buf_len, xprog); + /* Page might have not been used for awhile and was likely last written * by a different thread. */ - prefetch(buf_state->page_info.page); + if (rx->dqo.page_pool) { + if (!netmem_is_net_iov(buf_state->page_info.netmem)) + prefetch(netmem_to_page(buf_state->page_info.netmem)); + } else { + prefetch(buf_state->page_info.page); + } /* Copy the header into the skb in the case of header split */ if (hsplit) { @@ -588,7 +797,8 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, /* Sync the portion of dma buffer for CPU to read. */ dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, - buf_state->page_info.page_offset, + buf_state->page_info.page_offset + + buf_state->page_info.pad, buf_len, DMA_FROM_DEVICE); /* Append to current skb if one exists. */ @@ -600,6 +810,33 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } + if (xprog) { + struct xdp_buff xdp; + void *old_data; + int xdp_act; + + xdp_init_buff(&xdp, buf_state->page_info.buf_size, + &rx->xdp_rxq); + xdp_prepare_buff(&xdp, + buf_state->page_info.page_address + + buf_state->page_info.page_offset, + buf_state->page_info.pad, + buf_len, false); + old_data = xdp.data; + xdp_act = bpf_prog_run_xdp(xprog, &xdp); + buf_state->page_info.pad += xdp.data - old_data; + buf_len = xdp.data_end - xdp.data; + if (xdp_act != XDP_PASS) { + gve_xdp_done_dqo(priv, rx, &xdp, xprog, xdp_act, + buf_state); + return 0; + } + + u64_stats_update_begin(&rx->statss); + rx->xdp_actions[XDP_PASS]++; + u64_stats_update_end(&rx->statss); + } + if (eop && buf_len <= priv->rx_copybreak) { rx->ctx.skb_head = gve_rx_copy(priv->dev, napi, &buf_state->page_info, buf_len); @@ -630,9 +867,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, if (rx->dqo.page_pool) skb_mark_for_recycle(rx->ctx.skb_head); - skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page, - buf_state->page_info.page_offset, buf_len, - buf_state->page_info.buf_size); + gve_skb_add_rx_frag(rx, buf_state, 0, buf_len); gve_reuse_buffer(rx, buf_state); return 0; @@ -683,6 +918,9 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi, if (feat & NETIF_F_RXCSUM) gve_rx_skb_csum(rx->ctx.skb_head, desc, ptype); + if (rx->gve->ts_config.rx_filter == HWTSTAMP_FILTER_ALL) + gve_rx_skb_hwtstamp(rx, le32_to_cpu(desc->ts)); + /* RSC packets must set gso_size otherwise the TCP stack will complain * that packets are larger than MTU. */ @@ -702,16 +940,27 @@ static int gve_rx_complete_skb(struct gve_rx_ring *rx, struct napi_struct *napi, int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) { - struct napi_struct *napi = &block->napi; - netdev_features_t feat = napi->dev->features; - - struct gve_rx_ring *rx = block->rx; - struct gve_rx_compl_queue_dqo *complq = &rx->dqo.complq; - + struct gve_rx_compl_queue_dqo *complq; + struct napi_struct *napi; + netdev_features_t feat; + struct gve_rx_ring *rx; + struct gve_priv *priv; + u64 xdp_redirects; u32 work_done = 0; u64 bytes = 0; + u64 xdp_txs; int err; + napi = &block->napi; + feat = napi->dev->features; + + rx = block->rx; + priv = rx->gve; + complq = &rx->dqo.complq; + + xdp_redirects = rx->xdp_actions[XDP_REDIRECT]; + xdp_txs = rx->xdp_actions[XDP_TX]; + while (work_done < budget) { struct gve_rx_compl_desc_dqo *compl_desc = &complq->desc_ring[complq->head]; @@ -785,6 +1034,12 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) rx->ctx.skb_tail = NULL; } + if (xdp_txs != rx->xdp_actions[XDP_TX]) + gve_xdp_tx_flush_dqo(priv, rx->q_num); + + if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT]) + xdp_do_flush(); + gve_rx_post_buffers_dqo(rx); u64_stats_update_begin(&rx->statss); diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 4350ebd9c2bd..c6ff0968929d 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -334,27 +334,23 @@ int gve_tx_alloc_rings_gqi(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg) { struct gve_tx_ring *tx = cfg->tx; + int total_queues; int err = 0; int i, j; - if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) { + total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings; + if (total_queues > cfg->qcfg->max_queues) { netif_err(priv, drv, priv->dev, "Cannot alloc more than the max num of Tx rings\n"); return -EINVAL; } - if (cfg->start_idx == 0) { - tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), - GFP_KERNEL); - if (!tx) - return -ENOMEM; - } else if (!tx) { - netif_err(priv, drv, priv->dev, - "Cannot alloc tx rings from a nonzero start idx without tx array\n"); - return -EINVAL; - } + tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), + GFP_KERNEL); + if (!tx) + return -ENOMEM; - for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) { + for (i = 0; i < total_queues; i++) { err = gve_tx_alloc_ring_gqi(priv, cfg, &tx[i], i); if (err) { netif_err(priv, drv, priv->dev, @@ -370,8 +366,7 @@ int gve_tx_alloc_rings_gqi(struct gve_priv *priv, cleanup: for (j = 0; j < i; j++) gve_tx_free_ring_gqi(priv, &tx[j], cfg); - if (cfg->start_idx == 0) - kvfree(tx); + kvfree(tx); return err; } @@ -384,13 +379,11 @@ void gve_tx_free_rings_gqi(struct gve_priv *priv, if (!tx) return; - for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) + for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++) gve_tx_free_ring_gqi(priv, &tx[i], cfg); - if (cfg->start_idx == 0) { - kvfree(tx); - cfg->tx = NULL; - } + kvfree(tx); + cfg->tx = NULL; } /* gve_tx_avail - Calculates the number of slots available in the ring @@ -830,8 +823,8 @@ static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx, return ndescs; } -int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, - u32 flags) +int gve_xdp_xmit_gqi(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) { struct gve_priv *priv = netdev_priv(dev); struct gve_tx_ring *tx; @@ -844,7 +837,7 @@ int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, return -ENETDOWN; qid = gve_xdp_tx_queue_id(priv, - smp_processor_id() % priv->num_xdp_queues); + smp_processor_id() % priv->tx_cfg.num_xdp_queues); tx = &priv->tx[qid]; @@ -959,13 +952,9 @@ static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx, spin_lock(&tx->xdp_lock); while (sent < budget) { - if (!gve_can_tx(tx, GVE_TX_START_THRESH)) - goto out; - - if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) { - tx->xdp_xsk_done = tx->xdp_xsk_wakeup; + if (!gve_can_tx(tx, GVE_TX_START_THRESH) || + !xsk_tx_peek_desc(tx->xsk_pool, &desc)) goto out; - } data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr); nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true); diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 394debc62268..6f1d515673d2 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -9,9 +9,11 @@ #include "gve_utils.h" #include "gve_dqo.h" #include <net/ip.h> +#include <linux/bpf.h> #include <linux/tcp.h> #include <linux/slab.h> #include <linux/skbuff.h> +#include <net/xdp_sock_drv.h> /* Returns true if tx_bufs are available. */ static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count) @@ -110,6 +112,14 @@ static bool gve_has_pending_packet(struct gve_tx_ring *tx) return false; } +void gve_xdp_tx_flush_dqo(struct gve_priv *priv, u32 xdp_qid) +{ + u32 tx_qid = gve_xdp_tx_queue_id(priv, xdp_qid); + struct gve_tx_ring *tx = &priv->tx[tx_qid]; + + gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail); +} + static struct gve_tx_pending_packet_dqo * gve_alloc_pending_packet(struct gve_tx_ring *tx) { @@ -198,7 +208,8 @@ void gve_tx_stop_ring_dqo(struct gve_priv *priv, int idx) gve_remove_napi(priv, ntfy_idx); gve_clean_tx_done_dqo(priv, tx, /*napi=*/NULL); - netdev_tx_reset_queue(tx->netdev_txq); + if (tx->netdev_txq) + netdev_tx_reset_queue(tx->netdev_txq); gve_tx_clean_pending_packets(tx); gve_tx_remove_from_block(priv, idx); } @@ -231,6 +242,9 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, tx->dqo.tx_ring = NULL; } + kvfree(tx->dqo.xsk_reorder_queue); + tx->dqo.xsk_reorder_queue = NULL; + kvfree(tx->dqo.pending_packets); tx->dqo.pending_packets = NULL; @@ -276,7 +290,8 @@ void gve_tx_start_ring_dqo(struct gve_priv *priv, int idx) gve_tx_add_to_block(priv, idx); - tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + if (idx < priv->tx_cfg.num_queues) + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); gve_add_napi(priv, ntfy_idx, gve_napi_poll_dqo); } @@ -295,6 +310,7 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, memset(tx, 0, sizeof(*tx)); tx->q_num = idx; tx->dev = hdev; + spin_lock_init(&tx->dqo_tx.xdp_lock); atomic_set_release(&tx->dqo_compl.hw_tx_head, 0); /* Queue sizes must be a power of 2 */ @@ -333,6 +349,17 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1; atomic_set_release(&tx->dqo_compl.free_pending_packets, -1); + + /* Only alloc xsk pool for XDP queues */ + if (idx >= cfg->qcfg->num_queues && cfg->num_xdp_rings) { + tx->dqo.xsk_reorder_queue = + kvcalloc(tx->dqo.complq_mask + 1, + sizeof(tx->dqo.xsk_reorder_queue[0]), + GFP_KERNEL); + if (!tx->dqo.xsk_reorder_queue) + goto err; + } + tx->dqo_compl.miss_completions.head = -1; tx->dqo_compl.miss_completions.tail = -1; tx->dqo_compl.timed_out_completions.head = -1; @@ -379,27 +406,23 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg) { struct gve_tx_ring *tx = cfg->tx; + int total_queues; int err = 0; int i, j; - if (cfg->start_idx + cfg->num_rings > cfg->qcfg->max_queues) { + total_queues = cfg->qcfg->num_queues + cfg->num_xdp_rings; + if (total_queues > cfg->qcfg->max_queues) { netif_err(priv, drv, priv->dev, "Cannot alloc more than the max num of Tx rings\n"); return -EINVAL; } - if (cfg->start_idx == 0) { - tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), - GFP_KERNEL); - if (!tx) - return -ENOMEM; - } else if (!tx) { - netif_err(priv, drv, priv->dev, - "Cannot alloc tx rings from a nonzero start idx without tx array\n"); - return -EINVAL; - } + tx = kvcalloc(cfg->qcfg->max_queues, sizeof(struct gve_tx_ring), + GFP_KERNEL); + if (!tx) + return -ENOMEM; - for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) { + for (i = 0; i < total_queues; i++) { err = gve_tx_alloc_ring_dqo(priv, cfg, &tx[i], i); if (err) { netif_err(priv, drv, priv->dev, @@ -415,8 +438,7 @@ int gve_tx_alloc_rings_dqo(struct gve_priv *priv, err: for (j = 0; j < i; j++) gve_tx_free_ring_dqo(priv, &tx[j], cfg); - if (cfg->start_idx == 0) - kvfree(tx); + kvfree(tx); return err; } @@ -429,13 +451,11 @@ void gve_tx_free_rings_dqo(struct gve_priv *priv, if (!tx) return; - for (i = cfg->start_idx; i < cfg->start_idx + cfg->num_rings; i++) + for (i = 0; i < cfg->qcfg->num_queues + cfg->qcfg->num_xdp_queues; i++) gve_tx_free_ring_dqo(priv, &tx[i], cfg); - if (cfg->start_idx == 0) { - kvfree(tx); - cfg->tx = NULL; - } + kvfree(tx); + cfg->tx = NULL; } /* Returns the number of slots available in the ring */ @@ -446,12 +466,28 @@ static u32 num_avail_tx_slots(const struct gve_tx_ring *tx) return tx->mask - num_used; } +/* Checks if the requested number of slots are available in the ring */ +static bool gve_has_tx_slots_available(struct gve_tx_ring *tx, u32 slots_req) +{ + u32 num_avail = num_avail_tx_slots(tx); + + slots_req += GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP; + + if (num_avail >= slots_req) + return true; + + /* Update cached TX head pointer */ + tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head); + + return num_avail_tx_slots(tx) >= slots_req; +} + static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx, int desc_count, int buf_count) { return gve_has_pending_packet(tx) && - num_avail_tx_slots(tx) >= desc_count && - gve_has_free_tx_qpl_bufs(tx, buf_count); + gve_has_tx_slots_available(tx, desc_count) && + gve_has_free_tx_qpl_bufs(tx, buf_count); } /* Stops the queue if available descriptors is less than 'count'. @@ -463,12 +499,6 @@ static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) return 0; - /* Update cached TX head pointer */ - tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head); - - if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) - return 0; - /* No space, so stop the queue */ tx->stop_queue++; netif_tx_stop_queue(tx->netdev_txq); @@ -479,8 +509,6 @@ static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, /* After stopping queue, check if we can transmit again in order to * avoid TOCTOU bug. */ - tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head); - if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count))) return -EBUSY; @@ -507,11 +535,9 @@ static void gve_extract_tx_metadata_dqo(const struct sk_buff *skb, } static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx, - struct sk_buff *skb, u32 len, u64 addr, + bool enable_csum, u32 len, u64 addr, s16 compl_tag, bool eop, bool is_gso) { - const bool checksum_offload_en = skb->ip_summed == CHECKSUM_PARTIAL; - while (len > 0) { struct gve_tx_pkt_desc_dqo *desc = &tx->dqo.tx_ring[*desc_idx].pkt; @@ -522,7 +548,7 @@ static void gve_tx_fill_pkt_desc_dqo(struct gve_tx_ring *tx, u32 *desc_idx, .buf_addr = cpu_to_le64(addr), .dtype = GVE_TX_PKT_DESC_DTYPE_DQO, .end_of_packet = cur_eop, - .checksum_offload_enable = checksum_offload_en, + .checksum_offload_enable = enable_csum, .compl_tag = cpu_to_le16(compl_tag), .buf_size = cur_len, }; @@ -619,6 +645,25 @@ gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc, }; } +static void gve_tx_update_tail(struct gve_tx_ring *tx, u32 desc_idx) +{ + u32 last_desc_idx = (desc_idx - 1) & tx->mask; + u32 last_report_event_interval = + (last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask; + + /* Commit the changes to our state */ + tx->dqo_tx.tail = desc_idx; + + /* Request a descriptor completion on the last descriptor of the + * packet if we are allowed to by the HW enforced interval. + */ + + if (unlikely(last_report_event_interval >= GVE_TX_MIN_RE_INTERVAL)) { + tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true; + tx->dqo_tx.last_re_idx = last_desc_idx; + } +} + static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, struct sk_buff *skb, struct gve_tx_pending_packet_dqo *pkt, @@ -626,6 +671,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, u32 *desc_idx, bool is_gso) { + bool enable_csum = skb->ip_summed == CHECKSUM_PARTIAL; const struct skb_shared_info *shinfo = skb_shinfo(skb); int i; @@ -651,7 +697,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); ++pkt->num_bufs; - gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, + gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, len, addr, completion_tag, /*eop=*/shinfo->nr_frags == 0, is_gso); } @@ -667,10 +713,11 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx, goto err; dma_unmap_len_set(pkt, len[pkt->num_bufs], len); - dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); + netmem_dma_unmap_addr_set(skb_frag_netmem(frag), pkt, + dma[pkt->num_bufs], addr); ++pkt->num_bufs; - gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr, + gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, len, addr, completion_tag, is_eop, is_gso); } @@ -715,6 +762,7 @@ static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx, u32 *desc_idx, bool is_gso) { + bool enable_csum = skb->ip_summed == CHECKSUM_PARTIAL; u32 copy_offset = 0; dma_addr_t dma_addr; u32 copy_len; @@ -736,7 +784,7 @@ static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx, copy_offset += copy_len; dma_sync_single_for_device(tx->dev, dma_addr, copy_len, DMA_TO_DEVICE); - gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, + gve_tx_fill_pkt_desc_dqo(tx, desc_idx, enable_csum, copy_len, dma_addr, completion_tag, @@ -770,7 +818,11 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx, s16 completion_tag; pkt = gve_alloc_pending_packet(tx); + if (!pkt) + return -ENOMEM; + pkt->skb = skb; + pkt->type = GVE_TX_PENDING_PACKET_DQO_SKB; completion_tag = pkt - tx->dqo.pending_packets; gve_extract_tx_metadata_dqo(skb, &metadata); @@ -803,24 +855,7 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx, tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs; - /* Commit the changes to our state */ - tx->dqo_tx.tail = desc_idx; - - /* Request a descriptor completion on the last descriptor of the - * packet if we are allowed to by the HW enforced interval. - */ - { - u32 last_desc_idx = (desc_idx - 1) & tx->mask; - u32 last_report_event_interval = - (last_desc_idx - tx->dqo_tx.last_re_idx) & tx->mask; - - if (unlikely(last_report_event_interval >= - GVE_TX_MIN_RE_INTERVAL)) { - tx->dqo.tx_ring[last_desc_idx].pkt.report_event = true; - tx->dqo_tx.last_re_idx = last_desc_idx; - } - } - + gve_tx_update_tail(tx, desc_idx); return 0; err: @@ -954,9 +989,8 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, /* Metadata + (optional TSO) + data descriptors. */ total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs; - if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs + - GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP, - num_buffer_descs))) { + if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs, + num_buffer_descs))) { return -1; } @@ -973,6 +1007,38 @@ drop: return 0; } +static void gve_xsk_reorder_queue_push_dqo(struct gve_tx_ring *tx, + u16 completion_tag) +{ + u32 tail = atomic_read(&tx->dqo_tx.xsk_reorder_queue_tail); + + tx->dqo.xsk_reorder_queue[tail] = completion_tag; + tail = (tail + 1) & tx->dqo.complq_mask; + atomic_set_release(&tx->dqo_tx.xsk_reorder_queue_tail, tail); +} + +static struct gve_tx_pending_packet_dqo * +gve_xsk_reorder_queue_head(struct gve_tx_ring *tx) +{ + u32 head = tx->dqo_compl.xsk_reorder_queue_head; + + if (head == tx->dqo_compl.xsk_reorder_queue_tail) { + tx->dqo_compl.xsk_reorder_queue_tail = + atomic_read_acquire(&tx->dqo_tx.xsk_reorder_queue_tail); + + if (head == tx->dqo_compl.xsk_reorder_queue_tail) + return NULL; + } + + return &tx->dqo.pending_packets[tx->dqo.xsk_reorder_queue[head]]; +} + +static void gve_xsk_reorder_queue_pop_dqo(struct gve_tx_ring *tx) +{ + tx->dqo_compl.xsk_reorder_queue_head++; + tx->dqo_compl.xsk_reorder_queue_head &= tx->dqo.complq_mask; +} + /* Transmit a given skb and ring the doorbell. */ netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev) { @@ -996,6 +1062,62 @@ netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static bool gve_xsk_tx_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + int budget) +{ + struct xsk_buff_pool *pool = tx->xsk_pool; + struct xdp_desc desc; + bool repoll = false; + int sent = 0; + + spin_lock(&tx->dqo_tx.xdp_lock); + for (; sent < budget; sent++) { + struct gve_tx_pending_packet_dqo *pkt; + s16 completion_tag; + dma_addr_t addr; + u32 desc_idx; + + if (unlikely(!gve_has_avail_slots_tx_dqo(tx, 1, 1))) { + repoll = true; + break; + } + + if (!xsk_tx_peek_desc(pool, &desc)) + break; + + pkt = gve_alloc_pending_packet(tx); + pkt->type = GVE_TX_PENDING_PACKET_DQO_XSK; + pkt->num_bufs = 0; + completion_tag = pkt - tx->dqo.pending_packets; + + addr = xsk_buff_raw_get_dma(pool, desc.addr); + xsk_buff_raw_dma_sync_for_device(pool, addr, desc.len); + + desc_idx = tx->dqo_tx.tail; + gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, + true, desc.len, + addr, completion_tag, true, + false); + ++pkt->num_bufs; + gve_tx_update_tail(tx, desc_idx); + tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs; + gve_xsk_reorder_queue_push_dqo(tx, completion_tag); + } + + if (sent) { + gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail); + xsk_tx_release(pool); + } + + spin_unlock(&tx->dqo_tx.xdp_lock); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xsk_sent += sent; + u64_stats_update_end(&tx->statss); + + return (sent == budget) || repoll; +} + static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list, struct gve_tx_pending_packet_dqo *pending_packet) { @@ -1045,8 +1167,9 @@ static void gve_unmap_packet(struct device *dev, dma_unmap_single(dev, dma_unmap_addr(pkt, dma[0]), dma_unmap_len(pkt, len[0]), DMA_TO_DEVICE); for (i = 1; i < pkt->num_bufs; i++) { - dma_unmap_page(dev, dma_unmap_addr(pkt, dma[i]), - dma_unmap_len(pkt, len[i]), DMA_TO_DEVICE); + netmem_dma_unmap_page_attrs(dev, dma_unmap_addr(pkt, dma[i]), + dma_unmap_len(pkt, len[i]), + DMA_TO_DEVICE, 0); } pkt->num_bufs = 0; } @@ -1109,16 +1232,35 @@ static void gve_handle_packet_completion(struct gve_priv *priv, } } tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs; - if (tx->dqo.qpl) - gve_free_tx_qpl_bufs(tx, pending_packet); - else + + switch (pending_packet->type) { + case GVE_TX_PENDING_PACKET_DQO_SKB: + if (tx->dqo.qpl) + gve_free_tx_qpl_bufs(tx, pending_packet); + else + gve_unmap_packet(tx->dev, pending_packet); + (*pkts)++; + *bytes += pending_packet->skb->len; + + napi_consume_skb(pending_packet->skb, is_napi); + pending_packet->skb = NULL; + gve_free_pending_packet(tx, pending_packet); + break; + case GVE_TX_PENDING_PACKET_DQO_XDP_FRAME: gve_unmap_packet(tx->dev, pending_packet); + (*pkts)++; + *bytes += pending_packet->xdpf->len; - *bytes += pending_packet->skb->len; - (*pkts)++; - napi_consume_skb(pending_packet->skb, is_napi); - pending_packet->skb = NULL; - gve_free_pending_packet(tx, pending_packet); + xdp_return_frame(pending_packet->xdpf); + pending_packet->xdpf = NULL; + gve_free_pending_packet(tx, pending_packet); + break; + case GVE_TX_PENDING_PACKET_DQO_XSK: + pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE; + break; + default: + WARN_ON_ONCE(1); + } } static void gve_handle_miss_completion(struct gve_priv *priv, @@ -1215,8 +1357,34 @@ static void remove_timed_out_completions(struct gve_priv *priv, remove_from_list(tx, &tx->dqo_compl.timed_out_completions, pending_packet); + + /* Need to count XSK packets in xsk_tx_completed. */ + if (pending_packet->type == GVE_TX_PENDING_PACKET_DQO_XSK) + pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE; + else + gve_free_pending_packet(tx, pending_packet); + } +} + +static void gve_tx_process_xsk_completions(struct gve_tx_ring *tx) +{ + u32 num_xsks = 0; + + while (true) { + struct gve_tx_pending_packet_dqo *pending_packet = + gve_xsk_reorder_queue_head(tx); + + if (!pending_packet || + pending_packet->state != GVE_PACKET_STATE_XSK_COMPLETE) + break; + + num_xsks++; + gve_xsk_reorder_queue_pop_dqo(tx); gve_free_pending_packet(tx, pending_packet); } + + if (num_xsks) + xsk_tx_completed(tx->xsk_pool, num_xsks); } int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, @@ -1289,13 +1457,17 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, num_descs_cleaned++; } - netdev_tx_completed_queue(tx->netdev_txq, - pkt_compl_pkts + miss_compl_pkts, - pkt_compl_bytes + miss_compl_bytes); + if (tx->netdev_txq) + netdev_tx_completed_queue(tx->netdev_txq, + pkt_compl_pkts + miss_compl_pkts, + pkt_compl_bytes + miss_compl_bytes); remove_miss_completions(priv, tx); remove_timed_out_completions(priv, tx); + if (tx->xsk_pool) + gve_tx_process_xsk_completions(tx); + u64_stats_update_begin(&tx->statss); tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes; tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts; @@ -1327,3 +1499,111 @@ bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean) compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head]; return compl_desc->generation != tx->dqo_compl.cur_gen_bit; } + +bool gve_xsk_tx_poll_dqo(struct gve_notify_block *rx_block, int budget) +{ + struct gve_rx_ring *rx = rx_block->rx; + struct gve_priv *priv = rx->gve; + struct gve_tx_ring *tx; + + tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)]; + if (tx->xsk_pool) + return gve_xsk_tx_dqo(priv, tx, budget); + + return 0; +} + +bool gve_xdp_poll_dqo(struct gve_notify_block *block) +{ + struct gve_tx_compl_desc *compl_desc; + struct gve_tx_ring *tx = block->tx; + struct gve_priv *priv = block->priv; + + gve_clean_tx_done_dqo(priv, tx, &block->napi); + + /* Return true if we still have work. */ + compl_desc = &tx->dqo.compl_ring[tx->dqo_compl.head]; + return compl_desc->generation != tx->dqo_compl.cur_gen_bit; +} + +int gve_xdp_xmit_one_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + struct xdp_frame *xdpf) +{ + struct gve_tx_pending_packet_dqo *pkt; + u32 desc_idx = tx->dqo_tx.tail; + s16 completion_tag; + int num_descs = 1; + dma_addr_t addr; + int err; + + if (unlikely(!gve_has_tx_slots_available(tx, num_descs))) + return -EBUSY; + + pkt = gve_alloc_pending_packet(tx); + if (unlikely(!pkt)) + return -EBUSY; + + pkt->type = GVE_TX_PENDING_PACKET_DQO_XDP_FRAME; + pkt->num_bufs = 0; + pkt->xdpf = xdpf; + completion_tag = pkt - tx->dqo.pending_packets; + + /* Generate Packet Descriptor */ + addr = dma_map_single(tx->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE); + err = dma_mapping_error(tx->dev, addr); + if (unlikely(err)) + goto err; + + dma_unmap_len_set(pkt, len[pkt->num_bufs], xdpf->len); + dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr); + pkt->num_bufs++; + + gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, + false, xdpf->len, + addr, completion_tag, true, + false); + + gve_tx_update_tail(tx, desc_idx); + return 0; + +err: + pkt->xdpf = NULL; + pkt->num_bufs = 0; + gve_free_pending_packet(tx, pkt); + return err; +} + +int gve_xdp_xmit_dqo(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx; + int i, err = 0, qid; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + qid = gve_xdp_tx_queue_id(priv, + smp_processor_id() % priv->tx_cfg.num_xdp_queues); + + tx = &priv->tx[qid]; + + spin_lock(&tx->dqo_tx.xdp_lock); + for (i = 0; i < n; i++) { + err = gve_xdp_xmit_one_dqo(priv, tx, frames[i]); + if (err) + break; + } + + if (flags & XDP_XMIT_FLUSH) + gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail); + + spin_unlock(&tx->dqo_tx.xdp_lock); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xmit += n; + tx->xdp_xmit_errors += n - i; + u64_stats_update_end(&tx->statss); + + return i ? i : err; +} diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 30fef100257e..ace9b8698021 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -110,13 +110,13 @@ void gve_add_napi(struct gve_priv *priv, int ntfy_idx, { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_add(priv->dev, &block->napi, gve_poll); - netif_napi_set_irq(&block->napi, block->irq); + netif_napi_add_locked(priv->dev, &block->napi, gve_poll); + netif_napi_set_irq_locked(&block->napi, block->irq); } void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) { struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; - netif_napi_del(&block->napi); + netif_napi_del_locked(&block->napi); } |