diff options
Diffstat (limited to 'drivers/net/ethernet/intel/i40e')
-rw-r--r-- | drivers/net/ethernet/intel/i40e/Makefile | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e.h | 19 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 437 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_main.c | 398 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_ptp.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 201 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.h | 20 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx_common.h | 94 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 445 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_xsk.c | 967 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_xsk.h | 25 |
12 files changed, 2101 insertions, 513 deletions
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile index 14397e7e9925..50590e8d1fd1 100644 --- a/drivers/net/ethernet/intel/i40e/Makefile +++ b/drivers/net/ethernet/intel/i40e/Makefile @@ -22,6 +22,7 @@ i40e-objs := i40e_main.o \ i40e_txrx.o \ i40e_ptp.o \ i40e_client.o \ - i40e_virtchnl_pf.o + i40e_virtchnl_pf.o \ + i40e_xsk.o i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 7a80652e2500..876cac317e79 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -786,6 +786,11 @@ struct i40e_vsi { /* VSI specific handlers */ irqreturn_t (*irq_handler)(int irq, void *data); + + /* AF_XDP zero-copy */ + struct xdp_umem **xsk_umems; + u16 num_xsk_umems_used; + u16 num_xsk_umems; } ____cacheline_internodealigned_in_smp; struct i40e_netdev_priv { @@ -1090,6 +1095,20 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) return !!vsi->xdp_prog; } +static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) +{ + bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); + int qid = ring->queue_index; + + if (ring_is_xdp(ring)) + qid -= ring->vsi->alloc_queue_pairs; + + if (!ring->vsi->xsk_umems || !ring->vsi->xsk_umems[qid] || !xdp_on) + return NULL; + + return ring->vsi->xsk_umems[qid]; +} + int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 56b911a5dd8b..a20d1cf058ad 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -132,8 +132,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n", (unsigned long int)nd->vlan_features); } - dev_info(&pf->pdev->dev, " active_vlans is %s\n", - vsi->active_vlans ? "<valid>" : "<null>"); dev_info(&pf->pdev->dev, " flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 5ff6caa83948..9f8464f80783 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -5,26 +5,227 @@ #include "i40e.h" #include "i40e_diag.h" +#include "i40e_txrx_common.h" +/* ethtool statistics helpers */ + +/** + * struct i40e_stats - definition for an ethtool statistic + * @stat_string: statistic name to display in ethtool -S output + * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64) + * @stat_offset: offsetof() the stat from a base pointer + * + * This structure defines a statistic to be added to the ethtool stats buffer. + * It defines a statistic as offset from a common base pointer. Stats should + * be defined in constant arrays using the I40E_STAT macro, with every element + * of the array using the same _type for calculating the sizeof_stat and + * stat_offset. + * + * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or + * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from + * the i40e_add_ethtool_stat() helper function. + * + * The @stat_string is interpreted as a format string, allowing formatted + * values to be inserted while looping over multiple structures for a given + * statistics array. Thus, every statistic string in an array should have the + * same type and number of format specifiers, to be formatted by variadic + * arguments to the i40e_add_stat_string() helper function. + **/ struct i40e_stats { - /* The stat_string is expected to be a format string formatted using - * vsnprintf by i40e_add_stat_strings. Every member of a stats array - * should use the same format specifiers as they will be formatted - * using the same variadic arguments. - */ char stat_string[ETH_GSTRING_LEN]; int sizeof_stat; int stat_offset; }; +/* Helper macro to define an i40e_stat structure with proper size and type. + * Use this when defining constant statistics arrays. Note that @_type expects + * only a type name and is used multiple times. + */ #define I40E_STAT(_type, _name, _stat) { \ .stat_string = _name, \ .sizeof_stat = FIELD_SIZEOF(_type, _stat), \ .stat_offset = offsetof(_type, _stat) \ } +/* Helper macro for defining some statistics directly copied from the netdev + * stats structure. + */ #define I40E_NETDEV_STAT(_net_stat) \ I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat) + +/* Helper macro for defining some statistics related to queues */ +#define I40E_QUEUE_STAT(_name, _stat) \ + I40E_STAT(struct i40e_ring, _name, _stat) + +/* Stats associated with a Tx or Rx ring */ +static const struct i40e_stats i40e_gstrings_queue_stats[] = { + I40E_QUEUE_STAT("%s-%u.packets", stats.packets), + I40E_QUEUE_STAT("%s-%u.bytes", stats.bytes), +}; + +/** + * i40e_add_one_ethtool_stat - copy the stat into the supplied buffer + * @data: location to store the stat value + * @pointer: basis for where to copy from + * @stat: the stat definition + * + * Copies the stat data defined by the pointer and stat structure pair into + * the memory supplied as data. Used to implement i40e_add_ethtool_stats and + * i40e_add_queue_stats. If the pointer is null, data will be zero'd. + */ +static void +i40e_add_one_ethtool_stat(u64 *data, void *pointer, + const struct i40e_stats *stat) +{ + char *p; + + if (!pointer) { + /* ensure that the ethtool data buffer is zero'd for any stats + * which don't have a valid pointer. + */ + *data = 0; + return; + } + + p = (char *)pointer + stat->stat_offset; + switch (stat->sizeof_stat) { + case sizeof(u64): + *data = *((u64 *)p); + break; + case sizeof(u32): + *data = *((u32 *)p); + break; + case sizeof(u16): + *data = *((u16 *)p); + break; + case sizeof(u8): + *data = *((u8 *)p); + break; + default: + WARN_ONCE(1, "unexpected stat size for %s", + stat->stat_string); + *data = 0; + } +} + +/** + * __i40e_add_ethtool_stats - copy stats into the ethtool supplied buffer + * @data: ethtool stats buffer + * @pointer: location to copy stats from + * @stats: array of stats to copy + * @size: the size of the stats definition + * + * Copy the stats defined by the stats array using the pointer as a base into + * the data buffer supplied by ethtool. Updates the data pointer to point to + * the next empty location for successive calls to __i40e_add_ethtool_stats. + * If pointer is null, set the data values to zero and update the pointer to + * skip these stats. + **/ +static void +__i40e_add_ethtool_stats(u64 **data, void *pointer, + const struct i40e_stats stats[], + const unsigned int size) +{ + unsigned int i; + + for (i = 0; i < size; i++) + i40e_add_one_ethtool_stat((*data)++, pointer, &stats[i]); +} + +/** + * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer + * @data: ethtool stats buffer + * @pointer: location where stats are stored + * @stats: static const array of stat definitions + * + * Macro to ease the use of __i40e_add_ethtool_stats by taking a static + * constant stats array and passing the ARRAY_SIZE(). This avoids typos by + * ensuring that we pass the size associated with the given stats array. + * + * The parameter @stats is evaluated twice, so parameters with side effects + * should be avoided. + **/ +#define i40e_add_ethtool_stats(data, pointer, stats) \ + __i40e_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats)) + +/** + * i40e_add_queue_stats - copy queue statistics into supplied buffer + * @data: ethtool stats buffer + * @ring: the ring to copy + * + * Queue statistics must be copied while protected by + * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats. + * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the + * ring pointer is null, zero out the queue stat values and update the data + * pointer. Otherwise safely copy the stats from the ring into the supplied + * buffer and update the data pointer when finished. + * + * This function expects to be called while under rcu_read_lock(). + **/ +static void +i40e_add_queue_stats(u64 **data, struct i40e_ring *ring) +{ + const unsigned int size = ARRAY_SIZE(i40e_gstrings_queue_stats); + const struct i40e_stats *stats = i40e_gstrings_queue_stats; + unsigned int start; + unsigned int i; + + /* To avoid invalid statistics values, ensure that we keep retrying + * the copy until we get a consistent value according to + * u64_stats_fetch_retry_irq. But first, make sure our ring is + * non-null before attempting to access its syncp. + */ + do { + start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp); + for (i = 0; i < size; i++) { + i40e_add_one_ethtool_stat(&(*data)[i], ring, + &stats[i]); + } + } while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start)); + + /* Once we successfully copy the stats in, update the data pointer */ + *data += size; +} + +/** + * __i40e_add_stat_strings - copy stat strings into ethtool buffer + * @p: ethtool supplied buffer + * @stats: stat definitions array + * @size: size of the stats array + * + * Format and copy the strings described by stats into the buffer pointed at + * by p. + **/ +static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[], + const unsigned int size, ...) +{ + unsigned int i; + + for (i = 0; i < size; i++) { + va_list args; + + va_start(args, size); + vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args); + *p += ETH_GSTRING_LEN; + va_end(args); + } +} + +/** + * 40e_add_stat_strings - copy stat strings into ethtool buffer + * @p: ethtool supplied buffer + * @stats: stat definitions array + * + * Format and copy the strings described by the const static stats value into + * the buffer pointed at by p. + * + * The parameter @stats is evaluated twice, so parameters with side effects + * should be avoided. Additionally, stats must be an array such that + * ARRAY_SIZE can be called on it. + **/ +#define i40e_add_stat_strings(p, stats, ...) \ + __i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__) + #define I40E_PF_STAT(_name, _stat) \ I40E_STAT(struct i40e_pf, _name, _stat) #define I40E_VSI_STAT(_name, _stat) \ @@ -33,6 +234,8 @@ struct i40e_stats { I40E_STAT(struct i40e_veb, _name, _stat) #define I40E_PFC_STAT(_name, _stat) \ I40E_STAT(struct i40e_pfc_stats, _name, _stat) +#define I40E_QUEUE_STAT(_name, _stat) \ + I40E_STAT(struct i40e_ring, _name, _stat) static const struct i40e_stats i40e_gstrings_net_stats[] = { I40E_NETDEV_STAT(rx_packets), @@ -171,20 +374,11 @@ static const struct i40e_stats i40e_gstrings_pfc_stats[] = { I40E_PFC_STAT("port.rx_priority_%u_xon_2_xoff", priority_xon_2_xoff), }; -/* We use num_tx_queues here as a proxy for the maximum number of queues - * available because we always allocate queues symmetrically. - */ -#define I40E_MAX_NUM_QUEUES(n) ((n)->num_tx_queues) -#define I40E_QUEUE_STATS_LEN(n) \ - (I40E_MAX_NUM_QUEUES(n) \ - * 2 /* Tx and Rx together */ \ - * (sizeof(struct i40e_queue_stats) / sizeof(u64))) -#define I40E_GLOBAL_STATS_LEN ARRAY_SIZE(i40e_gstrings_stats) #define I40E_NETDEV_STATS_LEN ARRAY_SIZE(i40e_gstrings_net_stats) + #define I40E_MISC_STATS_LEN ARRAY_SIZE(i40e_gstrings_misc_stats) -#define I40E_VSI_STATS_LEN(n) (I40E_NETDEV_STATS_LEN + \ - I40E_MISC_STATS_LEN + \ - I40E_QUEUE_STATS_LEN((n))) + +#define I40E_VSI_STATS_LEN (I40E_NETDEV_STATS_LEN + I40E_MISC_STATS_LEN) #define I40E_PFC_STATS_LEN (ARRAY_SIZE(i40e_gstrings_pfc_stats) * \ I40E_MAX_USER_PRIORITY) @@ -193,10 +387,15 @@ static const struct i40e_stats i40e_gstrings_pfc_stats[] = { (ARRAY_SIZE(i40e_gstrings_veb_tc_stats) * \ I40E_MAX_TRAFFIC_CLASS)) -#define I40E_PF_STATS_LEN(n) (I40E_GLOBAL_STATS_LEN + \ +#define I40E_GLOBAL_STATS_LEN ARRAY_SIZE(i40e_gstrings_stats) + +#define I40E_PF_STATS_LEN (I40E_GLOBAL_STATS_LEN + \ I40E_PFC_STATS_LEN + \ I40E_VEB_STATS_LEN + \ - I40E_VSI_STATS_LEN((n))) + I40E_VSI_STATS_LEN) + +/* Length of stats for a single queue */ +#define I40E_QUEUE_STATS_LEN ARRAY_SIZE(i40e_gstrings_queue_stats) enum i40e_ethtool_test_id { I40E_ETH_TEST_REG = 0, @@ -1512,6 +1711,13 @@ static int i40e_set_ringparam(struct net_device *netdev, (new_rx_count == vsi->rx_rings[0]->count)) return 0; + /* If there is a AF_XDP UMEM attached to any of Rx rings, + * disallow changing the number of descriptors -- regardless + * if the netdev is running or not. + */ + if (i40e_xsk_any_rx_ring_enabled(vsi)) + return -EBUSY; + while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { timeout--; if (!timeout) @@ -1701,11 +1907,30 @@ static int i40e_get_stats_count(struct net_device *netdev) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + int stats_len; if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1) - return I40E_PF_STATS_LEN(netdev); + stats_len = I40E_PF_STATS_LEN; else - return I40E_VSI_STATS_LEN(netdev); + stats_len = I40E_VSI_STATS_LEN; + + /* The number of stats reported for a given net_device must remain + * constant throughout the life of that device. + * + * This is because the API for obtaining the size, strings, and stats + * is spread out over three separate ethtool ioctls. There is no safe + * way to lock the number of stats across these calls, so we must + * assume that they will never change. + * + * Due to this, we report the maximum number of queues, even if not + * every queue is currently configured. Since we always allocate + * queues in pairs, we'll just use netdev->num_tx_queues * 2. This + * works because the num_tx_queues is set at device creation and never + * changes. + */ + stats_len += I40E_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues; + + return stats_len; } static int i40e_get_sset_count(struct net_device *netdev, int sset) @@ -1728,89 +1953,6 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset) } /** - * i40e_add_one_ethtool_stat - copy the stat into the supplied buffer - * @data: location to store the stat value - * @pointer: basis for where to copy from - * @stat: the stat definition - * - * Copies the stat data defined by the pointer and stat structure pair into - * the memory supplied as data. Used to implement i40e_add_ethtool_stats. - * If the pointer is null, data will be zero'd. - */ -static inline void -i40e_add_one_ethtool_stat(u64 *data, void *pointer, - const struct i40e_stats *stat) -{ - char *p; - - if (!pointer) { - /* ensure that the ethtool data buffer is zero'd for any stats - * which don't have a valid pointer. - */ - *data = 0; - return; - } - - p = (char *)pointer + stat->stat_offset; - switch (stat->sizeof_stat) { - case sizeof(u64): - *data = *((u64 *)p); - break; - case sizeof(u32): - *data = *((u32 *)p); - break; - case sizeof(u16): - *data = *((u16 *)p); - break; - case sizeof(u8): - *data = *((u8 *)p); - break; - default: - WARN_ONCE(1, "unexpected stat size for %s", - stat->stat_string); - *data = 0; - } -} - -/** - * __i40e_add_ethtool_stats - copy stats into the ethtool supplied buffer - * @data: ethtool stats buffer - * @pointer: location to copy stats from - * @stats: array of stats to copy - * @size: the size of the stats definition - * - * Copy the stats defined by the stats array using the pointer as a base into - * the data buffer supplied by ethtool. Updates the data pointer to point to - * the next empty location for successive calls to __i40e_add_ethtool_stats. - * If pointer is null, set the data values to zero and update the pointer to - * skip these stats. - **/ -static inline void -__i40e_add_ethtool_stats(u64 **data, void *pointer, - const struct i40e_stats stats[], - const unsigned int size) -{ - unsigned int i; - - for (i = 0; i < size; i++) - i40e_add_one_ethtool_stat((*data)++, pointer, &stats[i]); -} - -/** - * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer - * @data: ethtool stats buffer - * @pointer: location where stats are stored - * @stats: static const array of stat definitions - * - * Macro to ease the use of __i40e_add_ethtool_stats by taking a static - * constant stats array and passing the ARRAY_SIZE(). This avoids typos by - * ensuring that we pass the size associated with the given stats array. - * Assumes that stats is an array. - **/ -#define i40e_add_ethtool_stats(data, pointer, stats) \ - __i40e_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats)) - -/** * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure * @pf: the PF device structure * @i: the priority value to copy @@ -1853,12 +1995,10 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_ring *tx_ring, *rx_ring; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_veb *veb = pf->veb[pf->lan_veb]; unsigned int i; - unsigned int start; bool veb_stats; u64 *p = data; @@ -1870,38 +2010,12 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, i40e_add_ethtool_stats(&data, vsi, i40e_gstrings_misc_stats); rcu_read_lock(); - for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev) ; i++) { - tx_ring = READ_ONCE(vsi->tx_rings[i]); - - if (!tx_ring) { - /* Bump the stat counter to skip these stats, and make - * sure the memory is zero'd - */ - *(data++) = 0; - *(data++) = 0; - *(data++) = 0; - *(data++) = 0; - continue; - } - - /* process Tx ring statistics */ - do { - start = u64_stats_fetch_begin_irq(&tx_ring->syncp); - data[0] = tx_ring->stats.packets; - data[1] = tx_ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start)); - data += 2; - - /* Rx ring is the 2nd half of the queue pair */ - rx_ring = &tx_ring[1]; - do { - start = u64_stats_fetch_begin_irq(&rx_ring->syncp); - data[0] = rx_ring->stats.packets; - data[1] = rx_ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); - data += 2; + for (i = 0; i < netdev->num_tx_queues; i++) { + i40e_add_queue_stats(&data, READ_ONCE(vsi->tx_rings[i])); + i40e_add_queue_stats(&data, READ_ONCE(vsi->rx_rings[i])); } rcu_read_unlock(); + if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1) goto check_data_pointer; @@ -1933,42 +2047,6 @@ check_data_pointer: } /** - * __i40e_add_stat_strings - copy stat strings into ethtool buffer - * @p: ethtool supplied buffer - * @stats: stat definitions array - * @size: size of the stats array - * - * Format and copy the strings described by stats into the buffer pointed at - * by p. - **/ -static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[], - const unsigned int size, ...) -{ - unsigned int i; - - for (i = 0; i < size; i++) { - va_list args; - - va_start(args, size); - vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args); - *p += ETH_GSTRING_LEN; - va_end(args); - } -} - -/** - * 40e_add_stat_strings - copy stat strings into ethtool buffer - * @p: ethtool supplied buffer - * @stats: stat definitions array - * - * Format and copy the strings described by the const static stats value into - * the buffer pointed at by p. Assumes that stats can have ARRAY_SIZE called - * for it. - **/ -#define i40e_add_stat_strings(p, stats, ...) \ - __i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__) - -/** * i40e_get_stat_strings - copy stat strings into supplied buffer * @netdev: the netdev to collect strings for * @data: supplied buffer to copy strings into @@ -1990,16 +2068,13 @@ static void i40e_get_stat_strings(struct net_device *netdev, u8 *data) i40e_add_stat_strings(&data, i40e_gstrings_misc_stats); - for (i = 0; i < I40E_MAX_NUM_QUEUES(netdev); i++) { - snprintf(data, ETH_GSTRING_LEN, "tx-%u.tx_packets", i); - data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i); - data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, "rx-%u.rx_packets", i); - data += ETH_GSTRING_LEN; - snprintf(data, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i); - data += ETH_GSTRING_LEN; + for (i = 0; i < netdev->num_tx_queues; i++) { + i40e_add_stat_strings(&data, i40e_gstrings_queue_stats, + "tx", i); + i40e_add_stat_strings(&data, i40e_gstrings_queue_stats, + "rx", i); } + if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1) return; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ac685ad4d877..bc71a21c1dc2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9,7 +9,9 @@ /* Local includes */ #include "i40e.h" #include "i40e_diag.h" +#include "i40e_xsk.h" #include <net/udp_tunnel.h> +#include <net/xdp_sock.h> /* All i40e tracepoints are defined by the include below, which * must be included exactly once across the whole kernel with * CREATE_TRACE_POINTS defined @@ -89,7 +91,7 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>"); MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_VERSION(DRV_VERSION); static struct workqueue_struct *i40e_wq; @@ -420,9 +422,9 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct i40e_netdev_priv *np = netdev_priv(netdev); - struct i40e_ring *tx_ring, *rx_ring; struct i40e_vsi *vsi = np->vsi; struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi); + struct i40e_ring *ring; int i; if (test_bit(__I40E_VSI_DOWN, vsi->state)) @@ -436,24 +438,26 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, u64 bytes, packets; unsigned int start; - tx_ring = READ_ONCE(vsi->tx_rings[i]); - if (!tx_ring) + ring = READ_ONCE(vsi->tx_rings[i]); + if (!ring) continue; - i40e_get_netdev_stats_struct_tx(tx_ring, stats); + i40e_get_netdev_stats_struct_tx(ring, stats); - rx_ring = &tx_ring[1]; + if (i40e_enabled_xdp_vsi(vsi)) { + ring++; + i40e_get_netdev_stats_struct_tx(ring, stats); + } + ring++; do { - start = u64_stats_fetch_begin_irq(&rx_ring->syncp); - packets = rx_ring->stats.packets; - bytes = rx_ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); + start = u64_stats_fetch_begin_irq(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; - if (i40e_enabled_xdp_vsi(vsi)) - i40e_get_netdev_stats_struct_tx(&rx_ring[1], stats); } rcu_read_unlock(); @@ -1528,8 +1532,8 @@ static int i40e_set_mac(struct net_device *netdev, void *p) return 0; } - if (test_bit(__I40E_VSI_DOWN, vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state)) + if (test_bit(__I40E_DOWN, pf->state) || + test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return -EADDRNOTAVAIL; if (ether_addr_equal(hw->mac.addr, addr->sa_data)) @@ -1553,8 +1557,7 @@ static int i40e_set_mac(struct net_device *netdev, void *p) if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; - ret = i40e_aq_mac_address_write(&vsi->back->hw, - I40E_AQC_WRITE_TYPE_LAA_WOL, + ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL, addr->sa_data, NULL); if (ret) netdev_info(netdev, "Ignoring error from firmware on LAA update, status %s, AQ ret %s\n", @@ -1565,7 +1568,7 @@ static int i40e_set_mac(struct net_device *netdev, void *p) /* schedule our worker thread which will take care of * applying the new filter changes */ - i40e_service_event_schedule(vsi->back); + i40e_service_event_schedule(pf); return 0; } @@ -3072,6 +3075,9 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) i40e_status err = 0; u32 qtx_ctl = 0; + if (ring_is_xdp(ring)) + ring->xsk_umem = i40e_xsk_umem(ring); + /* some ATR related tx ring init */ if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) { ring->atr_sample_rate = vsi->back->atr_sample_rate; @@ -3181,13 +3187,46 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) struct i40e_hw *hw = &vsi->back->hw; struct i40e_hmc_obj_rxq rx_ctx; i40e_status err = 0; + bool ok; + int ret; bitmap_zero(ring->state, __I40E_RING_STATE_NBITS); /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(rx_ctx)); - ring->rx_buf_len = vsi->rx_buf_len; + if (ring->vsi->type == I40E_VSI_MAIN) + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + + ring->xsk_umem = i40e_xsk_umem(ring); + if (ring->xsk_umem) { + ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr - + XDP_PACKET_HEADROOM; + /* For AF_XDP ZC, we disallow packets to span on + * multiple buffers, thus letting us skip that + * handling in the fast-path. + */ + chain_len = 1; + ring->zca.free = i40e_zca_free; + ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_ZERO_COPY, + &ring->zca); + if (ret) + return ret; + dev_info(&vsi->back->pdev->dev, + "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n", + ring->queue_index); + + } else { + ring->rx_buf_len = vsi->rx_buf_len; + if (ring->vsi->type == I40E_VSI_MAIN) { + ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, + MEM_TYPE_PAGE_SHARED, + NULL); + if (ret) + return ret; + } + } rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len, BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT)); @@ -3243,7 +3282,15 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q); writel(0, ring->tail); - i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); + ok = ring->xsk_umem ? + i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) : + !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring)); + if (!ok) { + dev_info(&vsi->back->pdev->dev, + "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n", + ring->xsk_umem ? "UMEM enabled " : "", + ring->queue_index, pf_q); + } return 0; } @@ -6384,7 +6431,10 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) char *req_fec = ""; char *an = ""; - new_speed = pf->hw.phy.link_info.link_speed; + if (isup) + new_speed = pf->hw.phy.link_info.link_speed; + else + new_speed = I40E_LINK_SPEED_UNKNOWN; if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed)) return; @@ -6568,6 +6618,24 @@ static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up) struct i40e_hw *hw = &pf->hw; i40e_status err; u64 mask; + u8 speed; + + /* Card might've been put in an unstable state by other drivers + * and applications, which causes incorrect speed values being + * set on startup. In order to clear speed registers, we call + * get_phy_capabilities twice, once to get initial state of + * available speeds, and once to get current PHY config. + */ + err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, + NULL); + if (err) { + dev_err(&pf->pdev->dev, + "failed to get phy cap., ret = %s last_status = %s\n", + i40e_stat_str(hw, err), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return err; + } + speed = abilities.link_speed; /* Get the current phy config */ err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, @@ -6581,9 +6649,9 @@ static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up) } /* If link needs to go up, but was not forced to go down, - * no need for a flap + * and its speed values are OK, no need for a flap */ - if (is_up && abilities.phy_type != 0) + if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0) return I40E_SUCCESS; /* To force link we need to set bits for all supported PHY types, @@ -6595,7 +6663,10 @@ static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up) config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0; /* Copy the old settings, except of phy_type */ config.abilities = abilities.abilities; - config.link_speed = abilities.link_speed; + if (abilities.link_speed != 0) + config.link_speed = abilities.link_speed; + else + config.link_speed = speed; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; @@ -8440,14 +8511,9 @@ static void i40e_link_event(struct i40e_pf *pf) i40e_status status; bool new_link, old_link; - /* save off old link status information */ - pf->hw.phy.link_info_old = pf->hw.phy.link_info; - /* set this to force the get_link_status call to refresh state */ pf->hw.phy.get_link_info = true; - old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP); - status = i40e_get_link_status(&pf->hw, &new_link); /* On success, disable temp link polling */ @@ -11828,6 +11894,256 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, } /** + * i40e_enter_busy_conf - Enters busy config state + * @vsi: vsi + * + * Returns 0 on success, <0 for failure. + **/ +static int i40e_enter_busy_conf(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + int timeout = 50; + + while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + + return 0; +} + +/** + * i40e_exit_busy_conf - Exits busy config state + * @vsi: vsi + **/ +static void i40e_exit_busy_conf(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + + clear_bit(__I40E_CONFIG_BUSY, pf->state); +} + +/** + * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair + * @vsi: vsi + * @queue_pair: queue pair + **/ +static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair) +{ + memset(&vsi->rx_rings[queue_pair]->rx_stats, 0, + sizeof(vsi->rx_rings[queue_pair]->rx_stats)); + memset(&vsi->tx_rings[queue_pair]->stats, 0, + sizeof(vsi->tx_rings[queue_pair]->stats)); + if (i40e_enabled_xdp_vsi(vsi)) { + memset(&vsi->xdp_rings[queue_pair]->stats, 0, + sizeof(vsi->xdp_rings[queue_pair]->stats)); + } +} + +/** + * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair + * @vsi: vsi + * @queue_pair: queue pair + **/ +static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair) +{ + i40e_clean_tx_ring(vsi->tx_rings[queue_pair]); + if (i40e_enabled_xdp_vsi(vsi)) + i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]); + i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); +} + +/** + * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * @enable: true for enable, false for disable + **/ +static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair, + bool enable) +{ + struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; + struct i40e_q_vector *q_vector = rxr->q_vector; + + if (!vsi->netdev) + return; + + /* All rings in a qp belong to the same qvector. */ + if (q_vector->rx.ring || q_vector->tx.ring) { + if (enable) + napi_enable(&q_vector->napi); + else + napi_disable(&q_vector->napi); + } +} + +/** + * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * @enable: true for enable, false for disable + * + * Returns 0 on success, <0 on failure. + **/ +static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair, + bool enable) +{ + struct i40e_pf *pf = vsi->back; + int pf_q, ret = 0; + + pf_q = vsi->base_queue + queue_pair; + ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, + false /*is xdp*/, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Tx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + return ret; + } + + i40e_control_rx_q(pf, pf_q, enable); + ret = i40e_pf_rxq_wait(pf, pf_q, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + return ret; + } + + /* Due to HW errata, on Rx disable only, the register can + * indicate done before it really is. Needs 50ms to be sure + */ + if (!enable) + mdelay(50); + + if (!i40e_enabled_xdp_vsi(vsi)) + return ret; + + ret = i40e_control_wait_tx_q(vsi->seid, pf, + pf_q + vsi->alloc_queue_pairs, + true /*is xdp*/, enable); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d XDP Tx ring %d %sable timeout\n", + vsi->seid, pf_q, (enable ? "en" : "dis")); + } + + return ret; +} + +/** + * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair + * @vsi: vsi + * @queue_pair: queue_pair + **/ +static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair) +{ + struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + /* All rings in a qp belong to the same qvector. */ + if (pf->flags & I40E_FLAG_MSIX_ENABLED) + i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx); + else + i40e_irq_dynamic_enable_icr0(pf); + + i40e_flush(hw); +} + +/** + * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair + * @vsi: vsi + * @queue_pair: queue_pair + **/ +static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair) +{ + struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + /* For simplicity, instead of removing the qp interrupt causes + * from the interrupt linked list, we simply disable the interrupt, and + * leave the list intact. + * + * All rings in a qp belong to the same qvector. + */ + if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + u32 intpf = vsi->base_vector + rxr->q_vector->v_idx; + + wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0); + i40e_flush(hw); + synchronize_irq(pf->msix_entries[intpf].vector); + } else { + /* Legacy and MSI mode - this stops all interrupt handling */ + wr32(hw, I40E_PFINT_ICR0_ENA, 0); + wr32(hw, I40E_PFINT_DYN_CTL0, 0); + i40e_flush(hw); + synchronize_irq(pf->pdev->irq); + } +} + +/** + * i40e_queue_pair_disable - Disables a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * + * Returns 0 on success, <0 on failure. + **/ +int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) +{ + int err; + + err = i40e_enter_busy_conf(vsi); + if (err) + return err; + + i40e_queue_pair_disable_irq(vsi, queue_pair); + err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); + i40e_queue_pair_clean_rings(vsi, queue_pair); + i40e_queue_pair_reset_stats(vsi, queue_pair); + + return err; +} + +/** + * i40e_queue_pair_enable - Enables a queue pair + * @vsi: vsi + * @queue_pair: queue pair + * + * Returns 0 on success, <0 on failure. + **/ +int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair) +{ + int err; + + err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]); + if (err) + return err; + + if (i40e_enabled_xdp_vsi(vsi)) { + err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]); + if (err) + return err; + } + + err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]); + if (err) + return err; + + err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */); + i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */); + i40e_queue_pair_enable_irq(vsi, queue_pair); + + i40e_exit_busy_conf(vsi); + + return err; +} + +/** * i40e_xdp - implements ndo_bpf for i40e * @dev: netdevice * @xdp: XDP command @@ -11847,6 +12163,12 @@ static int i40e_xdp(struct net_device *dev, case XDP_QUERY_PROG: xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0; return 0; + case XDP_QUERY_XSK_UMEM: + return i40e_xsk_umem_query(vsi, &xdp->xsk.umem, + xdp->xsk.queue_id); + case XDP_SETUP_XSK_UMEM: + return i40e_xsk_umem_setup(vsi, xdp->xsk.umem, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -11886,6 +12208,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_bridge_setlink = i40e_ndo_bridge_setlink, .ndo_bpf = i40e_xdp, .ndo_xdp_xmit = i40e_xdp_xmit, + .ndo_xsk_async_xmit = i40e_xsk_async_xmit, }; /** @@ -13033,7 +13356,7 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++) if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid) break; - if (vsi_idx >= pf->num_alloc_vsi && vsi_seid != 0) { + if (vsi_idx == pf->num_alloc_vsi && vsi_seid != 0) { dev_info(&pf->pdev->dev, "vsi seid %d not found\n", vsi_seid); return NULL; @@ -14159,6 +14482,7 @@ static void i40e_remove(struct pci_dev *pdev) mutex_destroy(&hw->aq.asq_mutex); /* Clear all dynamic memory lists of rings, q_vectors, and VSIs */ + rtnl_lock(); i40e_clear_interrupt_scheme(pf); for (i = 0; i < pf->num_alloc_vsi; i++) { if (pf->vsi[i]) { @@ -14167,6 +14491,7 @@ static void i40e_remove(struct pci_dev *pdev) pf->vsi[i] = NULL; } } + rtnl_unlock(); for (i = 0; i < I40E_MAX_VEB; i++) { kfree(pf->veb[i]); @@ -14227,7 +14552,6 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); pci_ers_result_t result; - int err; u32 reg; dev_dbg(&pdev->dev, "%s\n", __func__); @@ -14248,14 +14572,6 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev) result = PCI_ERS_RESULT_DISCONNECT; } - err = pci_cleanup_aer_uncorrect_error_status(pdev); - if (err) { - dev_info(&pdev->dev, - "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n", - err); - /* non-fatal, continue */ - } - return result; } @@ -14378,7 +14694,13 @@ static void i40e_shutdown(struct pci_dev *pdev) wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + /* Since we're going to destroy queues during the + * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this + * whole section + */ + rtnl_lock(); i40e_clear_interrupt_scheme(pf); + rtnl_unlock(); if (system_state == SYSTEM_POWER_OFF) { pci_wake_from_d3(pdev, pf->wol_en); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 35f2866b38c6..1199f0502d6d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -694,7 +694,8 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf) if (!IS_ERR_OR_NULL(pf->ptp_clock)) return 0; - strncpy(pf->ptp_caps.name, i40e_driver_name, sizeof(pf->ptp_caps.name)); + strncpy(pf->ptp_caps.name, i40e_driver_name, + sizeof(pf->ptp_caps.name) - 1); pf->ptp_caps.owner = THIS_MODULE; pf->ptp_caps.max_adj = 999999999; pf->ptp_caps.n_ext_ts = 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b5042d1a63c0..740ea58ba938 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -8,16 +8,8 @@ #include "i40e.h" #include "i40e_trace.h" #include "i40e_prototype.h" - -static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, - u32 td_tag) -{ - return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA | - ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) | - ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) | - ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) | - ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT)); -} +#include "i40e_txrx_common.h" +#include "i40e_xsk.h" #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) /** @@ -536,8 +528,8 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi, * This is used to verify if the FD programming or invalidation * requested by SW to the HW is successful or not and take actions accordingly. **/ -static void i40e_fd_handle_status(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc, u8 prog_id) +void i40e_fd_handle_status(struct i40e_ring *rx_ring, + union i40e_rx_desc *rx_desc, u8 prog_id) { struct i40e_pf *pf = rx_ring->vsi->back; struct pci_dev *pdev = pf->pdev; @@ -644,13 +636,18 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) unsigned long bi_size; u16 i; - /* ring already cleared, nothing to do */ - if (!tx_ring->tx_bi) - return; + if (ring_is_xdp(tx_ring) && tx_ring->xsk_umem) { + i40e_xsk_clean_tx_ring(tx_ring); + } else { + /* ring already cleared, nothing to do */ + if (!tx_ring->tx_bi) + return; - /* Free all the Tx ring sk_buffs */ - for (i = 0; i < tx_ring->count; i++) - i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]); + /* Free all the Tx ring sk_buffs */ + for (i = 0; i < tx_ring->count; i++) + i40e_unmap_and_free_tx_resource(tx_ring, + &tx_ring->tx_bi[i]); + } bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; memset(tx_ring->tx_bi, 0, bi_size); @@ -767,8 +764,6 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) } } -#define WB_STRIDE 4 - /** * i40e_clean_tx_irq - Reclaim resources after transmit completes * @vsi: the VSI we care about @@ -873,27 +868,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, i += tx_ring->count; tx_ring->next_to_clean = i; - u64_stats_update_begin(&tx_ring->syncp); - tx_ring->stats.bytes += total_bytes; - tx_ring->stats.packets += total_packets; - u64_stats_update_end(&tx_ring->syncp); - tx_ring->q_vector->tx.total_bytes += total_bytes; - tx_ring->q_vector->tx.total_packets += total_packets; - - if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) { - /* check to see if there are < 4 descriptors - * waiting to be written back, then kick the hardware to force - * them to be written back in case we stay in NAPI. - * In this mode on X722 we do not enable Interrupt. - */ - unsigned int j = i40e_get_tx_pending(tx_ring, false); - - if (budget && - ((j / WB_STRIDE) == 0) && (j > 0) && - !test_bit(__I40E_VSI_DOWN, vsi->state) && - (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; - } + i40e_update_tx_stats(tx_ring, total_packets, total_bytes); + i40e_arm_wb(tx_ring, vsi, budget); if (ring_is_xdp(tx_ring)) return !!budget; @@ -1244,6 +1220,11 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, new_buff->page = old_buff->page; new_buff->page_offset = old_buff->page_offset; new_buff->pagecnt_bias = old_buff->pagecnt_bias; + + rx_ring->rx_stats.page_reuse_count++; + + /* clear contents of buffer_info */ + old_buff->page = NULL; } /** @@ -1266,7 +1247,7 @@ static inline bool i40e_rx_is_programming_status(u64 qw) } /** - * i40e_clean_programming_status - clean the programming status descriptor + * i40e_clean_programming_status - try clean the programming status descriptor * @rx_ring: the rx ring that has this descriptor * @rx_desc: the rx descriptor written back by HW * @qw: qword representing status_error_len in CPU ordering @@ -1275,15 +1256,22 @@ static inline bool i40e_rx_is_programming_status(u64 qw) * status being successful or not and take actions accordingly. FCoE should * handle its context/filter programming/invalidation status and take actions. * + * Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL. **/ -static void i40e_clean_programming_status(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc, - u64 qw) +struct i40e_rx_buffer *i40e_clean_programming_status( + struct i40e_ring *rx_ring, + union i40e_rx_desc *rx_desc, + u64 qw) { struct i40e_rx_buffer *rx_buffer; - u32 ntc = rx_ring->next_to_clean; + u32 ntc; u8 id; + if (!i40e_rx_is_programming_status(qw)) + return NULL; + + ntc = rx_ring->next_to_clean; + /* fetch, update, and store next to clean */ rx_buffer = &rx_ring->rx_bi[ntc++]; ntc = (ntc < rx_ring->count) ? ntc : 0; @@ -1291,18 +1279,13 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring, prefetch(I40E_RX_DESC(rx_ring, ntc)); - /* place unused page back on the ring */ - i40e_reuse_rx_page(rx_ring, rx_buffer); - rx_ring->rx_stats.page_reuse_count++; - - /* clear contents of buffer_info */ - rx_buffer->page = NULL; - id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >> I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT; if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS) i40e_fd_handle_status(rx_ring, rx_desc, id); + + return rx_buffer; } /** @@ -1372,6 +1355,11 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) rx_ring->skb = NULL; } + if (rx_ring->xsk_umem) { + i40e_xsk_clean_rx_ring(rx_ring); + goto skip_free; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; @@ -1400,6 +1388,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) rx_bi->page_offset = 0; } +skip_free: bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count; memset(rx_ring->rx_bi, 0, bi_size); @@ -1492,7 +1481,7 @@ err: * @rx_ring: ring to bump * @val: new head index **/ -static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) +void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) { rx_ring->next_to_use = val; @@ -1576,8 +1565,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, * @skb: packet to send up * @vlan_tag: vlan tag for packet **/ -static void i40e_receive_skb(struct i40e_ring *rx_ring, - struct sk_buff *skb, u16 vlan_tag) +void i40e_receive_skb(struct i40e_ring *rx_ring, + struct sk_buff *skb, u16 vlan_tag) { struct i40e_q_vector *q_vector = rx_ring->q_vector; @@ -1804,7 +1793,6 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * order to populate the hash, checksum, VLAN, protocol, and * other fields within the skb. **/ -static inline void i40e_process_skb_fields(struct i40e_ring *rx_ring, union i40e_rx_desc *rx_desc, struct sk_buff *skb, u8 rx_ptype) @@ -2152,7 +2140,6 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, if (i40e_can_reuse_rx_page(rx_buffer)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); - rx_ring->rx_stats.page_reuse_count++; } else { /* we are not reusing the buffer so unmap it */ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, @@ -2160,10 +2147,9 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); __page_frag_cache_drain(rx_buffer->page, rx_buffer->pagecnt_bias); + /* clear contents of buffer_info */ + rx_buffer->page = NULL; } - - /* clear contents of buffer_info */ - rx_buffer->page = NULL; } /** @@ -2199,16 +2185,10 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, return true; } -#define I40E_XDP_PASS 0 -#define I40E_XDP_CONSUMED BIT(0) -#define I40E_XDP_TX BIT(1) -#define I40E_XDP_REDIR BIT(2) - static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, struct i40e_ring *xdp_ring); -static int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, - struct i40e_ring *xdp_ring) +int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring) { struct xdp_frame *xdpf = convert_to_xdp_frame(xdp); @@ -2287,7 +2267,13 @@ static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring, #endif } -static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring) +/** + * i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register + * @xdp_ring: XDP Tx ring + * + * This function updates the XDP Tx ring tail register. + **/ +void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring) { /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. @@ -2297,6 +2283,48 @@ static inline void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring) } /** + * i40e_update_rx_stats - Update Rx ring statistics + * @rx_ring: rx descriptor ring + * @total_rx_bytes: number of bytes received + * @total_rx_packets: number of packets received + * + * This function updates the Rx ring statistics. + **/ +void i40e_update_rx_stats(struct i40e_ring *rx_ring, + unsigned int total_rx_bytes, + unsigned int total_rx_packets) +{ + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->stats.packets += total_rx_packets; + rx_ring->stats.bytes += total_rx_bytes; + u64_stats_update_end(&rx_ring->syncp); + rx_ring->q_vector->rx.total_packets += total_rx_packets; + rx_ring->q_vector->rx.total_bytes += total_rx_bytes; +} + +/** + * i40e_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map + * @rx_ring: Rx ring + * @xdp_res: Result of the receive batch + * + * This function bumps XDP Tx tail and/or flush redirect map, and + * should be called when a batch of packets has been processed in the + * napi loop. + **/ +void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res) +{ + if (xdp_res & I40E_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_res & I40E_XDP_TX) { + struct i40e_ring *xdp_ring = + rx_ring->vsi->xdp_rings[rx_ring->queue_index]; + + i40e_xdp_ring_update_tail(xdp_ring); + } +} + +/** * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process @@ -2349,11 +2377,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) */ dma_rmb(); - if (unlikely(i40e_rx_is_programming_status(qword))) { - i40e_clean_programming_status(rx_ring, rx_desc, qword); + rx_buffer = i40e_clean_programming_status(rx_ring, rx_desc, + qword); + if (unlikely(rx_buffer)) { + i40e_reuse_rx_page(rx_ring, rx_buffer); cleaned_count++; continue; } + size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; if (!size) @@ -2432,24 +2463,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) total_rx_packets++; } - if (xdp_xmit & I40E_XDP_REDIR) - xdp_do_flush_map(); - - if (xdp_xmit & I40E_XDP_TX) { - struct i40e_ring *xdp_ring = - rx_ring->vsi->xdp_rings[rx_ring->queue_index]; - - i40e_xdp_ring_update_tail(xdp_ring); - } - + i40e_finalize_xdp_rx(rx_ring, xdp_xmit); rx_ring->skb = skb; - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->stats.packets += total_rx_packets; - rx_ring->stats.bytes += total_rx_bytes; - u64_stats_update_end(&rx_ring->syncp); - rx_ring->q_vector->rx.total_packets += total_rx_packets; - rx_ring->q_vector->rx.total_bytes += total_rx_bytes; + i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); /* guarantee a trip back through this routine if there was a failure */ return failure ? budget : (int)total_rx_packets; @@ -2587,7 +2604,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { - if (!i40e_clean_tx_irq(vsi, ring, budget)) { + bool wd = ring->xsk_umem ? + i40e_clean_xdp_tx_irq(vsi, ring, budget) : + i40e_clean_tx_irq(vsi, ring, budget); + + if (!wd) { clean_complete = false; continue; } @@ -2605,7 +2626,9 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) budget_per_ring = max(budget/q_vector->num_ringpairs, 1); i40e_for_each_ring(ring, q_vector->rx) { - int cleaned = i40e_clean_rx_irq(ring, budget_per_ring); + int cleaned = ring->xsk_umem ? + i40e_clean_rx_irq_zc(ring, budget_per_ring) : + i40e_clean_rx_irq(ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index bb04f6a731fe..100e92d2982f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -296,13 +296,17 @@ struct i40e_tx_buffer { struct i40e_rx_buffer { dma_addr_t dma; - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; + union { + struct { + struct page *page; + __u32 page_offset; + __u16 pagecnt_bias; + }; + struct { + void *addr; + u64 handle; + }; + }; }; struct i40e_queue_stats { @@ -414,6 +418,8 @@ struct i40e_ring { struct i40e_channel *ch; struct xdp_rxq_info xdp_rxq; + struct xdp_umem *xsk_umem; + struct zero_copy_allocator zca; /* ZC allocator anchor */ } ____cacheline_internodealigned_in_smp; static inline bool ring_uses_build_skb(struct i40e_ring *ring) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h new file mode 100644 index 000000000000..09809dffe399 --- /dev/null +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2018 Intel Corporation. */ + +#ifndef I40E_TXRX_COMMON_ +#define I40E_TXRX_COMMON_ + +void i40e_fd_handle_status(struct i40e_ring *rx_ring, + union i40e_rx_desc *rx_desc, u8 prog_id); +int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring); +struct i40e_rx_buffer *i40e_clean_programming_status( + struct i40e_ring *rx_ring, + union i40e_rx_desc *rx_desc, + u64 qw); +void i40e_process_skb_fields(struct i40e_ring *rx_ring, + union i40e_rx_desc *rx_desc, struct sk_buff *skb, + u8 rx_ptype); +void i40e_receive_skb(struct i40e_ring *rx_ring, + struct sk_buff *skb, u16 vlan_tag); +void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring); +void i40e_update_rx_stats(struct i40e_ring *rx_ring, + unsigned int total_rx_bytes, + unsigned int total_rx_packets); +void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res); +void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val); + +#define I40E_XDP_PASS 0 +#define I40E_XDP_CONSUMED BIT(0) +#define I40E_XDP_TX BIT(1) +#define I40E_XDP_REDIR BIT(2) + +/** + * build_ctob - Builds the Tx descriptor (cmd, offset and type) qword + **/ +static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, + u32 td_tag) +{ + return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA | + ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) | + ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) | + ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) | + ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT)); +} + +/** + * i40e_update_tx_stats - Update the egress statistics for the Tx ring + * @tx_ring: Tx ring to update + * @total_packet: total packets sent + * @total_bytes: total bytes sent + **/ +static inline void i40e_update_tx_stats(struct i40e_ring *tx_ring, + unsigned int total_packets, + unsigned int total_bytes) +{ + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->stats.bytes += total_bytes; + tx_ring->stats.packets += total_packets; + u64_stats_update_end(&tx_ring->syncp); + tx_ring->q_vector->tx.total_bytes += total_bytes; + tx_ring->q_vector->tx.total_packets += total_packets; +} + +#define WB_STRIDE 4 + +/** + * i40e_arm_wb - (Possibly) arms Tx write-back + * @tx_ring: Tx ring to update + * @vsi: the VSI + * @budget: the NAPI budget left + **/ +static inline void i40e_arm_wb(struct i40e_ring *tx_ring, + struct i40e_vsi *vsi, + int budget) +{ + if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) { + /* check to see if there are < 4 descriptors + * waiting to be written back, then kick the hardware to force + * them to be written back in case we stay in NAPI. + * In this mode on X722 we do not enable Interrupt. + */ + unsigned int j = i40e_get_tx_pending(tx_ring, false); + + if (budget && + ((j / WB_STRIDE) == 0) && j > 0 && + !test_bit(__I40E_VSI_DOWN, vsi->state) && + (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) + tx_ring->arm_wb = true; + } +} + +void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring); +void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring); +bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi); + +#endif /* I40E_TXRX_COMMON_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index c6d24eaede18..81b0e1f8d14b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1084,6 +1084,136 @@ static int i40e_quiesce_vf_pci(struct i40e_vf *vf) return -EIO; } +static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi); + +/** + * i40e_config_vf_promiscuous_mode + * @vf: pointer to the VF info + * @vsi_id: VSI id + * @allmulti: set MAC L2 layer multicast promiscuous enable/disable + * @alluni: set MAC L2 layer unicast promiscuous enable/disable + * + * Called from the VF to configure the promiscuous mode of + * VF vsis and from the VF reset path to reset promiscuous mode. + **/ +static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf, + u16 vsi_id, + bool allmulti, + bool alluni) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + struct i40e_mac_filter *f; + i40e_status aq_ret = 0; + struct i40e_vsi *vsi; + int bkt; + + vsi = i40e_find_vsi_from_id(pf, vsi_id); + if (!i40e_vc_isvalid_vsi_id(vf, vsi_id) || !vsi) + return I40E_ERR_PARAM; + + if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { + dev_err(&pf->pdev->dev, + "Unprivileged VF %d is attempting to configure promiscuous mode\n", + vf->vf_id); + /* Lie to the VF on purpose. */ + return 0; + } + + if (vf->port_vlan_id) { + aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, vsi->seid, + allmulti, + vf->port_vlan_id, + NULL); + if (aq_ret) { + int aq_err = pf->hw.aq.asq_last_status; + + dev_err(&pf->pdev->dev, + "VF %d failed to set multicast promiscuous mode err %s aq_err %s\n", + vf->vf_id, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + return aq_ret; + } + + aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, vsi->seid, + alluni, + vf->port_vlan_id, + NULL); + if (aq_ret) { + int aq_err = pf->hw.aq.asq_last_status; + + dev_err(&pf->pdev->dev, + "VF %d failed to set unicast promiscuous mode err %s aq_err %s\n", + vf->vf_id, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + } + return aq_ret; + } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { + hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { + if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID) + continue; + aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, + vsi->seid, + allmulti, + f->vlan, + NULL); + if (aq_ret) { + int aq_err = pf->hw.aq.asq_last_status; + + dev_err(&pf->pdev->dev, + "Could not add VLAN %d to multicast promiscuous domain err %s aq_err %s\n", + f->vlan, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + } + + aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, + vsi->seid, + alluni, + f->vlan, + NULL); + if (aq_ret) { + int aq_err = pf->hw.aq.asq_last_status; + + dev_err(&pf->pdev->dev, + "Could not add VLAN %d to Unicast promiscuous domain err %s aq_err %s\n", + f->vlan, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + } + } + return aq_ret; + } + aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, allmulti, + NULL); + if (aq_ret) { + int aq_err = pf->hw.aq.asq_last_status; + + dev_err(&pf->pdev->dev, + "VF %d failed to set multicast promiscuous mode err %s aq_err %s\n", + vf->vf_id, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + return aq_ret; + } + + aq_ret = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, alluni, + NULL, true); + if (aq_ret) { + int aq_err = pf->hw.aq.asq_last_status; + + dev_err(&pf->pdev->dev, + "VF %d failed to set unicast promiscuous mode err %s aq_err %s\n", + vf->vf_id, + i40e_stat_str(&pf->hw, aq_ret), + i40e_aq_str(&pf->hw, aq_err)); + } + + return aq_ret; +} + /** * i40e_trigger_vf_reset * @vf: pointer to the VF structure @@ -1145,6 +1275,9 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) struct i40e_hw *hw = &pf->hw; u32 reg; + /* disable promisc modes in case they were enabled */ + i40e_config_vf_promiscuous_mode(vf, vf->lan_vsi_id, false, false); + /* free VF resources to begin resetting the VSI state */ i40e_free_vf_res(vf); @@ -1840,143 +1973,55 @@ static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi) * i40e_vc_config_promiscuous_mode_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * called from the VF to configure the promiscuous mode of * VF vsis **/ -static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, - u8 *msg, u16 msglen) +static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_promisc_info *info = (struct virtchnl_promisc_info *)msg; struct i40e_pf *pf = vf->pf; - struct i40e_hw *hw = &pf->hw; - struct i40e_mac_filter *f; i40e_status aq_ret = 0; bool allmulti = false; - struct i40e_vsi *vsi; bool alluni = false; - int aq_err = 0; - int bkt; - vsi = i40e_find_vsi_from_id(pf, info->vsi_id); - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) || - !vsi) { - aq_ret = I40E_ERR_PARAM; - goto error_param; - } - if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { - dev_err(&pf->pdev->dev, - "Unprivileged VF %d is attempting to configure promiscuous mode\n", - vf->vf_id); - /* Lie to the VF on purpose. */ - aq_ret = 0; - goto error_param; - } + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) + return I40E_ERR_PARAM; + /* Multicast promiscuous handling*/ if (info->flags & FLAG_VF_MULTICAST_PROMISC) allmulti = true; - if (vf->port_vlan_id) { - aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, vsi->seid, - allmulti, - vf->port_vlan_id, - NULL); - } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { - hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { - if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID) - continue; - aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, - vsi->seid, - allmulti, - f->vlan, - NULL); - aq_err = pf->hw.aq.asq_last_status; - if (aq_ret) { - dev_err(&pf->pdev->dev, - "Could not add VLAN %d to multicast promiscuous domain err %s aq_err %s\n", - f->vlan, - i40e_stat_str(&pf->hw, aq_ret), - i40e_aq_str(&pf->hw, aq_err)); - break; - } - } - } else { - aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, - allmulti, NULL); - aq_err = pf->hw.aq.asq_last_status; - if (aq_ret) { - dev_err(&pf->pdev->dev, - "VF %d failed to set multicast promiscuous mode err %s aq_err %s\n", - vf->vf_id, - i40e_stat_str(&pf->hw, aq_ret), - i40e_aq_str(&pf->hw, aq_err)); - goto error_param; - } - } - + if (info->flags & FLAG_VF_UNICAST_PROMISC) + alluni = true; + aq_ret = i40e_config_vf_promiscuous_mode(vf, info->vsi_id, allmulti, + alluni); if (!aq_ret) { - dev_info(&pf->pdev->dev, - "VF %d successfully set multicast promiscuous mode\n", - vf->vf_id); - if (allmulti) + if (allmulti) { + dev_info(&pf->pdev->dev, + "VF %d successfully set multicast promiscuous mode\n", + vf->vf_id); set_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states); - else + } else { + dev_info(&pf->pdev->dev, + "VF %d successfully unset multicast promiscuous mode\n", + vf->vf_id); clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states); - } - - if (info->flags & FLAG_VF_UNICAST_PROMISC) - alluni = true; - if (vf->port_vlan_id) { - aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, vsi->seid, - alluni, - vf->port_vlan_id, - NULL); - } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) { - hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { - if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID) - continue; - aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, - vsi->seid, - alluni, - f->vlan, - NULL); - aq_err = pf->hw.aq.asq_last_status; - if (aq_ret) - dev_err(&pf->pdev->dev, - "Could not add VLAN %d to Unicast promiscuous domain err %s aq_err %s\n", - f->vlan, - i40e_stat_str(&pf->hw, aq_ret), - i40e_aq_str(&pf->hw, aq_err)); - } - } else { - aq_ret = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, - alluni, NULL, - true); - aq_err = pf->hw.aq.asq_last_status; - if (aq_ret) { - dev_err(&pf->pdev->dev, - "VF %d failed to set unicast promiscuous mode %8.8x err %s aq_err %s\n", - vf->vf_id, info->flags, - i40e_stat_str(&pf->hw, aq_ret), - i40e_aq_str(&pf->hw, aq_err)); - goto error_param; } - } - - if (!aq_ret) { - dev_info(&pf->pdev->dev, - "VF %d successfully set unicast promiscuous mode\n", - vf->vf_id); - if (alluni) + if (alluni) { + dev_info(&pf->pdev->dev, + "VF %d successfully set unicast promiscuous mode\n", + vf->vf_id); set_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states); - else + } else { + dev_info(&pf->pdev->dev, + "VF %d successfully unset unicast promiscuous mode\n", + vf->vf_id); clear_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states); + } } -error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, @@ -1987,12 +2032,11 @@ error_param: * i40e_vc_config_queues_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * called from the VF to configure the rx/tx * queues **/ -static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_vsi_queue_config_info *qci = (struct virtchnl_vsi_queue_config_info *)msg; @@ -2105,12 +2149,11 @@ static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id, * i40e_vc_config_irq_map_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * called from the VF to configure the irq to * queue map **/ -static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_irq_map_info *irqmap_info = (struct virtchnl_irq_map_info *)msg; @@ -2202,11 +2245,10 @@ static int i40e_ctrl_vf_rx_rings(struct i40e_vsi *vsi, unsigned long q_map, * i40e_vc_enable_queues_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * called from the VF to enable all or specific queue(s) **/ -static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; @@ -2261,12 +2303,11 @@ error_param: * i40e_vc_disable_queues_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * called from the VF to disable all or specific * queue(s) **/ -static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; @@ -2309,14 +2350,13 @@ error_param: * i40e_vc_request_queues_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * VFs get a default number of queues but can use this message to request a * different number. If the request is successful, PF will reset the VF and * return 0. If unsuccessful, PF will send message informing VF of number of * available queues and return result of sending VF a message. **/ -static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) +static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_vf_res_request *vfres = (struct virtchnl_vf_res_request *)msg; @@ -2360,11 +2400,10 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) * i40e_vc_get_stats_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * called from the VF to get vsi stats **/ -static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; @@ -2458,7 +2497,7 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, !is_multicast_ether_addr(addr) && vf->pf_set_mac && !ether_addr_equal(addr, vf->default_lan_addr.addr)) { dev_err(&pf->pdev->dev, - "VF attempting to override administratively set MAC address, reload the VF driver to resume normal operation\n"); + "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); return -EPERM; } } @@ -2470,11 +2509,10 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, * i40e_vc_add_mac_addr_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * add guest mac address filter **/ -static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_ether_addr_list *al = (struct virtchnl_ether_addr_list *)msg; @@ -2541,11 +2579,10 @@ error_param: * i40e_vc_del_mac_addr_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * remove guest mac address filter **/ -static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_ether_addr_list *al = (struct virtchnl_ether_addr_list *)msg; @@ -2569,6 +2606,16 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } + + if (vf->pf_set_mac && + ether_addr_equal(al->list[i].addr, + vf->default_lan_addr.addr)) { + dev_err(&pf->pdev->dev, + "MAC addr %pM has been set by PF, cannot delete it for VF %d, reset VF to change MAC addr\n", + vf->default_lan_addr.addr, vf->vf_id); + ret = I40E_ERR_PARAM; + goto error_param; + } } vsi = pf->vsi[vf->lan_vsi_idx]; @@ -2601,11 +2648,10 @@ error_param: * i40e_vc_add_vlan_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * program guest vlan id **/ -static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_vlan_filter_list *vfl = (struct virtchnl_vlan_filter_list *)msg; @@ -2674,11 +2720,10 @@ error_param: * i40e_vc_remove_vlan_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * remove programmed guest vlan id **/ -static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg) { struct virtchnl_vlan_filter_list *vfl = (struct virtchnl_vlan_filter_list *)msg; @@ -2761,13 +2806,11 @@ error_param: * i40e_vc_iwarp_qvmap_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * @config: config qvmap or release it * * called from the VF for the iwarp msgs **/ -static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen, - bool config) +static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, bool config) { struct virtchnl_iwarp_qvlist_info *qvlist_info = (struct virtchnl_iwarp_qvlist_info *)msg; @@ -2798,11 +2841,10 @@ error_param: * i40e_vc_config_rss_key * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * Configure the VF's RSS key **/ -static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg) { struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; @@ -2830,11 +2872,10 @@ err: * i40e_vc_config_rss_lut * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * Configure the VF's RSS LUT **/ -static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg) { struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; @@ -2862,11 +2903,10 @@ err: * i40e_vc_get_rss_hena * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * Return the RSS HENA bits allowed by the hardware **/ -static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg) { struct virtchnl_rss_hena *vrh = NULL; struct i40e_pf *pf = vf->pf; @@ -2898,11 +2938,10 @@ err: * i40e_vc_set_rss_hena * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * Set the RSS HENA bits for the VF **/ -static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg, u16 msglen) +static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg) { struct virtchnl_rss_hena *vrh = (struct virtchnl_rss_hena *)msg; @@ -2927,12 +2966,10 @@ err: * i40e_vc_enable_vlan_stripping * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * Enable vlan header stripping for the VF **/ -static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg, - u16 msglen) +static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg) { struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_status aq_ret = 0; @@ -2954,12 +2991,10 @@ err: * i40e_vc_disable_vlan_stripping * @vf: pointer to the VF info * @msg: pointer to the msg buffer - * @msglen: msg length * * Disable vlan header stripping for the VF **/ -static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg, - u16 msglen) +static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg) { struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_status aq_ret = 0; @@ -3659,65 +3694,65 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, ret = 0; break; case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: - ret = i40e_vc_config_promiscuous_mode_msg(vf, msg, msglen); + ret = i40e_vc_config_promiscuous_mode_msg(vf, msg); break; case VIRTCHNL_OP_CONFIG_VSI_QUEUES: - ret = i40e_vc_config_queues_msg(vf, msg, msglen); + ret = i40e_vc_config_queues_msg(vf, msg); break; case VIRTCHNL_OP_CONFIG_IRQ_MAP: - ret = i40e_vc_config_irq_map_msg(vf, msg, msglen); + ret = i40e_vc_config_irq_map_msg(vf, msg); break; case VIRTCHNL_OP_ENABLE_QUEUES: - ret = i40e_vc_enable_queues_msg(vf, msg, msglen); + ret = i40e_vc_enable_queues_msg(vf, msg); i40e_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_DISABLE_QUEUES: - ret = i40e_vc_disable_queues_msg(vf, msg, msglen); + ret = i40e_vc_disable_queues_msg(vf, msg); break; case VIRTCHNL_OP_ADD_ETH_ADDR: - ret = i40e_vc_add_mac_addr_msg(vf, msg, msglen); + ret = i40e_vc_add_mac_addr_msg(vf, msg); break; case VIRTCHNL_OP_DEL_ETH_ADDR: - ret = i40e_vc_del_mac_addr_msg(vf, msg, msglen); + ret = i40e_vc_del_mac_addr_msg(vf, msg); break; case VIRTCHNL_OP_ADD_VLAN: - ret = i40e_vc_add_vlan_msg(vf, msg, msglen); + ret = i40e_vc_add_vlan_msg(vf, msg); break; case VIRTCHNL_OP_DEL_VLAN: - ret = i40e_vc_remove_vlan_msg(vf, msg, msglen); + ret = i40e_vc_remove_vlan_msg(vf, msg); break; case VIRTCHNL_OP_GET_STATS: - ret = i40e_vc_get_stats_msg(vf, msg, msglen); + ret = i40e_vc_get_stats_msg(vf, msg); break; case VIRTCHNL_OP_IWARP: ret = i40e_vc_iwarp_msg(vf, msg, msglen); break; case VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: - ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, true); + ret = i40e_vc_iwarp_qvmap_msg(vf, msg, true); break; case VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP: - ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, false); + ret = i40e_vc_iwarp_qvmap_msg(vf, msg, false); break; case VIRTCHNL_OP_CONFIG_RSS_KEY: - ret = i40e_vc_config_rss_key(vf, msg, msglen); + ret = i40e_vc_config_rss_key(vf, msg); break; case VIRTCHNL_OP_CONFIG_RSS_LUT: - ret = i40e_vc_config_rss_lut(vf, msg, msglen); + ret = i40e_vc_config_rss_lut(vf, msg); break; case VIRTCHNL_OP_GET_RSS_HENA_CAPS: - ret = i40e_vc_get_rss_hena(vf, msg, msglen); + ret = i40e_vc_get_rss_hena(vf, msg); break; case VIRTCHNL_OP_SET_RSS_HENA: - ret = i40e_vc_set_rss_hena(vf, msg, msglen); + ret = i40e_vc_set_rss_hena(vf, msg); break; case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: - ret = i40e_vc_enable_vlan_stripping(vf, msg, msglen); + ret = i40e_vc_enable_vlan_stripping(vf, msg); break; case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: - ret = i40e_vc_disable_vlan_stripping(vf, msg, msglen); + ret = i40e_vc_disable_vlan_stripping(vf, msg); break; case VIRTCHNL_OP_REQUEST_QUEUES: - ret = i40e_vc_request_queues_msg(vf, msg, msglen); + ret = i40e_vc_request_queues_msg(vf, msg); break; case VIRTCHNL_OP_ENABLE_CHANNELS: ret = i40e_vc_add_qch_msg(vf, msg); @@ -3786,6 +3821,35 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) } /** + * i40e_validate_vf + * @pf: the physical function + * @vf_id: VF identifier + * + * Check that the VF is enabled and the VSI exists. + * + * Returns 0 on success, negative on failure + **/ +static int i40e_validate_vf(struct i40e_pf *pf, int vf_id) +{ + struct i40e_vsi *vsi; + struct i40e_vf *vf; + int ret = 0; + + if (vf_id >= pf->num_alloc_vfs) { + dev_err(&pf->pdev->dev, + "Invalid VF Identifier %d\n", vf_id); + ret = -EINVAL; + goto err_out; + } + vf = &pf->vf[vf_id]; + vsi = i40e_find_vsi_from_id(pf, vf->lan_vsi_id); + if (!vsi) + ret = -EINVAL; +err_out: + return ret; +} + +/** * i40e_ndo_set_vf_mac * @netdev: network interface device structure * @vf_id: VF identifier @@ -3806,14 +3870,11 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) u8 i; /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, - "Invalid VF Identifier %d\n", vf_id); - ret = -EINVAL; + ret = i40e_validate_vf(pf, vf_id); + if (ret) goto error_param; - } - vf = &(pf->vf[vf_id]); + vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; /* When the VF is resetting wait until it is done. @@ -3873,9 +3934,11 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) mac, vf_id); } - /* Force the VF driver stop so it has to reload with new MAC address */ + /* Force the VF interface down so it has to bring up with new MAC + * address + */ i40e_vc_disable_vf(vf); - dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n"); + dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n"); error_param: return ret; @@ -3930,11 +3993,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, int ret = 0; /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id); - ret = -EINVAL; + ret = i40e_validate_vf(pf, vf_id); + if (ret) goto error_pvid; - } if ((vlan_id > I40E_MAX_VLANID) || (qos > 7)) { dev_err(&pf->pdev->dev, "Invalid VF Parameters\n"); @@ -3948,7 +4009,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, goto error_pvid; } - vf = &(pf->vf[vf_id]); + vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", @@ -4068,11 +4129,9 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, int ret = 0; /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, "Invalid VF Identifier %d.\n", vf_id); - ret = -EINVAL; + ret = i40e_validate_vf(pf, vf_id); + if (ret) goto error; - } if (min_tx_rate) { dev_err(&pf->pdev->dev, "Invalid min tx rate (%d) (greater than 0) specified for VF %d.\n", @@ -4080,7 +4139,7 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, return -EINVAL; } - vf = &(pf->vf[vf_id]); + vf = &pf->vf[vf_id]; vsi = pf->vsi[vf->lan_vsi_idx]; if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", @@ -4116,13 +4175,11 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, int ret = 0; /* validate the request */ - if (vf_id >= pf->num_alloc_vfs) { - dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id); - ret = -EINVAL; + ret = i40e_validate_vf(pf, vf_id); + if (ret) goto error_param; - } - vf = &(pf->vf[vf_id]); + vf = &pf->vf[vf_id]; /* first vsi is always the LAN vsi */ vsi = pf->vsi[vf->lan_vsi_idx]; if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { @@ -4199,7 +4256,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) vf->link_forced = true; vf->link_up = true; pfe.event_data.link_event.link_status = true; - pfe.event_data.link_event.link_speed = I40E_LINK_SPEED_40GB; + pfe.event_data.link_event.link_speed = VIRTCHNL_LINK_SPEED_40GB; break; case IFLA_VF_LINK_STATE_DISABLE: vf->link_forced = true; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c new file mode 100644 index 000000000000..add1e457886d --- /dev/null +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -0,0 +1,967 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2018 Intel Corporation. */ + +#include <linux/bpf_trace.h> +#include <net/xdp_sock.h> +#include <net/xdp.h> + +#include "i40e.h" +#include "i40e_txrx_common.h" +#include "i40e_xsk.h" + +/** + * i40e_alloc_xsk_umems - Allocate an array to store per ring UMEMs + * @vsi: Current VSI + * + * Returns 0 on success, <0 on failure + **/ +static int i40e_alloc_xsk_umems(struct i40e_vsi *vsi) +{ + if (vsi->xsk_umems) + return 0; + + vsi->num_xsk_umems_used = 0; + vsi->num_xsk_umems = vsi->alloc_queue_pairs; + vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems), + GFP_KERNEL); + if (!vsi->xsk_umems) { + vsi->num_xsk_umems = 0; + return -ENOMEM; + } + + return 0; +} + +/** + * i40e_add_xsk_umem - Store an UMEM for a certain ring/qid + * @vsi: Current VSI + * @umem: UMEM to store + * @qid: Ring/qid to associate with the UMEM + * + * Returns 0 on success, <0 on failure + **/ +static int i40e_add_xsk_umem(struct i40e_vsi *vsi, struct xdp_umem *umem, + u16 qid) +{ + int err; + + err = i40e_alloc_xsk_umems(vsi); + if (err) + return err; + + vsi->xsk_umems[qid] = umem; + vsi->num_xsk_umems_used++; + + return 0; +} + +/** + * i40e_remove_xsk_umem - Remove an UMEM for a certain ring/qid + * @vsi: Current VSI + * @qid: Ring/qid associated with the UMEM + **/ +static void i40e_remove_xsk_umem(struct i40e_vsi *vsi, u16 qid) +{ + vsi->xsk_umems[qid] = NULL; + vsi->num_xsk_umems_used--; + + if (vsi->num_xsk_umems == 0) { + kfree(vsi->xsk_umems); + vsi->xsk_umems = NULL; + vsi->num_xsk_umems = 0; + } +} + +/** + * i40e_xsk_umem_dma_map - DMA maps all UMEM memory for the netdev + * @vsi: Current VSI + * @umem: UMEM to DMA map + * + * Returns 0 on success, <0 on failure + **/ +static int i40e_xsk_umem_dma_map(struct i40e_vsi *vsi, struct xdp_umem *umem) +{ + struct i40e_pf *pf = vsi->back; + struct device *dev; + unsigned int i, j; + dma_addr_t dma; + + dev = &pf->pdev->dev; + for (i = 0; i < umem->npgs; i++) { + dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR); + if (dma_mapping_error(dev, dma)) + goto out_unmap; + + umem->pages[i].dma = dma; + } + + return 0; + +out_unmap: + for (j = 0; j < i; j++) { + dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR); + umem->pages[i].dma = 0; + } + + return -1; +} + +/** + * i40e_xsk_umem_dma_unmap - DMA unmaps all UMEM memory for the netdev + * @vsi: Current VSI + * @umem: UMEM to DMA map + **/ +static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem) +{ + struct i40e_pf *pf = vsi->back; + struct device *dev; + unsigned int i; + + dev = &pf->pdev->dev; + + for (i = 0; i < umem->npgs; i++) { + dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR); + + umem->pages[i].dma = 0; + } +} + +/** + * i40e_xsk_umem_enable - Enable/associate an UMEM to a certain ring/qid + * @vsi: Current VSI + * @umem: UMEM + * @qid: Rx ring to associate UMEM to + * + * Returns 0 on success, <0 on failure + **/ +static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, + u16 qid) +{ + struct xdp_umem_fq_reuse *reuseq; + bool if_running; + int err; + + if (vsi->type != I40E_VSI_MAIN) + return -EINVAL; + + if (qid >= vsi->num_queue_pairs) + return -EINVAL; + + if (vsi->xsk_umems) { + if (qid >= vsi->num_xsk_umems) + return -EINVAL; + if (vsi->xsk_umems[qid]) + return -EBUSY; + } + + reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count); + if (!reuseq) + return -ENOMEM; + + xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq)); + + err = i40e_xsk_umem_dma_map(vsi, umem); + if (err) + return err; + + if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi); + + if (if_running) { + err = i40e_queue_pair_disable(vsi, qid); + if (err) + return err; + } + + err = i40e_add_xsk_umem(vsi, umem, qid); + if (err) + return err; + + if (if_running) { + err = i40e_queue_pair_enable(vsi, qid); + if (err) + return err; + } + + return 0; +} + +/** + * i40e_xsk_umem_disable - Diassociate an UMEM from a certain ring/qid + * @vsi: Current VSI + * @qid: Rx ring to associate UMEM to + * + * Returns 0 on success, <0 on failure + **/ +static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) +{ + bool if_running; + int err; + + if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems || + !vsi->xsk_umems[qid]) + return -EINVAL; + + if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi); + + if (if_running) { + err = i40e_queue_pair_disable(vsi, qid); + if (err) + return err; + } + + i40e_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]); + i40e_remove_xsk_umem(vsi, qid); + + if (if_running) { + err = i40e_queue_pair_enable(vsi, qid); + if (err) + return err; + } + + return 0; +} + +/** + * i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM + * @vsi: Current VSI + * @umem: UMEM associated to the ring, if any + * @qid: Rx ring to associate UMEM to + * + * This function will store, if any, the UMEM associated to certain ring. + * + * Returns 0 on success, <0 on failure + **/ +int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem, + u16 qid) +{ + if (vsi->type != I40E_VSI_MAIN) + return -EINVAL; + + if (qid >= vsi->num_queue_pairs) + return -EINVAL; + + if (vsi->xsk_umems) { + if (qid >= vsi->num_xsk_umems) + return -EINVAL; + *umem = vsi->xsk_umems[qid]; + return 0; + } + + *umem = NULL; + return 0; +} + +/** + * i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM + * @vsi: Current VSI + * @umem: UMEM to enable/associate to a ring, or NULL to disable + * @qid: Rx ring to (dis)associate UMEM (from)to + * + * This function enables or disables an UMEM to a certain ring. + * + * Returns 0 on success, <0 on failure + **/ +int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem, + u16 qid) +{ + return umem ? i40e_xsk_umem_enable(vsi, umem, qid) : + i40e_xsk_umem_disable(vsi, qid); +} + +/** + * i40e_run_xdp_zc - Executes an XDP program on an xdp_buff + * @rx_ring: Rx ring + * @xdp: xdp_buff used as input to the XDP program + * + * This function enables or disables an UMEM to a certain ring. + * + * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR} + **/ +static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) +{ + int err, result = I40E_XDP_PASS; + struct i40e_ring *xdp_ring; + struct bpf_prog *xdp_prog; + u32 act; + + rcu_read_lock(); + /* NB! xdp_prog will always be !NULL, due to the fact that + * this path is enabled by setting an XDP program. + */ + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + act = bpf_prog_run_xdp(xdp_prog, xdp); + xdp->handle += xdp->data - xdp->data_hard_start; + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; + result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring); + break; + case XDP_REDIRECT: + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED; + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + /* fallthrough -- handle aborts by dropping packet */ + case XDP_DROP: + result = I40E_XDP_CONSUMED; + break; + } + rcu_read_unlock(); + return result; +} + +/** + * i40e_alloc_buffer_zc - Allocates an i40e_rx_buffer + * @rx_ring: Rx ring + * @bi: Rx buffer to populate + * + * This function allocates an Rx buffer. The buffer can come from fill + * queue, or via the recycle queue (next_to_alloc). + * + * Returns true for a successful allocation, false otherwise + **/ +static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *bi) +{ + struct xdp_umem *umem = rx_ring->xsk_umem; + void *addr = bi->addr; + u64 handle, hr; + + if (addr) { + rx_ring->rx_stats.page_reuse_count++; + return true; + } + + if (!xsk_umem_peek_addr(umem, &handle)) { + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + hr = umem->headroom + XDP_PACKET_HEADROOM; + + bi->dma = xdp_umem_get_dma(umem, handle); + bi->dma += hr; + + bi->addr = xdp_umem_get_data(umem, handle); + bi->addr += hr; + + bi->handle = handle + umem->headroom; + + xsk_umem_discard_addr(umem); + return true; +} + +/** + * i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer + * @rx_ring: Rx ring + * @bi: Rx buffer to populate + * + * This function allocates an Rx buffer. The buffer can come from fill + * queue, or via the reuse queue. + * + * Returns true for a successful allocation, false otherwise + **/ +static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *bi) +{ + struct xdp_umem *umem = rx_ring->xsk_umem; + u64 handle, hr; + + if (!xsk_umem_peek_addr_rq(umem, &handle)) { + rx_ring->rx_stats.alloc_page_failed++; + return false; + } + + handle &= rx_ring->xsk_umem->chunk_mask; + + hr = umem->headroom + XDP_PACKET_HEADROOM; + + bi->dma = xdp_umem_get_dma(umem, handle); + bi->dma += hr; + + bi->addr = xdp_umem_get_data(umem, handle); + bi->addr += hr; + + bi->handle = handle + umem->headroom; + + xsk_umem_discard_addr_rq(umem); + return true; +} + +static __always_inline bool +__i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count, + bool alloc(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *bi)) +{ + u16 ntu = rx_ring->next_to_use; + union i40e_rx_desc *rx_desc; + struct i40e_rx_buffer *bi; + bool ok = true; + + rx_desc = I40E_RX_DESC(rx_ring, ntu); + bi = &rx_ring->rx_bi[ntu]; + do { + if (!alloc(rx_ring, bi)) { + ok = false; + goto no_buffers; + } + + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 0, + rx_ring->rx_buf_len, + DMA_BIDIRECTIONAL); + + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); + + rx_desc++; + bi++; + ntu++; + + if (unlikely(ntu == rx_ring->count)) { + rx_desc = I40E_RX_DESC(rx_ring, 0); + bi = rx_ring->rx_bi; + ntu = 0; + } + + rx_desc->wb.qword1.status_error_len = 0; + count--; + } while (count); + +no_buffers: + if (rx_ring->next_to_use != ntu) + i40e_release_rx_desc(rx_ring, ntu); + + return ok; +} + +/** + * i40e_alloc_rx_buffers_zc - Allocates a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * This function allocates a number of Rx buffers from the reuse queue + * or fill ring and places them on the Rx ring. + * + * Returns true for a successful allocation, false otherwise + **/ +bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) +{ + return __i40e_alloc_rx_buffers_zc(rx_ring, count, + i40e_alloc_buffer_slow_zc); +} + +/** + * i40e_alloc_rx_buffers_fast_zc - Allocates a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * This function allocates a number of Rx buffers from the fill ring + * or the internal recycle mechanism and places them on the Rx ring. + * + * Returns true for a successful allocation, false otherwise + **/ +static bool i40e_alloc_rx_buffers_fast_zc(struct i40e_ring *rx_ring, u16 count) +{ + return __i40e_alloc_rx_buffers_zc(rx_ring, count, + i40e_alloc_buffer_zc); +} + +/** + * i40e_get_rx_buffer_zc - Return the current Rx buffer + * @rx_ring: Rx ring + * @size: The size of the rx buffer (read from descriptor) + * + * This function returns the current, received Rx buffer, and also + * does DMA synchronization. the Rx ring. + * + * Returns the received Rx buffer + **/ +static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct i40e_ring *rx_ring, + const unsigned int size) +{ + struct i40e_rx_buffer *bi; + + bi = &rx_ring->rx_bi[rx_ring->next_to_clean]; + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + bi->dma, 0, + size, + DMA_BIDIRECTIONAL); + + return bi; +} + +/** + * i40e_reuse_rx_buffer_zc - Recycle an Rx buffer + * @rx_ring: Rx ring + * @old_bi: The Rx buffer to recycle + * + * This function recycles a finished Rx buffer, and places it on the + * recycle queue (next_to_alloc). + **/ +static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *old_bi) +{ + struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc]; + unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask; + u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM; + u16 nta = rx_ring->next_to_alloc; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + new_bi->dma = old_bi->dma & mask; + new_bi->dma += hr; + + new_bi->addr = (void *)((unsigned long)old_bi->addr & mask); + new_bi->addr += hr; + + new_bi->handle = old_bi->handle & mask; + new_bi->handle += rx_ring->xsk_umem->headroom; + + old_bi->addr = NULL; +} + +/** + * i40e_zca_free - Free callback for MEM_TYPE_ZERO_COPY allocations + * @alloc: Zero-copy allocator + * @handle: Buffer handle + **/ +void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle) +{ + struct i40e_rx_buffer *bi; + struct i40e_ring *rx_ring; + u64 hr, mask; + u16 nta; + + rx_ring = container_of(alloc, struct i40e_ring, zca); + hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM; + mask = rx_ring->xsk_umem->chunk_mask; + + nta = rx_ring->next_to_alloc; + bi = &rx_ring->rx_bi[nta]; + + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + handle &= mask; + + bi->dma = xdp_umem_get_dma(rx_ring->xsk_umem, handle); + bi->dma += hr; + + bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle); + bi->addr += hr; + + bi->handle = (u64)handle + rx_ring->xsk_umem->headroom; +} + +/** + * i40e_construct_skb_zc - Create skbufff from zero-copy Rx buffer + * @rx_ring: Rx ring + * @bi: Rx buffer + * @xdp: xdp_buff + * + * This functions allocates a new skb from a zero-copy Rx buffer. + * + * Returns the skb, or NULL on failure. + **/ +static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *bi, + struct xdp_buff *xdp) +{ + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; + struct sk_buff *skb; + + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, + xdp->data_end - xdp->data_hard_start, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, xdp->data - xdp->data_hard_start); + memcpy(__skb_put(skb, datasize), xdp->data, datasize); + if (metasize) + skb_metadata_set(skb, metasize); + + i40e_reuse_rx_buffer_zc(rx_ring, bi); + return skb; +} + +/** + * i40e_inc_ntc: Advance the next_to_clean index + * @rx_ring: Rx ring + **/ +static void i40e_inc_ntc(struct i40e_ring *rx_ring) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + prefetch(I40E_RX_DESC(rx_ring, ntc)); +} + +/** + * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring + * @rx_ring: Rx ring + * @budget: NAPI budget + * + * Returns amount of work completed + **/ +int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) +{ + unsigned int total_rx_bytes = 0, total_rx_packets = 0; + u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); + unsigned int xdp_res, xdp_xmit = 0; + bool failure = false; + struct sk_buff *skb; + struct xdp_buff xdp; + + xdp.rxq = &rx_ring->xdp_rxq; + + while (likely(total_rx_packets < (unsigned int)budget)) { + struct i40e_rx_buffer *bi; + union i40e_rx_desc *rx_desc; + unsigned int size; + u16 vlan_tag; + u8 rx_ptype; + u64 qword; + + if (cleaned_count >= I40E_RX_BUFFER_WRITE) { + failure = failure || + !i40e_alloc_rx_buffers_fast_zc(rx_ring, + cleaned_count); + cleaned_count = 0; + } + + rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we have + * verified the descriptor has been written back. + */ + dma_rmb(); + + bi = i40e_clean_programming_status(rx_ring, rx_desc, + qword); + if (unlikely(bi)) { + i40e_reuse_rx_buffer_zc(rx_ring, bi); + cleaned_count++; + continue; + } + + size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + if (!size) + break; + + bi = i40e_get_rx_buffer_zc(rx_ring, size); + xdp.data = bi->addr; + xdp.data_meta = xdp.data; + xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; + xdp.data_end = xdp.data + size; + xdp.handle = bi->handle; + + xdp_res = i40e_run_xdp_zc(rx_ring, &xdp); + if (xdp_res) { + if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { + xdp_xmit |= xdp_res; + bi->addr = NULL; + } else { + i40e_reuse_rx_buffer_zc(rx_ring, bi); + } + + total_rx_bytes += size; + total_rx_packets++; + + cleaned_count++; + i40e_inc_ntc(rx_ring); + continue; + } + + /* XDP_PASS path */ + + /* NB! We are not checking for errors using + * i40e_test_staterr with + * BIT(I40E_RXD_QW1_ERROR_SHIFT). This is due to that + * SBP is *not* set in PRT_SBPVSI (default not set). + */ + skb = i40e_construct_skb_zc(rx_ring, bi, &xdp); + if (!skb) { + rx_ring->rx_stats.alloc_buff_failed++; + break; + } + + cleaned_count++; + i40e_inc_ntc(rx_ring); + + if (eth_skb_pad(skb)) + continue; + + total_rx_bytes += skb->len; + total_rx_packets++; + + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); + rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> + I40E_RXD_QW1_PTYPE_SHIFT; + i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + + vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ? + le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; + i40e_receive_skb(rx_ring, skb, vlan_tag); + } + + i40e_finalize_xdp_rx(rx_ring, xdp_xmit); + i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); + return failure ? budget : (int)total_rx_packets; +} + +/** + * i40e_xmit_zc - Performs zero-copy Tx AF_XDP + * @xdp_ring: XDP Tx ring + * @budget: NAPI budget + * + * Returns true if the work is finished. + **/ +static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) +{ + struct i40e_tx_desc *tx_desc = NULL; + struct i40e_tx_buffer *tx_bi; + bool work_done = true; + dma_addr_t dma; + u32 len; + + while (budget-- > 0) { + if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) { + xdp_ring->tx_stats.tx_busy++; + work_done = false; + break; + } + + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len)) + break; + + dma_sync_single_for_device(xdp_ring->dev, dma, len, + DMA_BIDIRECTIONAL); + + tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use]; + tx_bi->bytecount = len; + + tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use); + tx_desc->buffer_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = + build_ctob(I40E_TX_DESC_CMD_ICRC + | I40E_TX_DESC_CMD_EOP, + 0, len, 0); + + xdp_ring->next_to_use++; + if (xdp_ring->next_to_use == xdp_ring->count) + xdp_ring->next_to_use = 0; + } + + if (tx_desc) { + /* Request an interrupt for the last frame and bump tail ptr. */ + tx_desc->cmd_type_offset_bsz |= (I40E_TX_DESC_CMD_RS << + I40E_TXD_QW1_CMD_SHIFT); + i40e_xdp_ring_update_tail(xdp_ring); + + xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + } + + return !!budget && work_done; +} + +/** + * i40e_clean_xdp_tx_buffer - Frees and unmaps an XDP Tx entry + * @tx_ring: XDP Tx ring + * @tx_bi: Tx buffer info to clean + **/ +static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring, + struct i40e_tx_buffer *tx_bi) +{ + xdp_return_frame(tx_bi->xdpf); + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_bi, dma), + dma_unmap_len(tx_bi, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_bi, len, 0); +} + +/** + * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries + * @tx_ring: XDP Tx ring + * @tx_bi: Tx buffer info to clean + * + * Returns true if cleanup/tranmission is done. + **/ +bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, + struct i40e_ring *tx_ring, int napi_budget) +{ + unsigned int ntc, total_bytes = 0, budget = vsi->work_limit; + u32 i, completed_frames, frames_ready, xsk_frames = 0; + struct xdp_umem *umem = tx_ring->xsk_umem; + u32 head_idx = i40e_get_head(tx_ring); + bool work_done = true, xmit_done; + struct i40e_tx_buffer *tx_bi; + + if (head_idx < tx_ring->next_to_clean) + head_idx += tx_ring->count; + frames_ready = head_idx - tx_ring->next_to_clean; + + if (frames_ready == 0) { + goto out_xmit; + } else if (frames_ready > budget) { + completed_frames = budget; + work_done = false; + } else { + completed_frames = frames_ready; + } + + ntc = tx_ring->next_to_clean; + + for (i = 0; i < completed_frames; i++) { + tx_bi = &tx_ring->tx_bi[ntc]; + + if (tx_bi->xdpf) + i40e_clean_xdp_tx_buffer(tx_ring, tx_bi); + else + xsk_frames++; + + tx_bi->xdpf = NULL; + total_bytes += tx_bi->bytecount; + + if (++ntc >= tx_ring->count) + ntc = 0; + } + + tx_ring->next_to_clean += completed_frames; + if (unlikely(tx_ring->next_to_clean >= tx_ring->count)) + tx_ring->next_to_clean -= tx_ring->count; + + if (xsk_frames) + xsk_umem_complete_tx(umem, xsk_frames); + + i40e_arm_wb(tx_ring, vsi, budget); + i40e_update_tx_stats(tx_ring, completed_frames, total_bytes); + +out_xmit: + xmit_done = i40e_xmit_zc(tx_ring, budget); + + return work_done && xmit_done; +} + +/** + * i40e_xsk_async_xmit - Implements the ndo_xsk_async_xmit + * @dev: the netdevice + * @queue_id: queue id to wake up + * + * Returns <0 for errors, 0 otherwise. + **/ +int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id) +{ + struct i40e_netdev_priv *np = netdev_priv(dev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_ring *ring; + + if (test_bit(__I40E_VSI_DOWN, vsi->state)) + return -ENETDOWN; + + if (!i40e_enabled_xdp_vsi(vsi)) + return -ENXIO; + + if (queue_id >= vsi->num_queue_pairs) + return -ENXIO; + + if (!vsi->xdp_rings[queue_id]->xsk_umem) + return -ENXIO; + + ring = vsi->xdp_rings[queue_id]; + + /* The idea here is that if NAPI is running, mark a miss, so + * it will run again. If not, trigger an interrupt and + * schedule the NAPI from interrupt context. If NAPI would be + * scheduled here, the interrupt affinity would not be + * honored. + */ + if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) + i40e_force_wb(vsi, ring->q_vector); + + return 0; +} + +void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring) +{ + u16 i; + + for (i = 0; i < rx_ring->count; i++) { + struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; + + if (!rx_bi->addr) + continue; + + xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_bi->handle); + rx_bi->addr = NULL; + } +} + +/** + * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown + * @xdp_ring: XDP Tx ring + **/ +void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring) +{ + u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use; + struct xdp_umem *umem = tx_ring->xsk_umem; + struct i40e_tx_buffer *tx_bi; + u32 xsk_frames = 0; + + while (ntc != ntu) { + tx_bi = &tx_ring->tx_bi[ntc]; + + if (tx_bi->xdpf) + i40e_clean_xdp_tx_buffer(tx_ring, tx_bi); + else + xsk_frames++; + + tx_bi->xdpf = NULL; + + ntc++; + if (ntc >= tx_ring->count) + ntc = 0; + } + + if (xsk_frames) + xsk_umem_complete_tx(umem, xsk_frames); +} + +/** + * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have AF_XDP UMEM attached + * @vsi: vsi + * + * Returns true if any of the Rx rings has an AF_XDP UMEM attached + **/ +bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi) +{ + int i; + + if (!vsi->xsk_umems) + return false; + + for (i = 0; i < vsi->num_queue_pairs; i++) { + if (vsi->xsk_umems[i]) + return true; + } + + return false; +} diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h new file mode 100644 index 000000000000..9038c5d5cf08 --- /dev/null +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2018 Intel Corporation. */ + +#ifndef _I40E_XSK_H_ +#define _I40E_XSK_H_ + +struct i40e_vsi; +struct xdp_umem; +struct zero_copy_allocator; + +int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair); +int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair); +int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem, + u16 qid); +int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem, + u16 qid); +void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle); +bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count); +int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); + +bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, + struct i40e_ring *tx_ring, int napi_budget); +int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id); + +#endif /* _I40E_XSK_H_ */ |