summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-07-20 16:23:13 +0300
committerDavid S. Miller <davem@davemloft.net>2021-07-20 16:23:13 +0300
commit542bb39651d5f98665d637b1683d501fd320212c (patch)
tree1abf5208efa6f8760bcfbced231257f085263f42
parent2c0804044781f3de6c19efc01355611b839d5aa1 (diff)
parent1ec2230fc721e6270b0504c07945a74742c81f81 (diff)
downloadlinux-542bb39651d5f98665d637b1683d501fd320212c.tar.xz
Merge branch 'veth-flexible-channel-numbers'
Paolo Abeni says: ==================== veth: more flexible channels number configuration XDP setups can benefit from multiple veth RX/TX queues. Currently veth allow setting such number only at creation time via the 'numrxqueues' and 'numtxqueues' parameters. This series introduces support for the ethtool set_channel operation and allows configuring the queue number via a new module parameter. The veth default configuration is not changed. Finally self-tests are updated to check the new features, with both valid and invalid arguments. This iteration is a rebase of the most recent RFC, it does not provide a module parameter to configure the default number of queues, but I think could be worthy RFC v1 -> RFC v2: - report more consistent 'combined' count - make set_channel as resilient as possible to errors - drop module parameter - but I would still consider it. - more self-tests ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/veth.c305
-rw-r--r--mm/memcontrol.c2
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/core/dev.c6
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/scm.c4
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/fib_trie.c4
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/sit.c5
-rwxr-xr-xtools/testing/selftests/net/veth.sh183
15 files changed, 457 insertions, 74 deletions
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index bdb7ce3cb054..381670c08ba7 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -224,12 +224,13 @@ static void veth_get_channels(struct net_device *dev,
{
channels->tx_count = dev->real_num_tx_queues;
channels->rx_count = dev->real_num_rx_queues;
- channels->max_tx = dev->real_num_tx_queues;
- channels->max_rx = dev->real_num_rx_queues;
- channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
- channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
+ channels->max_tx = dev->num_tx_queues;
+ channels->max_rx = dev->num_rx_queues;
}
+static int veth_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch);
+
static const struct ethtool_ops veth_ethtool_ops = {
.get_drvinfo = veth_get_drvinfo,
.get_link = ethtool_op_get_link,
@@ -239,6 +240,7 @@ static const struct ethtool_ops veth_ethtool_ops = {
.get_link_ksettings = veth_get_link_ksettings,
.get_ts_info = ethtool_op_get_ts_info,
.get_channels = veth_get_channels,
+ .set_channels = veth_set_channels,
};
/* general routines */
@@ -928,12 +930,12 @@ static int veth_poll(struct napi_struct *napi, int budget)
return done;
}
-static int __veth_napi_enable(struct net_device *dev)
+static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
{
struct veth_priv *priv = netdev_priv(dev);
int err, i;
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
@@ -941,7 +943,7 @@ static int __veth_napi_enable(struct net_device *dev)
goto err_xdp_ring;
}
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
napi_enable(&rq->xdp_napi);
@@ -949,19 +951,25 @@ static int __veth_napi_enable(struct net_device *dev)
}
return 0;
+
err_xdp_ring:
- for (i--; i >= 0; i--)
+ for (i--; i >= start; i--)
ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
return err;
}
-static void veth_napi_del(struct net_device *dev)
+static int __veth_napi_enable(struct net_device *dev)
+{
+ return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
+}
+
+static void veth_napi_del_range(struct net_device *dev, int start, int end)
{
struct veth_priv *priv = netdev_priv(dev);
int i;
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
rcu_assign_pointer(priv->rq[i].napi, NULL);
@@ -970,7 +978,7 @@ static void veth_napi_del(struct net_device *dev)
}
synchronize_net();
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
rq->rx_notify_masked = false;
@@ -978,41 +986,90 @@ static void veth_napi_del(struct net_device *dev)
}
}
+static void veth_napi_del(struct net_device *dev)
+{
+ veth_napi_del_range(dev, 0, dev->real_num_rx_queues);
+}
+
static bool veth_gro_requested(const struct net_device *dev)
{
return !!(dev->wanted_features & NETIF_F_GRO);
}
-static int veth_enable_xdp(struct net_device *dev)
+static int veth_enable_xdp_range(struct net_device *dev, int start, int end,
+ bool napi_already_on)
{
- bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
struct veth_priv *priv = netdev_priv(dev);
int err, i;
- if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
- for (i = 0; i < dev->real_num_rx_queues; i++) {
- struct veth_rq *rq = &priv->rq[i];
+ for (i = start; i < end; i++) {
+ struct veth_rq *rq = &priv->rq[i];
- if (!napi_already_on)
- netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
- err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
- if (err < 0)
- goto err_rxq_reg;
+ if (!napi_already_on)
+ netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
+ err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
+ if (err < 0)
+ goto err_rxq_reg;
- err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
- MEM_TYPE_PAGE_SHARED,
- NULL);
- if (err < 0)
- goto err_reg_mem;
+ err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err < 0)
+ goto err_reg_mem;
- /* Save original mem info as it can be overwritten */
- rq->xdp_mem = rq->xdp_rxq.mem;
- }
+ /* Save original mem info as it can be overwritten */
+ rq->xdp_mem = rq->xdp_rxq.mem;
+ }
+ return 0;
+
+err_reg_mem:
+ xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
+err_rxq_reg:
+ for (i--; i >= start; i--) {
+ struct veth_rq *rq = &priv->rq[i];
+
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+ if (!napi_already_on)
+ netif_napi_del(&rq->xdp_napi);
+ }
+
+ return err;
+}
+
+static void veth_disable_xdp_range(struct net_device *dev, int start, int end,
+ bool delete_napi)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ int i;
+
+ for (i = start; i < end; i++) {
+ struct veth_rq *rq = &priv->rq[i];
+
+ rq->xdp_rxq.mem = rq->xdp_mem;
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+
+ if (delete_napi)
+ netif_napi_del(&rq->xdp_napi);
+ }
+}
+
+static int veth_enable_xdp(struct net_device *dev)
+{
+ bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
+ struct veth_priv *priv = netdev_priv(dev);
+ int err, i;
+
+ if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
+ err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on);
+ if (err)
+ return err;
if (!napi_already_on) {
err = __veth_napi_enable(dev);
- if (err)
- goto err_rxq_reg;
+ if (err) {
+ veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
+ return err;
+ }
if (!veth_gro_requested(dev)) {
/* user-space did not require GRO, but adding XDP
@@ -1030,18 +1087,6 @@ static int veth_enable_xdp(struct net_device *dev)
}
return 0;
-err_reg_mem:
- xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
-err_rxq_reg:
- for (i--; i >= 0; i--) {
- struct veth_rq *rq = &priv->rq[i];
-
- xdp_rxq_info_unreg(&rq->xdp_rxq);
- if (!napi_already_on)
- netif_napi_del(&rq->xdp_napi);
- }
-
- return err;
}
static void veth_disable_xdp(struct net_device *dev)
@@ -1064,28 +1109,23 @@ static void veth_disable_xdp(struct net_device *dev)
}
}
- for (i = 0; i < dev->real_num_rx_queues; i++) {
- struct veth_rq *rq = &priv->rq[i];
-
- rq->xdp_rxq.mem = rq->xdp_mem;
- xdp_rxq_info_unreg(&rq->xdp_rxq);
- }
+ veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
}
-static int veth_napi_enable(struct net_device *dev)
+static int veth_napi_enable_range(struct net_device *dev, int start, int end)
{
struct veth_priv *priv = netdev_priv(dev);
int err, i;
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
}
- err = __veth_napi_enable(dev);
+ err = __veth_napi_enable_range(dev, start, end);
if (err) {
- for (i = 0; i < dev->real_num_rx_queues; i++) {
+ for (i = start; i < end; i++) {
struct veth_rq *rq = &priv->rq[i];
netif_napi_del(&rq->xdp_napi);
@@ -1095,6 +1135,128 @@ static int veth_napi_enable(struct net_device *dev)
return err;
}
+static int veth_napi_enable(struct net_device *dev)
+{
+ return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
+}
+
+static void veth_disable_range_safe(struct net_device *dev, int start, int end)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+
+ if (start >= end)
+ return;
+
+ if (priv->_xdp_prog) {
+ veth_napi_del_range(dev, start, end);
+ veth_disable_xdp_range(dev, start, end, false);
+ } else if (veth_gro_requested(dev)) {
+ veth_napi_del_range(dev, start, end);
+ }
+}
+
+static int veth_enable_range_safe(struct net_device *dev, int start, int end)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ int err;
+
+ if (start >= end)
+ return 0;
+
+ if (priv->_xdp_prog) {
+ /* these channels are freshly initialized, napi is not on there even
+ * when GRO is requeste
+ */
+ err = veth_enable_xdp_range(dev, start, end, false);
+ if (err)
+ return err;
+
+ err = __veth_napi_enable_range(dev, start, end);
+ if (err) {
+ /* on error always delete the newly added napis */
+ veth_disable_xdp_range(dev, start, end, true);
+ return err;
+ }
+ } else if (veth_gro_requested(dev)) {
+ return veth_napi_enable_range(dev, start, end);
+ }
+ return 0;
+}
+
+static int veth_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct veth_priv *priv = netdev_priv(dev);
+ unsigned int old_rx_count, new_rx_count;
+ struct veth_priv *peer_priv;
+ struct net_device *peer;
+ int err;
+
+ /* sanity check. Upper bounds are already enforced by the caller */
+ if (!ch->rx_count || !ch->tx_count)
+ return -EINVAL;
+
+ /* avoid braking XDP, if that is enabled */
+ peer = rtnl_dereference(priv->peer);
+ peer_priv = peer ? netdev_priv(peer) : NULL;
+ if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues)
+ return -EINVAL;
+
+ if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues)
+ return -EINVAL;
+
+ old_rx_count = dev->real_num_rx_queues;
+ new_rx_count = ch->rx_count;
+ if (netif_running(dev)) {
+ /* turn device off */
+ netif_carrier_off(dev);
+ if (peer)
+ netif_carrier_off(peer);
+
+ /* try to allocate new resurces, as needed*/
+ err = veth_enable_range_safe(dev, old_rx_count, new_rx_count);
+ if (err)
+ goto out;
+ }
+
+ err = netif_set_real_num_rx_queues(dev, ch->rx_count);
+ if (err)
+ goto revert;
+
+ err = netif_set_real_num_tx_queues(dev, ch->tx_count);
+ if (err) {
+ int err2 = netif_set_real_num_rx_queues(dev, old_rx_count);
+
+ /* this error condition could happen only if rx and tx change
+ * in opposite directions (e.g. tx nr raises, rx nr decreases)
+ * and we can't do anything to fully restore the original
+ * status
+ */
+ if (err2)
+ pr_warn("Can't restore rx queues config %d -> %d %d",
+ new_rx_count, old_rx_count, err2);
+ else
+ goto revert;
+ }
+
+out:
+ if (netif_running(dev)) {
+ /* note that we need to swap the arguments WRT the enable part
+ * to identify the range we have to disable
+ */
+ veth_disable_range_safe(dev, new_rx_count, old_rx_count);
+ netif_carrier_on(dev);
+ if (peer)
+ netif_carrier_on(peer);
+ }
+ return err;
+
+revert:
+ new_rx_count = old_rx_count;
+ old_rx_count = ch->rx_count;
+ goto out;
+}
+
static int veth_open(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
@@ -1447,6 +1609,23 @@ static void veth_disable_gro(struct net_device *dev)
netdev_update_features(dev);
}
+static int veth_init_queues(struct net_device *dev, struct nlattr *tb[])
+{
+ int err;
+
+ if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) {
+ err = netif_set_real_num_tx_queues(dev, 1);
+ if (err)
+ return err;
+ }
+ if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) {
+ err = netif_set_real_num_rx_queues(dev, 1);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static int veth_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1556,13 +1735,21 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
priv = netdev_priv(dev);
rcu_assign_pointer(priv->peer, peer);
+ err = veth_init_queues(dev, tb);
+ if (err)
+ goto err_queues;
priv = netdev_priv(peer);
rcu_assign_pointer(priv->peer, dev);
+ err = veth_init_queues(peer, tb);
+ if (err)
+ goto err_queues;
veth_disable_gro(dev);
return 0;
+err_queues:
+ unregister_netdevice(dev);
err_register_dev:
/* nothing to do */
err_configure_peer:
@@ -1608,6 +1795,16 @@ static struct net *veth_get_link_net(const struct net_device *dev)
return peer ? dev_net(peer) : dev_net(dev);
}
+static unsigned int veth_get_num_queues(void)
+{
+ /* enforce the same queue limit as rtnl_create_link */
+ int queues = num_possible_cpus();
+
+ if (queues > 4096)
+ queues = 4096;
+ return queues;
+}
+
static struct rtnl_link_ops veth_link_ops = {
.kind = DRV_NAME,
.priv_size = sizeof(struct veth_priv),
@@ -1618,6 +1815,8 @@ static struct rtnl_link_ops veth_link_ops = {
.policy = veth_policy,
.maxtype = VETH_INFO_MAX,
.get_link_net = veth_get_link_net,
+ .get_num_tx_queues = veth_get_num_queues,
+ .get_num_rx_queues = veth_get_num_queues,
};
/*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ae1f5d0cb581..1bbf239b06f2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -968,7 +968,7 @@ static __always_inline bool memcg_kmem_bypass(void)
return false;
/* Memcg to charge can't be determined. */
- if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+ if (!in_task() || !current->mm || (current->flags & PF_KTHREAD))
return true;
return false;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 4cdf8416869d..55275ef9a31a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -67,7 +67,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
return 0;
size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
- array = kzalloc(size, GFP_KERNEL);
+ array = kzalloc(size, GFP_KERNEL_ACCOUNT);
if (array == NULL)
return -ENOBUFS;
diff --git a/net/core/dev.c b/net/core/dev.c
index 3ee58876e8f5..01713887208f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10119,7 +10119,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
BUG_ON(count < 1);
- rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!rx)
return -ENOMEM;
@@ -10186,7 +10186,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
if (count < 1 || count > 0xffff)
return -EINVAL;
- tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!tx)
return -ENOMEM;
@@ -10826,7 +10826,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
- p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!p)
return NULL;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a9f937975080..79df7cd9dbc1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
{
struct fib_rule *r;
- r = kzalloc(ops->rule_size, GFP_KERNEL);
+ r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (r == NULL)
return -ENOMEM;
@@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
+ nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (!nlrule) {
err = -ENOMEM;
goto errout;
diff --git a/net/core/scm.c b/net/core/scm.c
index ae3085d9aae8..5c356f0dee30 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -79,7 +79,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (!fpl)
{
- fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+ fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT);
if (!fpl)
return -ENOMEM;
*fplp = fpl;
@@ -355,7 +355,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
return NULL;
new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7eb0fb231940..abb5c596a817 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1126,7 +1126,7 @@ static int __init dccp_init(void)
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 73721a4448bd..d38124bd1b94 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev)
static struct in_ifaddr *inet_alloc_ifa(void)
{
- return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
+ return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
}
static void inet_rcu_free_ifa(struct rcu_head *head)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 25cf387cca5b..8060524f4256 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2380,11 +2380,11 @@ void __init fib_trie_init(void)
{
fn_alias_kmem = kmem_cache_create("ip_fib_alias",
sizeof(struct fib_alias),
- 0, SLAB_PANIC, NULL);
+ 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
LEAF_SIZE,
- 0, SLAB_PANIC, NULL);
+ 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
}
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8cb44040ec68..f931def6302e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4512,7 +4512,9 @@ void __init tcp_init(void)
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+ SLAB_ACCOUNT,
+ NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e2f625e39455..bc330fffb4a8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1080,7 +1080,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- ifa = kzalloc(sizeof(*ifa), gfp_flags);
+ ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT);
if (!ifa) {
err = -ENOBUFS;
goto out;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2d650dc24349..a8f118e469b7 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2449,8 +2449,8 @@ int __init fib6_init(void)
int ret = -ENOMEM;
fib6_node_kmem = kmem_cache_create("fib6_nodes",
- sizeof(struct fib6_node),
- 0, SLAB_HWCACHE_ALIGN,
+ sizeof(struct fib6_node), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
NULL);
if (!fib6_node_kmem)
goto out;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7b756a7dc036..5f7286acca33 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6638,7 +6638,7 @@ int __init ip6_route_init(void)
ret = -ENOMEM;
ip6_dst_ops_template.kmem_cachep =
kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!ip6_dst_ops_template.kmem_cachep)
goto out;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index df5bea818410..33adc12b697d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -321,7 +321,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* we try harder to allocate.
*/
kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
- kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
NULL;
rcu_read_lock();
@@ -334,7 +334,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* For root users, retry allocating enough memory for
* the answer.
*/
- kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
+ kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT |
+ __GFP_NOWARN);
if (!kp) {
ret = -ENOMEM;
goto out;
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 11d7cdb898c0..19eac3e44c06 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST
readonly BM_NET_V4=192.168.1.
readonly BM_NET_V6=2001:db8::
-readonly NPROCS=`nproc`
+readonly CPUS=`nproc`
ret=0
cleanup() {
@@ -75,6 +75,29 @@ chk_tso_flag() {
__chk_flag "$1" $2 $3 tcp-segmentation-offload
}
+chk_channels() {
+ local msg="$1"
+ local target=$2
+ local rx=$3
+ local tx=$4
+
+ local dev=veth$target
+
+ local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep RX: | tail -n 1 | awk '{print $2}' `
+ local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep TX: | tail -n 1 | awk '{print $2}'`
+ local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep Combined: | tail -n 1 | awk '{print $2}'`
+
+ printf "%-60s" "$msg"
+ if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then
+ echo " ok "
+ else
+ echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
+ fi
+}
+
chk_gro() {
local msg="$1"
local expected=$2
@@ -107,11 +130,100 @@ chk_gro() {
fi
}
+__change_channels()
+{
+ local cur_cpu
+ local end=$1
+ local cur
+ local i
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ for i in `seq 1 $CPUS`; do
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i
+ ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i
+ done
+
+ for i in `seq 1 $((CPUS - 1))`; do
+ cur_cpu=$((CPUS - $i))
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu
+ ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu
+ done
+ done
+}
+
+__send_data() {
+ local end=$1
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
+ done
+}
+
+do_stress() {
+ local end
+ printf -v end '%(%s)T'
+ end=$((end + $STRESS))
+
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+
+ ip netns exec $NS_DST ./udpgso_bench_rx &
+ local rx_pid=$!
+
+ echo "Running stress test for $STRESS seconds..."
+ __change_channels $end &
+ local ch_pid=$!
+ __send_data $end &
+ local data_pid_1=$!
+ __send_data $end &
+ local data_pid_2=$!
+ __send_data $end &
+ local data_pid_3=$!
+ __send_data $end &
+ local data_pid_4=$!
+
+ wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4
+ kill -9 $rx_pid
+ echo "done"
+
+ # restore previous setting
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
+}
+
+usage() {
+ echo "Usage: $0 [-h] [-s <seconds>]"
+ echo -e "\t-h: show this help"
+ echo -e "\t-s: run optional stress tests for the given amount of seconds"
+}
+
+STRESS=0
+while getopts "hs:" option; do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "s")
+ STRESS=$OPTARG
+ ;;
+ esac
+done
+
if [ ! -f ../bpf/xdp_dummy.o ]; then
echo "Missing xdp_dummy helper. Build bpf selftest first"
exit 1
fi
+[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
+[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
+
create_ns
chk_gro_flag "default - gro flag" $SRC off
chk_gro_flag " - peer gro flag" $DST off
@@ -134,6 +246,8 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+chk_channels "default channels" $DST 1 1
+
ip -n $NS_DST link set dev veth$DST down
ip netns exec $NS_DST ethtool -K veth$DST gro on
chk_gro_flag "with gro enabled on link down - gro flag" $DST on
@@ -147,6 +261,56 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+
+CUR_TX=1
+CUR_RX=1
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "setting tx channels" $DST 1 2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+ chk_channels "setting both rx and tx channels" $DST 3 3
+ CUR_RX=3
+ CUR_TX=3
+fi
+
+ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
+chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
+
+ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
+chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
+
+if [ $CPUS -gt 1 ]; then
+ # this also tests queues nr reduction
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+ printf "%-60s" "bad setting: XDP with RX nr less than TX"
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+
+ # the following tests will run with multiple channels active
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null
+ printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ CUR_RX=2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
+ ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ chk_channels "setting invalid channels nr" $DST 2 2
+fi
+
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
chk_gro_flag "with xdp attached - gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
@@ -167,10 +331,27 @@ chk_gro_flag " - after gro on xdp off, gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
chk_tso_flag " - tso flag" $SRC on
chk_tso_flag " - peer tso flag" $DST on
+
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 1
+ chk_channels "decreasing tx channels with device down" $DST 2 1
+fi
+
ip -n $NS_DST link set dev veth$DST up
ip -n $NS_SRC link set dev veth$SRC up
chk_gro " - aggregation" 1
+if [ $CPUS -gt 1 ]; then
+ [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
+
+ ip -n $NS_DST link set dev veth$DST down
+ ip -n $NS_SRC link set dev veth$SRC down
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "increasing tx channels with device down" $DST 2 2
+ ip -n $NS_DST link set dev veth$DST up
+ ip -n $NS_SRC link set dev veth$SRC up
+fi
+
ip netns exec $NS_DST ethtool -K veth$DST gro off
ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
chk_gro "aggregation again with default and TSO off" 10