summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-03-25 17:32:03 +0300
committerJakub Kicinski <kuba@kernel.org>2025-03-25 17:32:04 +0300
commitc1dacb45778ca9749cb2a5dd61c2d196979ecfce (patch)
treef7e0db8b201191607ada8f535212a87092ad1f12
parentb709857ecbf511bb25603790ff9c3f12abe36559 (diff)
parent29c8e323320f2a1c54d7dfb0937255fa8c2ff901 (diff)
downloadlinux-c1dacb45778ca9749cb2a5dd61c2d196979ecfce.tar.xz
Merge branch 'nexthop-convert-rtm_-new-del-nexthop-to-per-netns-rtnl'
Kuniyuki Iwashima says: ==================== nexthop: Convert RTM_{NEW,DEL}NEXTHOP to per-netns RTNL. Patch 1 - 5 move some validation for RTM_NEWNEXTHOP so that it can be called without RTNL. Patch 6 & 7 converts RTM_NEWNEXTHOP and RTM_DELNEXTHOP to per-netns RTNL. Note that RTM_GETNEXTHOP and RTM_GETNEXTHOPBUCKET are not touched in this series. rtm_get_nexthop() can be easily converted to RCU, but rtm_dump_nexthop() needs more work due to the left-to-right rbtree walk, which looks prone to node deletion and tree rotation without a retry mechanism. v1: https://lore.kernel.org/netdev/20250318233240.53946-1-kuniyu@amazon.com/ ==================== Link: https://patch.msgid.link/20250319230743.65267-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/ipv4/nexthop.c183
1 files changed, 112 insertions, 71 deletions
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 01df7dd795f0..467151517023 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1272,10 +1272,8 @@ static int nh_check_attr_group(struct net *net,
u16 nh_grp_type, struct netlink_ext_ack *extack)
{
unsigned int len = nla_len(tb[NHA_GROUP]);
- u8 nh_family = AF_UNSPEC;
struct nexthop_grp *nhg;
unsigned int i, j;
- u8 nhg_fdb = 0;
if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
NL_SET_ERR_MSG(extack,
@@ -1307,10 +1305,41 @@ static int nh_check_attr_group(struct net *net,
}
}
- if (tb[NHA_FDB])
- nhg_fdb = 1;
nhg = nla_data(tb[NHA_GROUP]);
- for (i = 0; i < len; ++i) {
+ for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
+ if (!tb[i])
+ continue;
+ switch (i) {
+ case NHA_HW_STATS_ENABLE:
+ case NHA_FDB:
+ continue;
+ case NHA_RES_GROUP:
+ if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
+ continue;
+ break;
+ }
+ NL_SET_ERR_MSG(extack,
+ "No other attributes can be set in nexthop groups");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nh_check_attr_group_rtnl(struct net *net, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ u8 nh_family = AF_UNSPEC;
+ struct nexthop_grp *nhg;
+ unsigned int len;
+ unsigned int i;
+ u8 nhg_fdb;
+
+ len = nla_len(tb[NHA_GROUP]) / sizeof(*nhg);
+ nhg = nla_data(tb[NHA_GROUP]);
+ nhg_fdb = !!tb[NHA_FDB];
+
+ for (i = 0; i < len; i++) {
struct nexthop *nh;
bool is_fdb_nh;
@@ -1330,22 +1359,6 @@ static int nh_check_attr_group(struct net *net,
return -EINVAL;
}
}
- for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
- if (!tb[i])
- continue;
- switch (i) {
- case NHA_HW_STATS_ENABLE:
- case NHA_FDB:
- continue;
- case NHA_RES_GROUP:
- if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
- continue;
- break;
- }
- NL_SET_ERR_MSG(extack,
- "No other attributes can be set in nexthop groups");
- return -EINVAL;
- }
return 0;
}
@@ -2679,9 +2692,6 @@ static struct nexthop *nexthop_create_group(struct net *net,
int err;
int i;
- if (WARN_ON(!num_nh))
- return ERR_PTR(-EINVAL);
-
nh = nexthop_alloc();
if (!nh)
return ERR_PTR(-ENOMEM);
@@ -2915,11 +2925,6 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
struct nexthop *nh;
int err;
- if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
- NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
- return ERR_PTR(-EINVAL);
- }
-
if (!cfg->nh_id) {
cfg->nh_id = nh_find_unused_id(net);
if (!cfg->nh_id) {
@@ -3016,19 +3021,13 @@ static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg,
}
static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nh_config *cfg,
+ struct nlmsghdr *nlh, struct nlattr **tb,
+ struct nh_config *cfg,
struct netlink_ext_ack *extack)
{
struct nhmsg *nhm = nlmsg_data(nlh);
- struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
int err;
- err = nlmsg_parse(nlh, sizeof(*nhm), tb,
- ARRAY_SIZE(rtm_nh_policy_new) - 1,
- rtm_nh_policy_new, extack);
- if (err < 0)
- return err;
-
err = -EINVAL;
if (nhm->resvd || nhm->nh_scope) {
NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
@@ -3093,7 +3092,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
NL_SET_ERR_MSG(extack, "Invalid group type");
goto out;
}
- err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb),
+
+ err = nh_check_attr_group(net, tb, ARRAY_SIZE(rtm_nh_policy_new),
cfg->nh_grp_type, extack);
if (err)
goto out;
@@ -3126,25 +3126,6 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
goto out;
}
- if (!cfg->nh_fdb && tb[NHA_OIF]) {
- cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
- if (cfg->nh_ifindex)
- cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
-
- if (!cfg->dev) {
- NL_SET_ERR_MSG(extack, "Invalid device index");
- goto out;
- } else if (!(cfg->dev->flags & IFF_UP)) {
- NL_SET_ERR_MSG(extack, "Nexthop device is not up");
- err = -ENETDOWN;
- goto out;
- } else if (!netif_carrier_ok(cfg->dev)) {
- NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
- err = -ENETDOWN;
- goto out;
- }
- }
-
err = -EINVAL;
if (tb[NHA_GATEWAY]) {
struct nlattr *gwa = tb[NHA_GATEWAY];
@@ -3188,7 +3169,7 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
err = lwtunnel_valid_encap_type(cfg->nh_encap_type,
- extack, true);
+ extack, false);
if (err < 0)
goto out;
@@ -3207,22 +3188,76 @@ out:
return err;
}
+static int rtm_to_nh_config_rtnl(struct net *net, struct nlattr **tb,
+ struct nh_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ if (tb[NHA_GROUP])
+ return nh_check_attr_group_rtnl(net, tb, extack);
+
+ if (tb[NHA_OIF]) {
+ cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
+ if (cfg->nh_ifindex)
+ cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
+
+ if (!cfg->dev) {
+ NL_SET_ERR_MSG(extack, "Invalid device index");
+ return -EINVAL;
+ }
+
+ if (!(cfg->dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ return -ENETDOWN;
+ }
+
+ if (!netif_carrier_ok(cfg->dev)) {
+ NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
+ return -ENETDOWN;
+ }
+ }
+
+ return 0;
+}
+
/* rtnl */
static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
struct net *net = sock_net(skb->sk);
struct nh_config cfg;
struct nexthop *nh;
int err;
- err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
- if (!err) {
- nh = nexthop_add(net, &cfg, extack);
- if (IS_ERR(nh))
- err = PTR_ERR(nh);
+ err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+ ARRAY_SIZE(rtm_nh_policy_new) - 1,
+ rtm_nh_policy_new, extack);
+ if (err < 0)
+ goto out;
+
+ err = rtm_to_nh_config(net, skb, nlh, tb, &cfg, extack);
+ if (err)
+ goto out;
+
+ if (cfg.nlflags & NLM_F_REPLACE && !cfg.nh_id) {
+ NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
+ err = -EINVAL;
+ goto out;
}
+ rtnl_net_lock(net);
+
+ err = rtm_to_nh_config_rtnl(net, tb, &cfg, extack);
+ if (err)
+ goto unlock;
+
+ nh = nexthop_add(net, &cfg, extack);
+ if (IS_ERR(nh))
+ err = PTR_ERR(nh);
+
+unlock:
+ rtnl_net_unlock(net);
+out:
return err;
}
@@ -3279,13 +3314,17 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
+ rtnl_net_lock(net);
+
nh = nexthop_find_by_id(net, id);
- if (!nh)
- return -ENOENT;
+ if (nh)
+ remove_nexthop(net, nh, &nlinfo);
+ else
+ err = -ENOENT;
- remove_nexthop(net, nh, &nlinfo);
+ rtnl_net_unlock(net);
- return 0;
+ return err;
}
/* rtnl */
@@ -4037,18 +4076,20 @@ static struct pernet_operations nexthop_net_ops = {
};
static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = {
- {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop},
- {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop},
+ {.msgtype = RTM_NEWNEXTHOP, .doit = rtm_new_nexthop,
+ .flags = RTNL_FLAG_DOIT_PERNET},
+ {.msgtype = RTM_DELNEXTHOP, .doit = rtm_del_nexthop,
+ .flags = RTNL_FLAG_DOIT_PERNET},
{.msgtype = RTM_GETNEXTHOP, .doit = rtm_get_nexthop,
.dumpit = rtm_dump_nexthop},
{.msgtype = RTM_GETNEXTHOPBUCKET, .doit = rtm_get_nexthop_bucket,
.dumpit = rtm_dump_nexthop_bucket},
{.protocol = PF_INET, .msgtype = RTM_NEWNEXTHOP,
- .doit = rtm_new_nexthop},
+ .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET, .msgtype = RTM_GETNEXTHOP,
.dumpit = rtm_dump_nexthop},
{.protocol = PF_INET6, .msgtype = RTM_NEWNEXTHOP,
- .doit = rtm_new_nexthop},
+ .doit = rtm_new_nexthop, .flags = RTNL_FLAG_DOIT_PERNET},
{.protocol = PF_INET6, .msgtype = RTM_GETNEXTHOP,
.dumpit = rtm_dump_nexthop},
};