summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c33
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c4
-rw-r--r--drivers/net/ethernet/rocker/rocker_ofdpa.c25
-rw-r--r--include/net/ip6_fib.h11
-rw-r--r--include/net/ip6_route.h13
-rw-r--r--include/net/ip_fib.h25
-rw-r--r--include/net/nexthop.h113
-rw-r--r--net/core/filter.c3
-rw-r--r--net/ipv4/fib_frontend.c15
-rw-r--r--net/ipv4/fib_lookup.h1
-rw-r--r--net/ipv4/fib_rules.c8
-rw-r--r--net/ipv4/fib_semantics.c257
-rw-r--r--net/ipv4/fib_trie.c38
-rw-r--r--net/ipv4/nexthop.c111
-rw-r--r--net/ipv4/route.c5
-rw-r--r--net/ipv6/addrconf.c5
-rw-r--r--net/ipv6/ip6_fib.c22
-rw-r--r--net/ipv6/ndisc.c3
-rw-r--r--net/ipv6/route.c148
20 files changed, 698 insertions, 175 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 8212bfd05733..e69766393990 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/netdevice.h>
+#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
@@ -110,6 +111,8 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
struct fib_info *fi)
{
struct lag_mp *mp = &ldev->lag_mp;
+ struct fib_nh *fib_nh0, *fib_nh1;
+ unsigned int nhs;
/* Handle delete event */
if (event == FIB_EVENT_ENTRY_DEL) {
@@ -120,9 +123,11 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
}
/* Handle add/replace event */
- if (fi->fib_nhs == 1) {
+ nhs = fib_info_num_path(fi);
+ if (nhs == 1) {
if (__mlx5_lag_is_active(ldev)) {
- struct net_device *nh_dev = fi->fib_nh[0].fib_nh_dev;
+ struct fib_nh *nh = fib_info_nh(fi, 0);
+ struct net_device *nh_dev = nh->fib_nh_dev;
int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
mlx5_lag_set_port_affinity(ldev, ++i);
@@ -130,14 +135,16 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
return;
}
- if (fi->fib_nhs != 2)
+ if (nhs != 2)
return;
/* Verify next hops are ports of the same hca */
- if (!(fi->fib_nh[0].fib_nh_dev == ldev->pf[0].netdev &&
- fi->fib_nh[1].fib_nh_dev == ldev->pf[1].netdev) &&
- !(fi->fib_nh[0].fib_nh_dev == ldev->pf[1].netdev &&
- fi->fib_nh[1].fib_nh_dev == ldev->pf[0].netdev)) {
+ fib_nh0 = fib_info_nh(fi, 0);
+ fib_nh1 = fib_info_nh(fi, 1);
+ if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev &&
+ fib_nh1->fib_nh_dev == ldev->pf[1].netdev) &&
+ !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev &&
+ fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) {
mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n");
return;
}
@@ -174,7 +181,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
mlx5_lag_set_port_affinity(ldev, i);
}
} else if (event == FIB_EVENT_NH_ADD &&
- fi->fib_nhs == 2) {
+ fib_info_num_path(fi) == 2) {
mlx5_lag_set_port_affinity(ldev, 0);
}
}
@@ -238,6 +245,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
struct mlx5_fib_event_work *fib_work;
struct fib_entry_notifier_info *fen_info;
struct fib_nh_notifier_info *fnh_info;
+ struct net_device *fib_dev;
struct fib_info *fi;
if (info->family != AF_INET)
@@ -254,8 +262,13 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
fi = fen_info->fi;
- if (fi->fib_dev != ldev->pf[0].netdev &&
- fi->fib_dev != ldev->pf[1].netdev) {
+ if (fi->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
+ fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+ if (fib_dev != ldev->pf[0].netdev &&
+ fib_dev != ldev->pf[1].netdev) {
return NOTIFY_DONE;
}
fib_work = mlx5_lag_init_fib_work(ldev, event);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0ec52be7cc33..23f17ea52061 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -21,6 +21,7 @@
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
+#include <net/nexthop.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
@@ -3816,23 +3817,25 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
}
static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
- const struct fib_info *fi)
+ struct fib_info *fi)
{
- return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK ||
- mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
+ const struct fib_nh *nh = fib_info_nh(fi, 0);
+
+ return nh->fib_nh_scope == RT_SCOPE_LINK ||
+ mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
+ unsigned int nhs = fib_info_num_path(fi);
struct mlxsw_sp_nexthop_group *nh_grp;
struct mlxsw_sp_nexthop *nh;
struct fib_nh *fib_nh;
int i;
int err;
- nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs),
- GFP_KERNEL);
+ nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
if (!nh_grp)
return ERR_PTR(-ENOMEM);
nh_grp->priv = fi;
@@ -3840,11 +3843,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
nh_grp->neigh_tbl = &arp_tbl;
nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
- nh_grp->count = fi->fib_nhs;
+ nh_grp->count = nhs;
fib_info_hold(fi);
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
- fib_nh = &fi->fib_nh[i];
+ fib_nh = fib_info_nh(fi, i);
err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
if (err)
goto err_nexthop4_init;
@@ -4282,9 +4285,9 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
const struct fib_entry_notifier_info *fen_info,
struct mlxsw_sp_fib_entry *fib_entry)
{
+ struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
- struct net_device *dev = fen_info->fi->fib_dev;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct fib_info *fi = fen_info->fi;
@@ -6119,6 +6122,20 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
return notifier_from_errno(-EINVAL);
}
+ if (fen_info->fi->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
+ } else if (info->family == AF_INET6) {
+ struct fib6_entry_notifier_info *fen6_info;
+
+ fen6_info = container_of(info,
+ struct fib6_entry_notifier_info,
+ info);
+ if (fen6_info->rt->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
}
break;
}
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 7ae6c124bfe9..45b3325c3a38 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2214,6 +2214,10 @@ static int rocker_router_fib_event(struct notifier_block *nb,
NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
return notifier_from_errno(-EINVAL);
}
+ if (fen_info->fi->nh) {
+ NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+ return notifier_from_errno(-EINVAL);
+ }
}
memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 30a49802fb51..47ed9d41047f 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -22,6 +22,7 @@
#include <net/neighbour.h>
#include <net/switchdev.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/arp.h>
#include "rocker.h"
@@ -2286,8 +2287,8 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, __be32 dst,
/* XXX support ECMP */
- nh = fi->fib_nh;
- nh_on_port = (fi->fib_dev == ofdpa_port->dev);
+ nh = fib_info_nh(fi, 0);
+ nh_on_port = (nh->fib_nh_dev == ofdpa_port->dev);
has_gw = !!nh->fib_nh_gw4;
if (has_gw && nh_on_port) {
@@ -2737,11 +2738,13 @@ static int ofdpa_fib4_add(struct rocker *rocker,
{
struct ofdpa *ofdpa = rocker->wpriv;
struct ofdpa_port *ofdpa_port;
+ struct fib_nh *nh;
int err;
if (ofdpa->fib_aborted)
return 0;
- ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+ nh = fib_info_nh(fen_info->fi, 0);
+ ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker);
if (!ofdpa_port)
return 0;
err = ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst),
@@ -2749,7 +2752,7 @@ static int ofdpa_fib4_add(struct rocker *rocker,
fen_info->tb_id, 0);
if (err)
return err;
- fen_info->fi->fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
+ nh->fib_nh_flags |= RTNH_F_OFFLOAD;
return 0;
}
@@ -2758,13 +2761,15 @@ static int ofdpa_fib4_del(struct rocker *rocker,
{
struct ofdpa *ofdpa = rocker->wpriv;
struct ofdpa_port *ofdpa_port;
+ struct fib_nh *nh;
if (ofdpa->fib_aborted)
return 0;
- ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+ nh = fib_info_nh(fen_info->fi, 0);
+ ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker);
if (!ofdpa_port)
return 0;
- fen_info->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
+ nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
return ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst),
fen_info->dst_len, fen_info->fi,
fen_info->tb_id, OFDPA_OP_FLAG_REMOVE);
@@ -2784,14 +2789,16 @@ static void ofdpa_fib4_abort(struct rocker *rocker)
spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags);
hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) {
+ struct fib_nh *nh;
+
if (flow_entry->key.tbl_id !=
ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING)
continue;
- ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev,
- rocker);
+ nh = fib_info_nh(flow_entry->fi, 0);
+ ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker);
if (!ofdpa_port)
continue;
- flow_entry->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
+ nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE,
flow_entry);
}
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index ebe5d65f97e0..1a8acd51b277 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -146,7 +146,10 @@ struct fib6_info {
* destination, but not the same gateway. nsiblings is just a cache
* to speed up lookup.
*/
- struct list_head fib6_siblings;
+ union {
+ struct list_head fib6_siblings;
+ struct list_head nh_list;
+ };
unsigned int fib6_nsiblings;
refcount_t fib6_ref;
@@ -170,6 +173,7 @@ struct fib6_info {
unused:3;
struct rcu_head rcu;
+ struct nexthop *nh;
struct fib6_nh fib6_nh[0];
};
@@ -441,11 +445,6 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
rcu_read_unlock();
}
-static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
-{
- return f6i->fib6_nh->fib_nh_dev;
-}
-
int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index a6ce6ea856b9..7375a165fd98 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -27,6 +27,7 @@ struct route_info {
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/route.h>
+#include <net/nexthop.h>
#define RT6_LOOKUP_F_IFACE 0x00000001
#define RT6_LOOKUP_F_REACHABLE 0x00000002
@@ -66,10 +67,13 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}
+/* fib entries using a nexthop object can not be coalesced into
+ * a multipath route
+ */
static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
/* the RTF_ADDRCONF flag filters out RA's */
- return !(f6i->fib6_flags & RTF_ADDRCONF) &&
+ return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh &&
f6i->fib6_nh->fib_nh_gw_family;
}
@@ -275,8 +279,13 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
- struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh;
+ struct fib6_nh *nha, *nhb;
+
+ if (a->nh || b->nh)
+ return nexthop_cmp(a->nh, b->nh);
+ nha = a->fib6_nh;
+ nhb = b->fib6_nh;
return nha->fib_nh_dev == nhb->fib_nh_dev &&
ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
!lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 70ba0302c8c9..071d280de389 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -129,9 +129,12 @@ struct fib_nh {
* This structure contains data shared by many of routes.
*/
+struct nexthop;
+
struct fib_info {
struct hlist_node fib_hash;
struct hlist_node fib_lhash;
+ struct list_head nh_list;
struct net *fib_net;
int fib_treeref;
refcount_t fib_clntref;
@@ -151,9 +154,9 @@ struct fib_info {
int fib_nhs;
bool fib_nh_is_v6;
bool nh_updated;
+ struct nexthop *nh;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
-#define fib_dev fib_nh[0].fib_nh_dev
};
@@ -190,19 +193,14 @@ struct fib_result_nl {
int err;
};
-static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
-{
- return &fi->fib_nh[nhsel].nh_common;
-}
-
#ifdef CONFIG_IP_MULTIPLE_TABLES
#define FIB_TABLE_HASHSZ 256
#else
#define FIB_TABLE_HASHSZ 2
#endif
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
- unsigned char scope);
+__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
+ unsigned char scope);
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res);
#define FIB_RES_NHC(res) ((res).nhc)
@@ -461,11 +459,18 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
struct fib_nh_common *nhc = res->nhc;
- struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_MULTIPLE_TABLES
u32 rtag;
#endif
- *itag = nh->nh_tclassid << 16;
+ if (nhc->nhc_family == AF_INET) {
+ struct fib_nh *nh;
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+ *itag = nh->nh_tclassid << 16;
+ } else {
+ *itag = 0;
+ }
+
#ifdef CONFIG_IP_MULTIPLE_TABLES
rtag = res->tclassid;
if (*itag == 0)
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 6e1b8f53624c..aff7b2410057 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -10,6 +10,7 @@
#define __LINUX_NEXTHOP_H
#include <linux/netdevice.h>
+#include <linux/route.h>
#include <linux/types.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
@@ -77,6 +78,8 @@ struct nh_group {
struct nexthop {
struct rb_node rb_node; /* entry on netns rbtree */
+ struct list_head fi_list; /* v4 entries using nh */
+ struct list_head f6i_list; /* v6 entries using nh */
struct list_head grp_list; /* nh group entries using this nh */
struct net *net;
@@ -110,6 +113,12 @@ static inline void nexthop_put(struct nexthop *nh)
call_rcu(&nh->rcu, nexthop_free_rcu);
}
+static inline bool nexthop_cmp(const struct nexthop *nh1,
+ const struct nexthop *nh2)
+{
+ return nh1 == nh2;
+}
+
static inline bool nexthop_is_multipath(const struct nexthop *nh)
{
if (nh->is_group) {
@@ -192,4 +201,108 @@ static inline bool nexthop_is_blackhole(const struct nexthop *nh)
nhi = rcu_dereference_rtnl(nh->nh_info);
return nhi->reject_nh;
}
+
+static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
+{
+ struct nh_info *nhi;
+ struct nexthop *nh;
+
+ nh = nexthop_select_path(res->fi->nh, hash);
+ nhi = rcu_dereference(nh->nh_info);
+ res->nhc = &nhi->fib_nhc;
+}
+
+/* called with rcu read lock or rtnl held */
+static inline
+struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
+{
+ struct nh_info *nhi;
+
+ BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
+ BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
+
+ if (nexthop_is_multipath(nh)) {
+ nh = nexthop_mpath_select(nh, nhsel);
+ if (!nh)
+ return NULL;
+ }
+
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ return &nhi->fib_nhc;
+}
+
+static inline unsigned int fib_info_num_path(const struct fib_info *fi)
+{
+ if (unlikely(fi->nh))
+ return nexthop_num_path(fi->nh);
+
+ return fi->fib_nhs;
+}
+
+int fib_check_nexthop(struct nexthop *nh, u8 scope,
+ struct netlink_ext_ack *extack);
+
+static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
+{
+ if (unlikely(fi->nh))
+ return nexthop_fib_nhc(fi->nh, nhsel);
+
+ return &fi->fib_nh[nhsel].nh_common;
+}
+
+/* only used when fib_nh is built into fib_info */
+static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
+{
+ WARN_ON(fi->nh);
+
+ return &fi->fib_nh[nhsel];
+}
+
+/*
+ * IPv6 variants
+ */
+int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
+ struct netlink_ext_ack *extack);
+
+static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
+{
+ struct nh_info *nhi;
+
+ if (nexthop_is_multipath(nh)) {
+ nh = nexthop_mpath_select(nh, 0);
+ if (!nh)
+ return NULL;
+ }
+
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ if (nhi->family == AF_INET6)
+ return &nhi->fib6_nh;
+
+ return NULL;
+}
+
+static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
+{
+ struct fib6_nh *fib6_nh;
+
+ fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
+ return fib6_nh->fib_nh_dev;
+}
+
+static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
+{
+ struct nexthop *nh = res->f6i->nh;
+ struct nh_info *nhi;
+
+ nh = nexthop_select_path(nh, hash);
+
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ if (nhi->reject_nh) {
+ res->fib6_type = RTN_BLACKHOLE;
+ res->fib6_flags |= RTF_REJECT;
+ res->nh = nexthop_fib6_nh(nh);
+ } else {
+ res->nh = &nhi->fib6_nh;
+ }
+}
#endif
diff --git a/net/core/filter.c b/net/core/filter.c
index 55bfc941d17a..2ae72bbfa6d2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -66,6 +66,7 @@
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
@@ -4674,7 +4675,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (res.type != RTN_UNICAST)
return BPF_FIB_LKUP_RET_NOT_FWDED;
- if (res.fi->fib_nhs > 1)
+ if (fib_info_num_path(res.fi) > 1)
fib_select_path(net, &res, &fl4, NULL);
if (check_mtu) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c7cdb8d0d164..5ea2750982f2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -43,6 +43,7 @@
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/rtnetlink.h>
#include <net/xfrm.h>
#include <net/l3mdev.h>
@@ -234,7 +235,9 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
if (table) {
ret = RTN_UNICAST;
if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
- if (!dev || dev == res.fi->fib_dev)
+ struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0);
+
+ if (!dev || dev == nhc->nhc_dev)
ret = res.type;
}
}
@@ -321,19 +324,19 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev)
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int ret;
- for (ret = 0; ret < fi->fib_nhs; ret++) {
- struct fib_nh *nh = &fi->fib_nh[ret];
+ for (ret = 0; ret < fib_info_num_path(fi); ret++) {
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, ret);
- if (nh->fib_nh_dev == dev) {
+ if (nhc->nhc_dev == dev) {
dev_match = true;
break;
- } else if (l3mdev_master_ifindex_rcu(nh->fib_nh_dev) == dev->ifindex) {
+ } else if (l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) {
dev_match = true;
break;
}
}
#else
- if (fi->fib_nh[0].fib_nh_dev == dev)
+ if (fib_info_nhc(fi, 0)->nhc_dev == dev)
dev_match = true;
#endif
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 7945f0534db7..a68b5e21ec51 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/list.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
struct fib_alias {
struct hlist_node fa_list;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index cfec3af54c8d..88807c138df4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -31,6 +31,7 @@
#include <net/route.h>
#include <net/tcp.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/fib_rules.h>
struct fib4_rule {
@@ -145,8 +146,11 @@ static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
struct fib_result *result = (struct fib_result *) arg->result;
struct net_device *dev = NULL;
- if (result->fi)
- dev = result->fi->fib_dev;
+ if (result->fi) {
+ struct fib_nh_common *nhc = fib_info_nhc(result->fi, 0);
+
+ dev = nhc->nhc_dev;
+ }
/* do not accept result if the route does
* not meet the required prefix length
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 78648072783e..01e587a5dcb1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -42,6 +42,7 @@
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
+#include <net/nexthop.h>
#include <net/netlink.h>
#include <net/rtnh.h>
#include <net/lwtunnel.h>
@@ -60,18 +61,21 @@ static unsigned int fib_info_cnt;
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
+/* for_nexthops and change_nexthops only used when nexthop object
+ * is not set in a fib_info. The logic within can reference fib_nh.
+ */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
for (nhsel = 0, nh = (fi)->fib_nh; \
- nhsel < (fi)->fib_nhs; \
+ nhsel < fib_info_num_path((fi)); \
nh++, nhsel++)
#define change_nexthops(fi) { \
int nhsel; struct fib_nh *nexthop_nh; \
for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
- nhsel < (fi)->fib_nhs; \
+ nhsel < fib_info_num_path((fi)); \
nexthop_nh++, nhsel++)
#else /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -232,9 +236,13 @@ static void free_fib_info_rcu(struct rcu_head *head)
{
struct fib_info *fi = container_of(head, struct fib_info, rcu);
- change_nexthops(fi) {
- fib_nh_release(fi->fib_net, nexthop_nh);
- } endfor_nexthops(fi);
+ if (fi->nh) {
+ nexthop_put(fi->nh);
+ } else {
+ change_nexthops(fi) {
+ fib_nh_release(fi->fib_net, nexthop_nh);
+ } endfor_nexthops(fi);
+ }
ip_fib_metrics_put(fi->fib_metrics);
@@ -260,22 +268,34 @@ void fib_release_info(struct fib_info *fi)
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
- change_nexthops(fi) {
- if (!nexthop_nh->fib_nh_dev)
- continue;
- hlist_del(&nexthop_nh->nh_hash);
- } endfor_nexthops(fi)
+ if (fi->nh) {
+ list_del(&fi->nh_list);
+ } else {
+ change_nexthops(fi) {
+ if (!nexthop_nh->fib_nh_dev)
+ continue;
+ hlist_del(&nexthop_nh->nh_hash);
+ } endfor_nexthops(fi)
+ }
fi->fib_dead = 1;
fib_info_put(fi);
}
spin_unlock_bh(&fib_info_lock);
}
-static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
+static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
{
- const struct fib_nh *onh = ofi->fib_nh;
+ const struct fib_nh *onh;
+
+ if (fi->nh || ofi->nh)
+ return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;
+
+ if (ofi->fib_nhs == 0)
+ return 0;
for_nexthops(fi) {
+ onh = fib_info_nh(ofi, nhsel);
+
if (nh->fib_nh_oif != onh->fib_nh_oif ||
nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
nh->fib_nh_scope != onh->fib_nh_scope ||
@@ -296,8 +316,6 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
if (nh->fib_nh_gw_family == AF_INET6 &&
ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
return -1;
-
- onh++;
} endfor_nexthops(fi);
return 0;
}
@@ -319,14 +337,19 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
val ^= (fi->fib_protocol << 8) | fi->fib_scope;
val ^= (__force u32)fi->fib_prefsrc;
val ^= fi->fib_priority;
- for_nexthops(fi) {
- val ^= fib_devindex_hashfn(nh->fib_nh_oif);
- } endfor_nexthops(fi)
+
+ if (fi->nh) {
+ val ^= fib_devindex_hashfn(fi->nh->id);
+ } else {
+ for_nexthops(fi) {
+ val ^= fib_devindex_hashfn(nh->fib_nh_oif);
+ } endfor_nexthops(fi)
+ }
return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}
-static struct fib_info *fib_find_info(const struct fib_info *nfi)
+static struct fib_info *fib_find_info(struct fib_info *nfi)
{
struct hlist_head *head;
struct fib_info *fi;
@@ -348,7 +371,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
!((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
- (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+ nh_comp(fi, nfi) == 0)
return fi;
}
@@ -390,34 +413,40 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
+ nla_total_size(4) /* RTA_PRIORITY */
+ nla_total_size(4) /* RTA_PREFSRC */
+ nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+ unsigned int nhs = fib_info_num_path(fi);
/* space for nested metrics */
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
- if (fi->fib_nhs) {
+ if (fi->nh)
+ payload += nla_total_size(4); /* RTA_NH_ID */
+
+ if (nhs) {
size_t nh_encapsize = 0;
- /* Also handles the special case fib_nhs == 1 */
+ /* Also handles the special case nhs == 1 */
/* each nexthop is packed in an attribute */
size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+ unsigned int i;
/* may contain flow and gateway attribute */
nhsize += 2 * nla_total_size(4);
/* grab encap info */
- for_nexthops(fi) {
- if (nh->fib_nh_lws) {
+ for (i = 0; i < fib_info_num_path(fi); i++) {
+ struct fib_nh_common *nhc = fib_info_nhc(fi, i);
+
+ if (nhc->nhc_lwtstate) {
/* RTA_ENCAP_TYPE */
nh_encapsize += lwtunnel_get_encap_size(
- nh->fib_nh_lws);
+ nhc->nhc_lwtstate);
/* RTA_ENCAP */
nh_encapsize += nla_total_size(2);
}
- } endfor_nexthops(fi);
+ }
/* all nexthops are packed in a nested attribute */
- payload += nla_total_size((fi->fib_nhs * nhsize) +
- nh_encapsize);
+ payload += nla_total_size((nhs * nhsize) + nh_encapsize);
}
@@ -578,12 +607,14 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
return nhs;
}
+/* only called when fib_nh is integrated into fib_info */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg,
struct netlink_ext_ack *extack)
{
struct net *net = fi->fib_net;
struct fib_config fib_cfg;
+ struct fib_nh *nh;
int ret;
change_nexthops(fi) {
@@ -646,24 +677,25 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
} endfor_nexthops(fi);
ret = -EINVAL;
- if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) {
+ nh = fib_info_nh(fi, 0);
+ if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) {
NL_SET_ERR_MSG(extack,
"Nexthop device index does not match RTA_OIF");
goto errout;
}
if (cfg->fc_gw_family) {
- if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family ||
+ if (cfg->fc_gw_family != nh->fib_nh_gw_family ||
(cfg->fc_gw_family == AF_INET &&
- fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) ||
+ nh->fib_nh_gw4 != cfg->fc_gw4) ||
(cfg->fc_gw_family == AF_INET6 &&
- ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) {
+ ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) {
NL_SET_ERR_MSG(extack,
"Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
goto errout;
}
}
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
+ if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) {
NL_SET_ERR_MSG(extack,
"Nexthop class id does not match RTA_FLOW");
goto errout;
@@ -674,12 +706,13 @@ errout:
return ret;
}
+/* only called when fib_nh is integrated into fib_info */
static void fib_rebalance(struct fib_info *fi)
{
int total;
int w;
- if (fi->fib_nhs < 2)
+ if (fib_info_num_path(fi) < 2)
return;
total = 0;
@@ -761,27 +794,29 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
return 1;
if (cfg->fc_oif || cfg->fc_gw_family) {
+ struct fib_nh *nh = fib_info_nh(fi, 0);
+
if (cfg->fc_encap) {
if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
- fi->fib_nh, cfg, extack))
+ nh, cfg, extack))
return 1;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
if (cfg->fc_flow &&
- cfg->fc_flow != fi->fib_nh->nh_tclassid)
+ cfg->fc_flow != nh->nh_tclassid)
return 1;
#endif
- if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) ||
+ if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) ||
(cfg->fc_gw_family &&
- cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family))
+ cfg->fc_gw_family != nh->fib_nh_gw_family))
return 1;
if (cfg->fc_gw_family == AF_INET &&
- cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4)
+ cfg->fc_gw4 != nh->fib_nh_gw4)
return 1;
if (cfg->fc_gw_family == AF_INET6 &&
- ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6))
+ ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6))
return 1;
return 0;
@@ -1189,9 +1224,15 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
fib_info_hash_free(old_laddrhash, bytes);
}
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
- unsigned char scope)
+__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
+ unsigned char scope)
{
+ struct fib_nh *nh;
+
+ if (nhc->nhc_family != AF_INET)
+ return inet_select_addr(nhc->nhc_dev, 0, scope);
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
@@ -1201,16 +1242,19 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
{
struct fib_nh_common *nhc = res->nhc;
- struct fib_nh *nh;
if (res->fi->fib_prefsrc)
return res->fi->fib_prefsrc;
- nh = container_of(nhc, struct fib_nh, nh_common);
- if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
- return nh->nh_saddr;
+ if (nhc->nhc_family == AF_INET) {
+ struct fib_nh *nh;
- return fib_info_update_nh_saddr(net, nh, res->fi->fib_scope);
+ nh = container_of(nhc, struct fib_nh, nh_common);
+ if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
+ return nh->nh_saddr;
+ }
+
+ return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
}
static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
@@ -1242,6 +1286,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
{
int err;
struct fib_info *fi = NULL;
+ struct nexthop *nh = NULL;
struct fib_info *ofi;
int nhs = 1;
struct net *net = cfg->fc_nlinfo.nl_net;
@@ -1313,14 +1358,25 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
fi->fib_tb_id = cfg->fc_table;
fi->fib_nhs = nhs;
- change_nexthops(fi) {
- nexthop_nh->nh_parent = fi;
- } endfor_nexthops(fi)
+ if (nh) {
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ err = -EINVAL;
+ } else {
+ err = 0;
+ fi->nh = nh;
+ }
+ } else {
+ change_nexthops(fi) {
+ nexthop_nh->nh_parent = fi;
+ } endfor_nexthops(fi)
- if (cfg->fc_mp)
- err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
- else
- err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
+ if (cfg->fc_mp)
+ err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg,
+ extack);
+ else
+ err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
+ }
if (err != 0)
goto failure;
@@ -1351,7 +1407,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- if (cfg->fc_scope == RT_SCOPE_HOST) {
+ if (fi->nh) {
+ err = fib_check_nexthop(fi->nh, cfg->fc_scope, extack);
+ if (err)
+ goto failure;
+ } else if (cfg->fc_scope == RT_SCOPE_HOST) {
struct fib_nh *nh = fi->fib_nh;
/* Local address is added. */
@@ -1366,7 +1426,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
nh->fib_nh_scope = RT_SCOPE_NOWHERE;
- nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif);
+ nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif);
err = -ENODEV;
if (!nh->fib_nh_dev)
goto failure;
@@ -1391,13 +1451,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
goto err_inval;
}
- change_nexthops(fi) {
- fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope);
- if (nexthop_nh->fib_nh_gw_family == AF_INET6)
- fi->fib_nh_is_v6 = true;
- } endfor_nexthops(fi)
+ if (!fi->nh) {
+ change_nexthops(fi) {
+ fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common,
+ fi->fib_scope);
+ if (nexthop_nh->fib_nh_gw_family == AF_INET6)
+ fi->fib_nh_is_v6 = true;
+ } endfor_nexthops(fi)
- fib_rebalance(fi);
+ fib_rebalance(fi);
+ }
link_it:
ofi = fib_find_info(fi);
@@ -1419,16 +1482,20 @@ link_it:
head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
hlist_add_head(&fi->fib_lhash, head);
}
- change_nexthops(fi) {
- struct hlist_head *head;
- unsigned int hash;
+ if (fi->nh) {
+ list_add(&fi->nh_list, &nh->fi_list);
+ } else {
+ change_nexthops(fi) {
+ struct hlist_head *head;
+ unsigned int hash;
- if (!nexthop_nh->fib_nh_dev)
- continue;
- hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
- head = &fib_info_devhash[hash];
- hlist_add_head(&nexthop_nh->nh_hash, head);
- } endfor_nexthops(fi)
+ if (!nexthop_nh->fib_nh_dev)
+ continue;
+ hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
+ head = &fib_info_devhash[hash];
+ hlist_add_head(&nexthop_nh->nh_hash, head);
+ } endfor_nexthops(fi)
+ }
spin_unlock_bh(&fib_info_lock);
return fi;
@@ -1555,6 +1622,12 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
if (!mp)
goto nla_put_failure;
+ if (unlikely(fi->nh)) {
+ if (nexthop_mpath_fill_node(skb, fi->nh) < 0)
+ goto nla_put_failure;
+ goto mp_end;
+ }
+
for_nexthops(fi) {
if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0)
goto nla_put_failure;
@@ -1565,6 +1638,7 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
#endif
} endfor_nexthops(fi);
+mp_end:
nla_nest_end(skb, mp);
return 0;
@@ -1583,6 +1657,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
struct fib_info *fi, unsigned int flags)
{
+ unsigned int nhs = fib_info_num_path(fi);
struct nlmsghdr *nlh;
struct rtmsg *rtm;
@@ -1618,18 +1693,31 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (fi->fib_prefsrc &&
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
- if (fi->fib_nhs == 1) {
- struct fib_nh *nh = &fi->fib_nh[0];
+
+ if (fi->nh) {
+ if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
+ goto nla_put_failure;
+ if (nexthop_is_blackhole(fi->nh))
+ rtm->rtm_type = RTN_BLACKHOLE;
+ }
+
+ if (nhs == 1) {
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
unsigned char flags = 0;
- if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0)
+ if (fib_nexthop_info(skb, nhc, &flags, false) < 0)
goto nla_put_failure;
rtm->rtm_flags = flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (nh->nh_tclassid &&
- nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
- goto nla_put_failure;
+ if (nhc->nhc_family == AF_INET) {
+ struct fib_nh *nh;
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+ if (nh->nh_tclassid &&
+ nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
+ goto nla_put_failure;
+ }
#endif
} else {
if (fib_add_multipath(skb, fi) < 0)
@@ -1757,6 +1845,8 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
* NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
* NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed
* NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed
+ *
+ * only used when fib_nh is built into fib_info
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
@@ -1838,6 +1928,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
+ struct fib_nh *nh;
if (fa->fa_slen != slen)
continue;
@@ -1859,8 +1950,9 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (next_fi->fib_scope != res->scope ||
fa->fa_type != RTN_UNICAST)
continue;
- if (!next_fi->fib_nh[0].fib_nh_gw4 ||
- next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK)
+
+ nh = fib_info_nh(next_fi, 0);
+ if (!nh->fib_nh_gw4 || nh->fib_nh_scope != RT_SCOPE_LINK)
continue;
fib_alias_accessed(fa);
@@ -1902,6 +1994,8 @@ out:
/*
* Dead device goes up. We wake up dead nexthops.
* It takes sense only on multipath routes.
+ *
+ * only used when fib_nh is built into fib_info
*/
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
@@ -1996,6 +2090,11 @@ void fib_select_multipath(struct fib_result *res, int hash)
struct net *net = fi->fib_net;
bool first = false;
+ if (unlikely(res->fi->nh)) {
+ nexthop_path_fib_result(res, hash);
+ return;
+ }
+
change_nexthops(fi) {
if (net->ipv4.sysctl_fib_multipath_use_neigh) {
if (!fib_good_nh(nexthop_nh))
@@ -2024,7 +2123,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
goto check_saddr;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi->fib_nhs > 1) {
+ if (fib_info_num_path(res->fi) > 1) {
int h = fib_multipath_hash(net, fl4, skb, NULL);
fib_select_multipath(res, h);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b53ecef89d59..716f2d66cb3f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1461,6 +1461,7 @@ found:
fib_alias_accessed(fa);
err = fib_props[fa->fa_type].error;
if (unlikely(err < 0)) {
+out_reject:
#ifdef CONFIG_IP_FIB_TRIE_STATS
this_cpu_inc(stats->semantic_match_passed);
#endif
@@ -1469,7 +1470,13 @@ found:
}
if (fi->fib_flags & RTNH_F_DEAD)
continue;
- for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+
+ if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) {
+ err = fib_props[RTN_BLACKHOLE].error;
+ goto out_reject;
+ }
+
+ for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
if (nhc->nhc_flags & RTNH_F_DEAD)
@@ -2717,14 +2724,18 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
rcu_read_unlock();
}
-static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
+static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
{
unsigned int flags = 0;
if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
flags = RTF_REJECT;
- if (fi && fi->fib_nh->fib_nh_gw4)
- flags |= RTF_GATEWAY;
+ if (fi) {
+ const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
+
+ if (nhc->nhc_gw.ipv4)
+ flags |= RTF_GATEWAY;
+ }
if (mask == htonl(0xFFFFFFFF))
flags |= RTF_HOST;
flags |= RTF_UP;
@@ -2755,7 +2766,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
prefix = htonl(l->key);
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
- const struct fib_info *fi = fa->fa_info;
+ struct fib_info *fi = fa->fa_info;
__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
@@ -2768,26 +2779,31 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
seq_setwidth(seq, 127);
- if (fi)
+ if (fi) {
+ struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
+ __be32 gw = 0;
+
+ if (nhc->nhc_gw_family == AF_INET)
+ gw = nhc->nhc_gw.ipv4;
+
seq_printf(seq,
"%s\t%08X\t%08X\t%04X\t%d\t%u\t"
"%d\t%08X\t%d\t%u\t%u",
- fi->fib_dev ? fi->fib_dev->name : "*",
- prefix,
- fi->fib_nh->fib_nh_gw4, flags, 0, 0,
+ nhc->nhc_dev ? nhc->nhc_dev->name : "*",
+ prefix, gw, flags, 0, 0,
fi->fib_priority,
mask,
(fi->fib_advmss ?
fi->fib_advmss + 40 : 0),
fi->fib_window,
fi->fib_rtt >> 3);
- else
+ } else {
seq_printf(seq,
"*\t%08X\t%08X\t%04X\t%d\t%u\t"
"%d\t%08X\t%d\t%u\t%u",
prefix, 0, flags, 0, 0, 0,
mask, 0, 0, 0);
-
+ }
seq_pad(seq, '\n');
}
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 7a5a3d08fec3..5e48762b6b5f 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -105,6 +105,8 @@ static struct nexthop *nexthop_alloc(void)
nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
if (nh) {
+ INIT_LIST_HEAD(&nh->fi_list);
+ INIT_LIST_HEAD(&nh->f6i_list);
INIT_LIST_HEAD(&nh->grp_list);
}
return nh;
@@ -515,6 +517,89 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
}
EXPORT_SYMBOL_GPL(nexthop_select_path);
+int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *nhi;
+
+ /* fib6_src is unique to a fib6_info and limits the ability to cache
+ * routes in fib6_nh within a nexthop that is potentially shared
+ * across multiple fib entries. If the config wants to use source
+ * routing it can not use nexthop objects. mlxsw also does not allow
+ * fib6_src on routes.
+ */
+ if (!ipv6_addr_any(&cfg->fc_src)) {
+ NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
+ return -EINVAL;
+ }
+
+ if (nh->is_group) {
+ struct nh_group *nhg;
+
+ nhg = rtnl_dereference(nh->nh_grp);
+ if (nhg->has_v4)
+ goto no_v4_nh;
+ } else {
+ nhi = rtnl_dereference(nh->nh_info);
+ if (nhi->family == AF_INET)
+ goto no_v4_nh;
+ }
+
+ return 0;
+no_v4_nh:
+ NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(fib6_check_nexthop);
+
+static int nexthop_check_scope(struct nexthop *nh, u8 scope,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *nhi;
+
+ nhi = rtnl_dereference(nh->nh_info);
+ if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
+ NL_SET_ERR_MSG(extack,
+ "Route with host scope can not have a gateway");
+ return -EINVAL;
+ }
+
+ if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
+ NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Invoked by fib add code to verify nexthop by id is ok with
+ * config for prefix; parts of fib_check_nh not done when nexthop
+ * object is used.
+ */
+int fib_check_nexthop(struct nexthop *nh, u8 scope,
+ struct netlink_ext_ack *extack)
+{
+ int err = 0;
+
+ if (nh->is_group) {
+ struct nh_group *nhg;
+
+ if (scope == RT_SCOPE_HOST) {
+ NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
+ err = -EINVAL;
+ goto out;
+ }
+
+ nhg = rtnl_dereference(nh->nh_grp);
+ /* all nexthops in a group have the same scope */
+ err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack);
+ } else {
+ err = nexthop_check_scope(nh, scope, extack);
+ }
+out:
+ return err;
+}
+
static void nh_group_rebalance(struct nh_group *nhg)
{
int total = 0;
@@ -607,9 +692,32 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
}
}
+static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
+{
+ struct fib6_info *f6i, *tmp;
+ bool do_flush = false;
+ struct fib_info *fi;
+
+ list_for_each_entry(fi, &nh->fi_list, nh_list) {
+ fi->fib_flags |= RTNH_F_DEAD;
+ do_flush = true;
+ }
+ if (do_flush)
+ fib_flush(net);
+
+ /* ip6_del_rt removes the entry from this list hence the _safe */
+ list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
+ /* __ip6_del_rt does a release, so do a hold here */
+ fib6_info_hold(f6i);
+ ipv6_stub->ip6_del_rt(net, f6i);
+ }
+}
+
static void __remove_nexthop(struct net *net, struct nexthop *nh,
struct nl_info *nlinfo)
{
+ __remove_nexthop_fib(net, nh);
+
if (nh->is_group) {
remove_nexthop_group(nh, nlinfo);
} else {
@@ -815,7 +923,8 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
if (!err) {
nh->nh_flags = fib_nh->fib_nh_flags;
- fib_info_update_nh_saddr(net, fib_nh, fib_nh->fib_nh_scope);
+ fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
+ fib_nh->fib_nh_scope);
} else {
fib_nh_release(net, fib_nh);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 11ddc276776e..4a1168451f3a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -99,6 +99,7 @@
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/nexthop.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
@@ -1584,7 +1585,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
#ifdef CONFIG_IP_ROUTE_CLASSID
- {
+ if (nhc->nhc_family == AF_INET) {
struct fib_nh *nh;
nh = container_of(nhc, struct fib_nh, nh_common);
@@ -1950,7 +1951,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi && res->fi->fib_nhs > 1) {
+ if (res->fi && fib_info_num_path(res->fi) > 1) {
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4c30726fa7c7..d7fff86c2ef0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2421,6 +2421,10 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;
for_each_fib6_node_rt_rcu(fn) {
+ /* prefix routes only use builtin fib6_nh */
+ if (rt->nh)
+ continue;
+
if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
continue;
if (no_gw && rt->fib6_nh->fib_nh_gw_family)
@@ -6352,6 +6356,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
spin_lock(&ifa->lock);
if (ifa->rt) {
+ /* host routes only use builtin fib6_nh */
struct fib6_nh *nh = ifa->rt->fib6_nh;
int cpu;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index cdfb8500ccae..02feda73a98e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -159,6 +159,7 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
if (!f6i)
return NULL;
+ /* fib6_siblings is a union with nh_list, so this initializes both */
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
@@ -171,7 +172,11 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
WARN_ON(f6i->fib6_node);
- fib6_nh_release(f6i->fib6_nh);
+ if (f6i->nh)
+ nexthop_put(f6i->nh);
+ else
+ fib6_nh_release(f6i->fib6_nh);
+
ip_fib_metrics_put(f6i->fib6_metrics);
kfree(f6i);
}
@@ -927,6 +932,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
fib6_drop_pcpu_from(rt, table);
+ if (rt->nh && !list_empty(&rt->nh_list))
+ list_del_init(&rt->nh_list);
+
if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
@@ -1334,6 +1342,8 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
err = fib6_add_rt2node(fn, rt, info, extack);
if (!err) {
+ if (rt->nh)
+ list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
}
@@ -2295,9 +2305,13 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
struct fib6_info *rt = v;
struct ipv6_route_iter *iter = seq->private;
+ struct fib6_nh *fib6_nh = rt->fib6_nh;
unsigned int flags = rt->fib6_flags;
const struct net_device *dev;
+ if (rt->nh)
+ fib6_nh = nexthop_fib6_nh(rt->nh);
+
seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
#ifdef CONFIG_IPV6_SUBTREES
@@ -2305,14 +2319,14 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_nh->fib_nh_gw_family) {
+ if (fib6_nh->fib_nh_gw_family) {
flags |= RTF_GATEWAY;
- seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6);
+ seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
} else {
seq_puts(seq, "00000000000000000000000000000000");
}
- dev = rt->fib6_nh->fib_nh_dev;
+ dev = fib6_nh->fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
flags, dev ? dev->name : "");
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f874dde1ee85..6e3c51109c83 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1289,9 +1289,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
!in6_dev->cnf.accept_ra_rtr_pref)
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
-
+ /* routes added from RAs do not use nexthop objects */
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
-
if (rt) {
neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
rt->fib6_nh->fib_nh_dev, NULL,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9b9a0159f7fd..df5be3d5d3e5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -432,15 +432,21 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
struct fib6_info *sibling, *next_sibling;
struct fib6_info *match = res->f6i;
- if (!match->fib6_nsiblings || have_oif_match)
+ if ((!match->fib6_nsiblings && !match->nh) || have_oif_match)
goto out;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
- if (!fl6->mp_hash)
+ if (!fl6->mp_hash &&
+ (!match->nh || nexthop_is_multipath(match->nh)))
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
+ if (unlikely(match->nh)) {
+ nexthop_path_fib6_result(res, fl6->mp_hash);
+ return;
+ }
+
if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
goto out;
@@ -496,7 +502,13 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
struct fib6_nh *nh;
if (!oif && ipv6_addr_any(saddr)) {
- nh = f6i->fib6_nh;
+ if (unlikely(f6i->nh)) {
+ nh = nexthop_fib6_nh(f6i->nh);
+ if (nexthop_is_blackhole(f6i->nh))
+ goto out_blackhole;
+ } else {
+ nh = f6i->fib6_nh;
+ }
if (!(nh->fib_nh_flags & RTNH_F_DEAD))
goto out;
}
@@ -515,7 +527,14 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
goto out;
}
- nh = f6i->fib6_nh;
+ if (unlikely(f6i->nh)) {
+ nh = nexthop_fib6_nh(f6i->nh);
+ if (nexthop_is_blackhole(f6i->nh))
+ goto out_blackhole;
+ } else {
+ nh = f6i->fib6_nh;
+ }
+
if (nh->fib_nh_flags & RTNH_F_DEAD) {
res->f6i = net->ipv6.fib6_null_entry;
nh = res->f6i->fib6_nh;
@@ -524,6 +543,12 @@ out:
res->nh = nh;
res->fib6_type = res->f6i->fib6_type;
res->fib6_flags = res->f6i->fib6_flags;
+ return;
+
+out_blackhole:
+ res->fib6_flags |= RTF_REJECT;
+ res->fib6_type = RTN_BLACKHOLE;
+ res->nh = nh;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -1117,6 +1142,8 @@ restart:
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
goto out;
+ } else if (res.fib6_flags & RTF_REJECT) {
+ goto do_create;
}
fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
@@ -1128,6 +1155,7 @@ restart:
if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies);
} else {
+do_create:
rt = ip6_create_rt_rcu(&res);
}
@@ -3217,7 +3245,9 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
{
struct net *net = cfg->fc_nlinfo.nl_net;
struct fib6_info *rt = NULL;
+ struct nexthop *nh = NULL;
struct fib6_table *table;
+ struct fib6_nh *fib6_nh;
int err = -EINVAL;
int addr_type;
@@ -3270,7 +3300,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
err = -ENOMEM;
- rt = fib6_info_alloc(gfp_flags, true);
+ rt = fib6_info_alloc(gfp_flags, !nh);
if (!rt)
goto out;
@@ -3310,19 +3340,35 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
- err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
- if (err)
- goto out;
+ if (nh) {
+ if (!nexthop_get(nh)) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ goto out;
+ }
+ if (rt->fib6_src.plen) {
+ NL_SET_ERR_MSG(extack, "Nexthops can not be used wtih source routing");
+ goto out;
+ }
+ rt->nh = nh;
+ fib6_nh = nexthop_fib6_nh(rt->nh);
+ } else {
+ err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
+ if (err)
+ goto out;
- /* We cannot add true routes via loopback here,
- * they would result in kernel looping; promote them to reject routes
- */
- addr_type = ipv6_addr_type(&cfg->fc_dst);
- if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, addr_type))
- rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
+ fib6_nh = rt->fib6_nh;
+
+ /* We cannot add true routes via loopback here, they would
+ * result in kernel looping; promote them to reject routes
+ */
+ addr_type = ipv6_addr_type(&cfg->fc_dst);
+ if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
+ addr_type))
+ rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
+ }
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
- struct net_device *dev = fib6_info_nh_dev(rt);
+ struct net_device *dev = fib6_nh->fib_nh_dev;
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
@@ -3678,6 +3724,9 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
goto out;
for_each_fib6_node_rt_rcu(fn) {
+ /* these routes do not use nexthops */
+ if (rt->nh)
+ continue;
if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
continue;
if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
@@ -3741,8 +3790,13 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) {
- struct fib6_nh *nh = rt->fib6_nh;
+ struct fib6_nh *nh;
+
+ /* RA routes do not use nexthops */
+ if (rt->nh)
+ continue;
+ nh = rt->fib6_nh;
if (dev == nh->fib_nh_dev &&
((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&nh->fib_nh_gw6, addr))
@@ -3993,7 +4047,8 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
+ if (!rt->nh &&
+ ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
spin_lock_bh(&rt6_exception_lock);
@@ -4021,8 +4076,13 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
- struct fib6_nh *nh = rt->fib6_nh;
+ struct fib6_nh *nh;
+ /* RA routes do not use nexthops */
+ if (rt->nh)
+ return 0;
+
+ nh = rt->fib6_nh;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
return -1;
@@ -4069,6 +4129,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
return NULL;
}
+/* only called for fib entries with builtin fib6_nh */
static bool rt6_is_dead(const struct fib6_info *rt)
{
if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
@@ -4147,7 +4208,7 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
const struct arg_netdev_event *arg = p_arg;
struct net *net = dev_net(arg->dev);
- if (rt != net->ipv6.fib6_null_entry &&
+ if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
rt->fib6_nh->fib_nh_dev == arg->dev) {
rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(net, rt);
@@ -4172,6 +4233,7 @@ void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
}
+/* only called for fib entries with inline fib6_nh */
static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
const struct net_device *dev)
{
@@ -4232,7 +4294,7 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
const struct net_device *dev = arg->dev;
struct net *net = dev_net(dev);
- if (rt == net->ipv6.fib6_null_entry)
+ if (rt == net->ipv6.fib6_null_entry || rt->nh)
return 0;
switch (arg->event) {
@@ -4786,6 +4848,9 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
{
int nexthop_len = 0;
+ if (rt->nh)
+ nexthop_len += nla_total_size(4); /* RTA_NH_ID */
+
if (rt->fib6_nsiblings) {
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop))
@@ -4812,6 +4877,35 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
+ nexthop_len;
}
+static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
+ unsigned char *flags)
+{
+ if (nexthop_is_multipath(nh)) {
+ struct nlattr *mp;
+
+ mp = nla_nest_start(skb, RTA_MULTIPATH);
+ if (!mp)
+ goto nla_put_failure;
+
+ if (nexthop_mpath_fill_node(skb, nh))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, mp);
+ } else {
+ struct fib6_nh *fib6_nh;
+
+ fib6_nh = nexthop_fib6_nh(nh);
+ if (fib_nexthop_info(skb, &fib6_nh->nh_common,
+ flags, false) < 0)
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
@@ -4821,6 +4915,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct rt6_info *rt6 = (struct rt6_info *)dst;
struct rt6key *rt6_dst, *rt6_src;
u32 *pmetrics, table, rt6_flags;
+ unsigned char nh_flags = 0;
struct nlmsghdr *nlh;
struct rtmsg *rtm;
long expires = 0;
@@ -4940,9 +5035,18 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
}
nla_nest_end(skb, mp);
- } else {
- unsigned char nh_flags = 0;
+ } else if (rt->nh) {
+ if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
+ goto nla_put_failure;
+
+ if (nexthop_is_blackhole(rt->nh))
+ rtm->rtm_type = RTN_BLACKHOLE;
+ if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
+ goto nla_put_failure;
+
+ rtm->rtm_flags |= nh_flags;
+ } else {
if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common,
&nh_flags, false) < 0)
goto nla_put_failure;