diff options
Diffstat (limited to 'net/ipv4/fib_trie.c')
| -rw-r--r-- | net/ipv4/fib_trie.c | 256 |
1 files changed, 190 insertions, 66 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a573e37e0615..b9df9c09b84e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. * * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet * & Swedish University of Agricultural Sciences. @@ -18,28 +15,19 @@ * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. * http://www.csc.kth.se/~snilsson/software/dyntrie2/ * - * * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 * - * * Code from fib_hash has been reused which includes the following header: * - * * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * IPv4 FIB: lookup engine and maintenance routines. * - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Substantial contributions to this work comes from: * * David S. Miller, <davem@davemloft.net> @@ -86,11 +74,13 @@ #include <trace/events/fib.h> #include "fib_lookup.h" -static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, +static int call_fib_entry_notifier(struct notifier_block *nb, enum fib_event_type event_type, u32 dst, - int dst_len, struct fib_alias *fa) + int dst_len, struct fib_alias *fa, + struct netlink_ext_ack *extack) { struct fib_entry_notifier_info info = { + .info.extack = extack, .dst = dst, .dst_len = dst_len, .fi = fa->fa_info, @@ -98,7 +88,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, .type = fa->fa_type, .tb_id = fa->tb_id, }; - return call_fib4_notifier(nb, net, event_type, &info.info); + return call_fib4_notifier(nb, event_type, &info.info); } static int call_fib_entry_notifiers(struct net *net, @@ -183,14 +173,16 @@ struct trie { }; static struct key_vector *resize(struct trie *t, struct key_vector *tn); -static size_t tnode_free_size; +static unsigned int tnode_free_size; /* - * synchronize_rcu after call_rcu for that many pages; it should be especially - * useful before resizing the root node with PREEMPT_NONE configs; the value was - * obtained experimentally, aiming to avoid visible slowdown. + * synchronize_rcu after call_rcu for outstanding dirty memory; it should be + * especially useful before resizing the root node with PREEMPT_NONE configs; + * the value was obtained experimentally, aiming to avoid visible slowdown. */ -static const int sync_pages = 128; +unsigned int sysctl_fib_sync_mem = 512 * 1024; +unsigned int sysctl_fib_sync_mem_min = 64 * 1024; +unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024; static struct kmem_cache *fn_alias_kmem __ro_after_init; static struct kmem_cache *trie_leaf_kmem __ro_after_init; @@ -348,12 +340,18 @@ static struct tnode *tnode_alloc(int bits) static inline void empty_child_inc(struct key_vector *n) { - ++tn_info(n)->empty_children ? : ++tn_info(n)->full_children; + tn_info(n)->empty_children++; + + if (!tn_info(n)->empty_children) + tn_info(n)->full_children++; } static inline void empty_child_dec(struct key_vector *n) { - tn_info(n)->empty_children-- ? : tn_info(n)->full_children--; + if (!tn_info(n)->empty_children) + tn_info(n)->full_children--; + + tn_info(n)->empty_children--; } static struct key_vector *leaf_new(t_key key, struct fib_alias *fa) @@ -504,7 +502,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= PAGE_SIZE * sync_pages) { + if (tnode_free_size >= sysctl_fib_sync_mem) { tnode_free_size = 0; synchronize_rcu(); } @@ -1459,6 +1457,7 @@ found: fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; if (unlikely(err < 0)) { +out_reject: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif @@ -1467,20 +1466,24 @@ found: } if (fi->fib_flags & RTNH_F_DEAD) continue; - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); - if (nh->nh_flags & RTNH_F_DEAD) + if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) { + err = fib_props[RTN_BLACKHOLE].error; + goto out_reject; + } + + for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { + struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); + + if (nhc->nhc_flags & RTNH_F_DEAD) continue; - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - nh->nh_flags & RTNH_F_LINKDOWN && + if (ip_ignore_linkdown(nhc->nhc_dev) && + nhc->nhc_flags & RTNH_F_LINKDOWN && !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) continue; if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { if (flp->flowi4_oif && - flp->flowi4_oif != nh->nh_oif) + flp->flowi4_oif != nhc->nhc_oif) continue; } @@ -1490,6 +1493,7 @@ found: res->prefix = htonl(n->key); res->prefixlen = KEYLENGTH - fa->fa_slen; res->nh_sel = nhsel; + res->nhc = nhc; res->type = fa->fa_type; res->scope = fi->fib_scope; res->fi = fi; @@ -1498,7 +1502,7 @@ found: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif - trace_fib_table_lookup(tb->tb_id, flp, nh, err); + trace_fib_table_lookup(tb->tb_id, flp, nhc, err); return err; } @@ -1942,10 +1946,83 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) return found; } -static void fib_leaf_notify(struct net *net, struct key_vector *l, - struct fib_table *tb, struct notifier_block *nb) +/* derived from fib_trie_free */ +static void __fib_info_notify_update(struct net *net, struct fib_table *tb, + struct nl_info *info) +{ + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct fib_alias *fa; + + for (;;) { + struct key_vector *n; + + if (!(cindex--)) { + t_key pkey = pn->key; + + if (IS_TRIE(pn)) + break; + + pn = node_parent(pn); + cindex = get_index(pkey, pn); + continue; + } + + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; + + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; + + continue; + } + + hlist_for_each_entry(fa, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + + if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id) + continue; + + rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa, + KEYLENGTH - fa->fa_slen, tb->tb_id, + info, NLM_F_REPLACE); + + /* call_fib_entry_notifiers will be removed when + * in-kernel notifier is implemented and supported + * for nexthop objects + */ + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, + n->key, + KEYLENGTH - fa->fa_slen, fa, + NULL); + } + } +} + +void fib_info_notify_update(struct net *net, struct nl_info *info) +{ + unsigned int h; + + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + struct hlist_head *head = &net->ipv4.fib_table_hash[h]; + struct fib_table *tb; + + hlist_for_each_entry_rcu(tb, head, tb_hlist) + __fib_info_notify_update(net, tb, info); + } +} + +static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb, + struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct fib_alias *fa; + int err; hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { struct fib_info *fi = fa->fa_info; @@ -1959,39 +2036,53 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, if (tb->tb_id != fa->tb_id) continue; - call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, - KEYLENGTH - fa->fa_slen, fa); + err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key, + KEYLENGTH - fa->fa_slen, + fa, extack); + if (err) + return err; } + return 0; } -static void fib_table_notify(struct net *net, struct fib_table *tb, - struct notifier_block *nb) +static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb, + struct netlink_ext_ack *extack) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *l, *tp = t->kv; t_key key = 0; + int err; while ((l = leaf_walk_rcu(&tp, key)) != NULL) { - fib_leaf_notify(net, l, tb, nb); + err = fib_leaf_notify(l, tb, nb, extack); + if (err) + return err; key = l->key + 1; /* stop in case of wrap around */ if (key < l->key) break; } + return 0; } -void fib_notify(struct net *net, struct notifier_block *nb) +int fib_notify(struct net *net, struct notifier_block *nb, + struct netlink_ext_ack *extack) { unsigned int h; + int err; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; - hlist_for_each_entry_rcu(tb, head, tb_hlist) - fib_table_notify(net, tb, nb); + hlist_for_each_entry_rcu(tb, head, tb_hlist) { + err = fib_table_notify(tb, nb, extack); + if (err) + return err; + } } + return 0; } static void __trie_free_rcu(struct rcu_head *head) @@ -2017,22 +2108,26 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, { unsigned int flags = NLM_F_MULTI; __be32 xkey = htonl(l->key); + int i, s_i, i_fa, s_fa, err; struct fib_alias *fa; - int i, s_i; - if (filter->filter_set) + if (filter->filter_set || + !filter->dump_exceptions || !filter->dump_routes) flags |= NLM_F_DUMP_FILTERED; s_i = cb->args[4]; + s_fa = cb->args[5]; i = 0; /* rcu_read_lock is hold by caller */ hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { - int err; + struct fib_info *fi = fa->fa_info; if (i < s_i) goto next; + i_fa = 0; + if (tb->tb_id != fa->tb_id) goto next; @@ -2041,29 +2136,49 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, goto next; if ((filter->protocol && - fa->fa_info->fib_protocol != filter->protocol)) + fi->fib_protocol != filter->protocol)) goto next; if (filter->dev && - !fib_info_nh_uses_dev(fa->fa_info, filter->dev)) + !fib_info_nh_uses_dev(fi, filter->dev)) goto next; } - err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE, - tb->tb_id, fa->fa_type, - xkey, KEYLENGTH - fa->fa_slen, - fa->fa_tos, fa->fa_info, flags); - if (err < 0) { - cb->args[4] = i; - return err; + if (filter->dump_routes) { + if (!s_fa) { + err = fib_dump_info(skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWROUTE, + tb->tb_id, fa->fa_type, + xkey, + KEYLENGTH - fa->fa_slen, + fa->fa_tos, fi, flags); + if (err < 0) + goto stop; + } + + i_fa++; } + + if (filter->dump_exceptions) { + err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi, + &i_fa, s_fa, flags); + if (err < 0) + goto stop; + } + next: i++; } cb->args[4] = i; return skb->len; + +stop: + cb->args[4] = i; + cb->args[5] = i_fa; + return err; } /* rcu_read_lock needs to be hold by caller from readside */ @@ -2645,14 +2760,18 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock(); } -static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) +static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi) { unsigned int flags = 0; if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) flags = RTF_REJECT; - if (fi && fi->fib_nh->nh_gw) - flags |= RTF_GATEWAY; + if (fi) { + const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); + + if (nhc->nhc_gw.ipv4) + flags |= RTF_GATEWAY; + } if (mask == htonl(0xFFFFFFFF)) flags |= RTF_HOST; flags |= RTF_UP; @@ -2683,7 +2802,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) prefix = htonl(l->key); hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { - const struct fib_info *fi = fa->fa_info; + struct fib_info *fi = fa->fa_info; __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); @@ -2696,26 +2815,31 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) seq_setwidth(seq, 127); - if (fi) + if (fi) { + struct fib_nh_common *nhc = fib_info_nhc(fi, 0); + __be32 gw = 0; + + if (nhc->nhc_gw_family == AF_INET) + gw = nhc->nhc_gw.ipv4; + seq_printf(seq, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" "%d\t%08X\t%d\t%u\t%u", - fi->fib_dev ? fi->fib_dev->name : "*", - prefix, - fi->fib_nh->nh_gw, flags, 0, 0, + nhc->nhc_dev ? nhc->nhc_dev->name : "*", + prefix, gw, flags, 0, 0, fi->fib_priority, mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0), fi->fib_window, fi->fib_rtt >> 3); - else + } else { seq_printf(seq, "*\t%08X\t%08X\t%04X\t%d\t%u\t" "%d\t%08X\t%d\t%u\t%u", prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0); - + } seq_pad(seq, '\n'); } |
