diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/devinet.c | 1 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 16 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 110 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 18 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 18 | ||||
-rw-r--r-- | net/ipv4/raw.c | 18 | ||||
-rw-r--r-- | net/ipv4/route.c | 20 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 18 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 7 | ||||
-rw-r--r-- | net/ipv4/udp.c | 18 |
10 files changed, 124 insertions, 120 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5345b0bee6df..acf553f95b5b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1203,6 +1203,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, break; /* fall through */ case NETDEV_NOTIFY_PEERS: + case NETDEV_BONDING_FAILOVER: /* Send gratuitous ARP to notify of link change */ inetdev_send_gratuitous_arp(dev, in_dev); break; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 451088330bbb..22524716fe70 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -44,6 +44,7 @@ #include <net/arp.h> #include <net/ip_fib.h> #include <net/rtnetlink.h> +#include <net/xfrm.h> #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type); * - check, that packet arrived from expected physical interface. * called with rcu_read_lock() */ -int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, - struct net_device *dev, __be32 *spec_dst, - u32 *itag, u32 mark) +int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, + int oif, struct net_device *dev, __be32 *spec_dst, + u32 *itag) { struct in_device *in_dev; struct flowi4 fl4; @@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, fl4.flowi4_oif = 0; fl4.flowi4_iif = oif; - fl4.flowi4_mark = mark; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; @@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; - rpf = IN_DEV_RPFILTER(in_dev); + + /* Ignore rp_filter for packets protected by IPsec. */ + rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev); + accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); - if (mark && !IN_DEV_SRC_VMARK(in_dev)) - fl4.flowi4_mark = 0; + fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; } if (in_dev == NULL) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e9013d6c1f51..9ac481a10d37 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -126,7 +126,7 @@ struct tnode { struct work_struct work; struct tnode *tnode_free; }; - struct rt_trie_node *child[0]; + struct rt_trie_node __rcu *child[0]; }; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -151,7 +151,7 @@ struct trie_stat { }; struct trie { - struct rt_trie_node *trie; + struct rt_trie_node __rcu *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats stats; #endif @@ -177,16 +177,29 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; -static inline struct tnode *node_parent(struct rt_trie_node *node) +/* + * caller must hold RTNL + */ +static inline struct tnode *node_parent(const struct rt_trie_node *node) { - return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); + unsigned long parent; + + parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); + + return (struct tnode *)(parent & ~NODE_TYPE_MASK); } -static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) +/* + * caller must hold RCU read lock or RTNL + */ +static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) { - struct tnode *ret = node_parent(node); + unsigned long parent; + + parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || + lockdep_rtnl_is_held()); - return rcu_dereference_rtnl(ret); + return (struct tnode *)(parent & ~NODE_TYPE_MASK); } /* Same as rcu_assign_pointer @@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) node->parent = (unsigned long)ptr | NODE_TYPE(node); } -static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) +/* + * caller must hold RTNL + */ +static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) { BUG_ON(i >= 1U << tn->bits); - return tn->child[i]; + return rtnl_dereference(tn->child[i]); } -static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) +/* + * caller must hold RCU read lock or RTNL + */ +static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) { - struct rt_trie_node *ret = tnode_get_child(tn, i); + BUG_ON(i >= 1U << tn->bits); - return rcu_dereference_rtnl(ret); + return rcu_dereference_rtnl(tn->child[i]); } static inline int tnode_child_length(const struct tnode *tn) @@ -487,7 +506,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull) { - struct rt_trie_node *chi = tn->child[i]; + struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); int isfull; BUG_ON(i >= 1<<tn->bits); @@ -665,7 +684,7 @@ one_child: for (i = 0; i < tnode_child_length(tn); i++) { struct rt_trie_node *n; - n = tn->child[i]; + n = rtnl_dereference(tn->child[i]); if (!n) continue; @@ -679,6 +698,20 @@ one_child: return (struct rt_trie_node *) tn; } + +static void tnode_clean_free(struct tnode *tn) +{ + int i; + struct tnode *tofree; + + for (i = 0; i < tnode_child_length(tn); i++) { + tofree = (struct tnode *)rtnl_dereference(tn->child[i]); + if (tofree) + tnode_free(tofree); + } + tnode_free(tn); +} + static struct tnode *inflate(struct trie *t, struct tnode *tn) { struct tnode *oldtnode = tn; @@ -755,8 +788,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) inode = (struct tnode *) node; if (inode->bits == 1) { - put_child(t, tn, 2*i, inode->child[0]); - put_child(t, tn, 2*i+1, inode->child[1]); + put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); + put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); tnode_free_safe(inode); continue; @@ -797,8 +830,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) size = tnode_child_length(left); for (j = 0; j < size; j++) { - put_child(t, left, j, inode->child[j]); - put_child(t, right, j, inode->child[j + size]); + put_child(t, left, j, rtnl_dereference(inode->child[j])); + put_child(t, right, j, rtnl_dereference(inode->child[j + size])); } put_child(t, tn, 2*i, resize(t, left)); put_child(t, tn, 2*i+1, resize(t, right)); @@ -808,18 +841,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) tnode_free_safe(oldtnode); return tn; nomem: - { - int size = tnode_child_length(tn); - int j; - - for (j = 0; j < size; j++) - if (tn->child[j]) - tnode_free((struct tnode *)tn->child[j]); - - tnode_free(tn); - - return ERR_PTR(-ENOMEM); - } + tnode_clean_free(tn); + return ERR_PTR(-ENOMEM); } static struct tnode *halve(struct trie *t, struct tnode *tn) @@ -890,18 +913,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) tnode_free_safe(oldtnode); return tn; nomem: - { - int size = tnode_child_length(tn); - int j; - - for (j = 0; j < size; j++) - if (tn->child[j]) - tnode_free((struct tnode *)tn->child[j]); - - tnode_free(tn); - - return ERR_PTR(-ENOMEM); - } + tnode_clean_free(tn); + return ERR_PTR(-ENOMEM); } /* readside must use rcu_read_lock currently dump routines @@ -1033,7 +1046,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) t_key cindex; pos = 0; - n = t->trie; + n = rtnl_dereference(t->trie); /* If we point to NULL, stop. Either the tree is empty and we should * just put a new leaf in if, or we have reached an empty child slot, @@ -1319,6 +1332,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) } } + if (!plen) + tb->tb_num_default++; + list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); @@ -1684,6 +1700,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) list_del_rcu(&fa->fa_list); + if (!plen) + tb->tb_num_default--; + if (list_empty(fa_head)) { hlist_del_rcu(&li->hlist); free_leaf_info(li); @@ -1756,7 +1775,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) continue; if (IS_LEAF(c)) { - prefetch(p->child[idx]); + prefetch(rcu_dereference_rtnl(p->child[idx])); return (struct leaf *) c; } @@ -1974,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_id = id; tb->tb_default = -1; + tb->tb_num_default = 0; t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); @@ -2272,7 +2292,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* walk rest of this hash chain */ h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); - while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { + while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { tb = hlist_entry(tb_node, struct fib_table, tb_hlist); n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); if (n) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 38f23e721b80..8514db54a7f4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -355,20 +355,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options *opt = inet_rsk(req)->opt; - struct flowi4 fl4 = { - .flowi4_oif = sk->sk_bound_dev_if, - .flowi4_mark = sk->sk_mark, - .daddr = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .saddr = ireq->loc_addr, - .flowi4_tos = RT_CONN_FLAGS(sk), - .flowi4_proto = sk->sk_protocol, - .flowi4_flags = inet_sk_flowi_flags(sk), - .fl4_sport = inet_sk(sk)->inet_sport, - .fl4_dport = ireq->rmt_port, - }; struct net *net = sock_net(sk); + struct flowi4 fl4; + flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), + (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 459c011b1d4a..bdad3d60aa82 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1474,16 +1474,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi4 fl4 = { - .flowi4_oif = arg->bound_dev_if, - .daddr = daddr, - .saddr = rt->rt_spec_dst, - .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl4_sport = tcp_hdr(skb)->dest, - .fl4_dport = tcp_hdr(skb)->source, - .flowi4_proto = sk->sk_protocol, - .flowi4_flags = ip_reply_arg_flowi_flags(arg), - }; + struct flowi4 fl4; + + flowi4_init_output(&fl4, arg->bound_dev_if, 0, + RT_TOS(ip_hdr(skb)->tos), + RT_SCOPE_UNIVERSE, sk->sk_protocol, + ip_reply_arg_flowi_flags(arg), + daddr, rt->rt_spec_dst, + tcp_hdr(skb)->source, tcp_hdr(skb)->dest); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bceaec42c37d..2b50cc2da90a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -548,17 +548,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } { - struct flowi4 fl4 = { - .flowi4_oif = ipc.oif, - .flowi4_mark = sk->sk_mark, - .daddr = daddr, - .saddr = saddr, - .flowi4_tos = tos, - .flowi4_proto = (inet->hdrincl ? - IPPROTO_RAW : - sk->sk_protocol), - .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, - }; + struct flowi4 fl4; + + flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + RT_SCOPE_UNIVERSE, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); + if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); if (err) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c1acf69858fd..e9aee81de3e3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1871,8 +1871,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto e_inval; spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); } else { - err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, - &itag, 0); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, + &itag); if (err < 0) goto e_err; } @@ -1981,8 +1981,8 @@ static int __mkroute_input(struct sk_buff *skb, } - err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res), - in_dev->dev, &spec_dst, &itag, skb->mark); + err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), + in_dev->dev, &spec_dst, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2150,9 +2150,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto brd_input; if (res.type == RTN_LOCAL) { - err = fib_validate_source(saddr, daddr, tos, + err = fib_validate_source(skb, saddr, daddr, tos, net->loopback_dev->ifindex, - dev, &spec_dst, &itag, skb->mark); + dev, &spec_dst, &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2176,8 +2176,8 @@ brd_input: if (ipv4_is_zeronet(saddr)) spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); else { - err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, - &itag, skb->mark); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, + &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2615,7 +2615,9 @@ static struct rtable *ip_route_output_slow(struct net *net, fib_select_multipath(&res); else #endif - if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) + if (!res.prefixlen && + res.table->tb_num_default > 1 && + res.type == RTN_UNICAST && !fl4.flowi4_oif) fib_select_default(&res); if (!fl4.saddr) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8b44c6d2a79b..71e029691908 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * no easy way to do this. */ { - struct flowi4 fl4 = { - .flowi4_mark = sk->sk_mark, - .daddr = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .saddr = ireq->loc_addr, - .flowi4_tos = RT_CONN_FLAGS(sk), - .flowi4_proto = IPPROTO_TCP, - .flowi4_flags = inet_sk_flowi_flags(sk), - .fl4_sport = th->dest, - .fl4_dport = th->source, - }; + struct flowi4 fl4; + + flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + RT_SCOPE_UNIVERSE, IPPROTO_TCP, + inet_sk_flowi_flags(sk), + (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, + ireq->loc_addr, th->source, th->dest); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b22d45010545..054a59d21eb0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -999,7 +999,8 @@ new_segment: /* We have some space in skb head. Superb! */ if (copy > skb_tailroom(skb)) copy = skb_tailroom(skb); - if ((err = skb_add_data(skb, from, copy)) != 0) + err = skb_add_data_nocache(sk, skb, from, copy); + if (err) goto do_fault; } else { int merge = 0; @@ -1042,8 +1043,8 @@ new_segment: /* Time to copy data. We are close to * the end! */ - err = skb_copy_to_page(sk, from, skb, page, - off, copy); + err = skb_copy_to_page_nocache(sk, from, skb, + page, off, copy); if (err) { /* If this page was new, give it to the * socket so it does not get leaked. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f87a8eb76f3b..a15c8fb653af 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -909,20 +909,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi4 fl4 = { - .flowi4_oif = ipc.oif, - .flowi4_mark = sk->sk_mark, - .daddr = faddr, - .saddr = saddr, - .flowi4_tos = tos, - .flowi4_proto = sk->sk_protocol, - .flowi4_flags = (inet_sk_flowi_flags(sk) | - FLOWI_FLAG_CAN_SLEEP), - .fl4_sport = inet->inet_sport, - .fl4_dport = dport, - }; + struct flowi4 fl4; struct net *net = sock_net(sk); + flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + RT_SCOPE_UNIVERSE, sk->sk_protocol, + inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, + faddr, saddr, dport, inet->inet_sport); + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { |