summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/fib_frontend.c16
-rw-r--r--net/ipv4/fib_trie.c110
-rw-r--r--net/ipv4/inet_connection_sock.c18
-rw-r--r--net/ipv4/ip_output.c18
-rw-r--r--net/ipv4/raw.c18
-rw-r--r--net/ipv4/route.c20
-rw-r--r--net/ipv4/syncookies.c18
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/udp.c18
10 files changed, 124 insertions, 120 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5345b0bee6df..acf553f95b5b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1203,6 +1203,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
break;
/* fall through */
case NETDEV_NOTIFY_PEERS:
+ case NETDEV_BONDING_FAILOVER:
/* Send gratuitous ARP to notify of link change */
inetdev_send_gratuitous_arp(dev, in_dev);
break;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 451088330bbb..22524716fe70 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -44,6 +44,7 @@
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
+#include <net/xfrm.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type);
* - check, that packet arrived from expected physical interface.
* called with rcu_read_lock()
*/
-int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
- struct net_device *dev, __be32 *spec_dst,
- u32 *itag, u32 mark)
+int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
+ int oif, struct net_device *dev, __be32 *spec_dst,
+ u32 *itag)
{
struct in_device *in_dev;
struct flowi4 fl4;
@@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
fl4.flowi4_oif = 0;
fl4.flowi4_iif = oif;
- fl4.flowi4_mark = mark;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
@@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
no_addr = in_dev->ifa_list == NULL;
- rpf = IN_DEV_RPFILTER(in_dev);
+
+ /* Ignore rp_filter for packets protected by IPsec. */
+ rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
+
accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
- if (mark && !IN_DEV_SRC_VMARK(in_dev))
- fl4.flowi4_mark = 0;
+ fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
}
if (in_dev == NULL)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e9013d6c1f51..9ac481a10d37 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -126,7 +126,7 @@ struct tnode {
struct work_struct work;
struct tnode *tnode_free;
};
- struct rt_trie_node *child[0];
+ struct rt_trie_node __rcu *child[0];
};
#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -151,7 +151,7 @@ struct trie_stat {
};
struct trie {
- struct rt_trie_node *trie;
+ struct rt_trie_node __rcu *trie;
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie_use_stats stats;
#endif
@@ -177,16 +177,29 @@ static const int sync_pages = 128;
static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct kmem_cache *trie_leaf_kmem __read_mostly;
-static inline struct tnode *node_parent(struct rt_trie_node *node)
+/*
+ * caller must hold RTNL
+ */
+static inline struct tnode *node_parent(const struct rt_trie_node *node)
{
- return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
+ unsigned long parent;
+
+ parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
+
+ return (struct tnode *)(parent & ~NODE_TYPE_MASK);
}
-static inline struct tnode *node_parent_rcu(struct rt_trie_node *node)
+/*
+ * caller must hold RCU read lock or RTNL
+ */
+static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
{
- struct tnode *ret = node_parent(node);
+ unsigned long parent;
+
+ parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
+ lockdep_rtnl_is_held());
- return rcu_dereference_rtnl(ret);
+ return (struct tnode *)(parent & ~NODE_TYPE_MASK);
}
/* Same as rcu_assign_pointer
@@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
node->parent = (unsigned long)ptr | NODE_TYPE(node);
}
-static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i)
+/*
+ * caller must hold RTNL
+ */
+static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
{
BUG_ON(i >= 1U << tn->bits);
- return tn->child[i];
+ return rtnl_dereference(tn->child[i]);
}
-static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
+/*
+ * caller must hold RCU read lock or RTNL
+ */
+static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
{
- struct rt_trie_node *ret = tnode_get_child(tn, i);
+ BUG_ON(i >= 1U << tn->bits);
- return rcu_dereference_rtnl(ret);
+ return rcu_dereference_rtnl(tn->child[i]);
}
static inline int tnode_child_length(const struct tnode *tn)
@@ -487,7 +506,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
int wasfull)
{
- struct rt_trie_node *chi = tn->child[i];
+ struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
int isfull;
BUG_ON(i >= 1<<tn->bits);
@@ -665,7 +684,7 @@ one_child:
for (i = 0; i < tnode_child_length(tn); i++) {
struct rt_trie_node *n;
- n = tn->child[i];
+ n = rtnl_dereference(tn->child[i]);
if (!n)
continue;
@@ -679,6 +698,20 @@ one_child:
return (struct rt_trie_node *) tn;
}
+
+static void tnode_clean_free(struct tnode *tn)
+{
+ int i;
+ struct tnode *tofree;
+
+ for (i = 0; i < tnode_child_length(tn); i++) {
+ tofree = (struct tnode *)rtnl_dereference(tn->child[i]);
+ if (tofree)
+ tnode_free(tofree);
+ }
+ tnode_free(tn);
+}
+
static struct tnode *inflate(struct trie *t, struct tnode *tn)
{
struct tnode *oldtnode = tn;
@@ -755,8 +788,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
inode = (struct tnode *) node;
if (inode->bits == 1) {
- put_child(t, tn, 2*i, inode->child[0]);
- put_child(t, tn, 2*i+1, inode->child[1]);
+ put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
+ put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
tnode_free_safe(inode);
continue;
@@ -797,8 +830,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
size = tnode_child_length(left);
for (j = 0; j < size; j++) {
- put_child(t, left, j, inode->child[j]);
- put_child(t, right, j, inode->child[j + size]);
+ put_child(t, left, j, rtnl_dereference(inode->child[j]));
+ put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
}
put_child(t, tn, 2*i, resize(t, left));
put_child(t, tn, 2*i+1, resize(t, right));
@@ -808,18 +841,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
tnode_free_safe(oldtnode);
return tn;
nomem:
- {
- int size = tnode_child_length(tn);
- int j;
-
- for (j = 0; j < size; j++)
- if (tn->child[j])
- tnode_free((struct tnode *)tn->child[j]);
-
- tnode_free(tn);
-
- return ERR_PTR(-ENOMEM);
- }
+ tnode_clean_free(tn);
+ return ERR_PTR(-ENOMEM);
}
static struct tnode *halve(struct trie *t, struct tnode *tn)
@@ -890,18 +913,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
tnode_free_safe(oldtnode);
return tn;
nomem:
- {
- int size = tnode_child_length(tn);
- int j;
-
- for (j = 0; j < size; j++)
- if (tn->child[j])
- tnode_free((struct tnode *)tn->child[j]);
-
- tnode_free(tn);
-
- return ERR_PTR(-ENOMEM);
- }
+ tnode_clean_free(tn);
+ return ERR_PTR(-ENOMEM);
}
/* readside must use rcu_read_lock currently dump routines
@@ -1033,7 +1046,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
t_key cindex;
pos = 0;
- n = t->trie;
+ n = rtnl_dereference(t->trie);
/* If we point to NULL, stop. Either the tree is empty and we should
* just put a new leaf in if, or we have reached an empty child slot,
@@ -1319,6 +1332,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
}
}
+ if (!plen)
+ tb->tb_num_default++;
+
list_add_tail_rcu(&new_fa->fa_list,
(fa ? &fa->fa_list : fa_head));
@@ -1684,6 +1700,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
list_del_rcu(&fa->fa_list);
+ if (!plen)
+ tb->tb_num_default--;
+
if (list_empty(fa_head)) {
hlist_del_rcu(&li->hlist);
free_leaf_info(li);
@@ -1756,7 +1775,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
continue;
if (IS_LEAF(c)) {
- prefetch(p->child[idx]);
+ prefetch(rcu_dereference_rtnl(p->child[idx]));
return (struct leaf *) c;
}
@@ -1974,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id)
tb->tb_id = id;
tb->tb_default = -1;
+ tb->tb_num_default = 0;
t = (struct trie *) tb->tb_data;
memset(t, 0, sizeof(*t));
@@ -2272,7 +2292,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
/* walk rest of this hash chain */
h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
- while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
+ while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
if (n)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 38f23e721b80..8514db54a7f4 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -355,20 +355,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
struct rtable *rt;
const struct inet_request_sock *ireq = inet_rsk(req);
struct ip_options *opt = inet_rsk(req)->opt;
- struct flowi4 fl4 = {
- .flowi4_oif = sk->sk_bound_dev_if,
- .flowi4_mark = sk->sk_mark,
- .daddr = ((opt && opt->srr) ?
- opt->faddr : ireq->rmt_addr),
- .saddr = ireq->loc_addr,
- .flowi4_tos = RT_CONN_FLAGS(sk),
- .flowi4_proto = sk->sk_protocol,
- .flowi4_flags = inet_sk_flowi_flags(sk),
- .fl4_sport = inet_sk(sk)->inet_sport,
- .fl4_dport = ireq->rmt_port,
- };
struct net *net = sock_net(sk);
+ struct flowi4 fl4;
+ flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
+ (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
+ ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 459c011b1d4a..bdad3d60aa82 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1474,16 +1474,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
}
{
- struct flowi4 fl4 = {
- .flowi4_oif = arg->bound_dev_if,
- .daddr = daddr,
- .saddr = rt->rt_spec_dst,
- .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
- .fl4_sport = tcp_hdr(skb)->dest,
- .fl4_dport = tcp_hdr(skb)->source,
- .flowi4_proto = sk->sk_protocol,
- .flowi4_flags = ip_reply_arg_flowi_flags(arg),
- };
+ struct flowi4 fl4;
+
+ flowi4_init_output(&fl4, arg->bound_dev_if, 0,
+ RT_TOS(ip_hdr(skb)->tos),
+ RT_SCOPE_UNIVERSE, sk->sk_protocol,
+ ip_reply_arg_flowi_flags(arg),
+ daddr, rt->rt_spec_dst,
+ tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt))
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bceaec42c37d..2b50cc2da90a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -548,17 +548,13 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
{
- struct flowi4 fl4 = {
- .flowi4_oif = ipc.oif,
- .flowi4_mark = sk->sk_mark,
- .daddr = daddr,
- .saddr = saddr,
- .flowi4_tos = tos,
- .flowi4_proto = (inet->hdrincl ?
- IPPROTO_RAW :
- sk->sk_protocol),
- .flowi4_flags = FLOWI_FLAG_CAN_SLEEP,
- };
+ struct flowi4 fl4;
+
+ flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+ RT_SCOPE_UNIVERSE,
+ inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
+
if (!inet->hdrincl) {
err = raw_probe_proto_opt(&fl4, msg);
if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c1acf69858fd..e9aee81de3e3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1871,8 +1871,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto e_inval;
spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
} else {
- err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
- &itag, 0);
+ err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
+ &itag);
if (err < 0)
goto e_err;
}
@@ -1981,8 +1981,8 @@ static int __mkroute_input(struct sk_buff *skb,
}
- err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
- in_dev->dev, &spec_dst, &itag, skb->mark);
+ err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
+ in_dev->dev, &spec_dst, &itag);
if (err < 0) {
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
@@ -2150,9 +2150,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto brd_input;
if (res.type == RTN_LOCAL) {
- err = fib_validate_source(saddr, daddr, tos,
+ err = fib_validate_source(skb, saddr, daddr, tos,
net->loopback_dev->ifindex,
- dev, &spec_dst, &itag, skb->mark);
+ dev, &spec_dst, &itag);
if (err < 0)
goto martian_source_keep_err;
if (err)
@@ -2176,8 +2176,8 @@ brd_input:
if (ipv4_is_zeronet(saddr))
spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
else {
- err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
- &itag, skb->mark);
+ err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
+ &itag);
if (err < 0)
goto martian_source_keep_err;
if (err)
@@ -2615,7 +2615,9 @@ static struct rtable *ip_route_output_slow(struct net *net,
fib_select_multipath(&res);
else
#endif
- if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif)
+ if (!res.prefixlen &&
+ res.table->tb_num_default > 1 &&
+ res.type == RTN_UNICAST && !fl4.flowi4_oif)
fib_select_default(&res);
if (!fl4.saddr)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 8b44c6d2a79b..71e029691908 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -345,17 +345,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
* no easy way to do this.
*/
{
- struct flowi4 fl4 = {
- .flowi4_mark = sk->sk_mark,
- .daddr = ((opt && opt->srr) ?
- opt->faddr : ireq->rmt_addr),
- .saddr = ireq->loc_addr,
- .flowi4_tos = RT_CONN_FLAGS(sk),
- .flowi4_proto = IPPROTO_TCP,
- .flowi4_flags = inet_sk_flowi_flags(sk),
- .fl4_sport = th->dest,
- .fl4_dport = th->source,
- };
+ struct flowi4 fl4;
+
+ flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk),
+ RT_SCOPE_UNIVERSE, IPPROTO_TCP,
+ inet_sk_flowi_flags(sk),
+ (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
+ ireq->loc_addr, th->source, th->dest);
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b22d45010545..054a59d21eb0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -999,7 +999,8 @@ new_segment:
/* We have some space in skb head. Superb! */
if (copy > skb_tailroom(skb))
copy = skb_tailroom(skb);
- if ((err = skb_add_data(skb, from, copy)) != 0)
+ err = skb_add_data_nocache(sk, skb, from, copy);
+ if (err)
goto do_fault;
} else {
int merge = 0;
@@ -1042,8 +1043,8 @@ new_segment:
/* Time to copy data. We are close to
* the end! */
- err = skb_copy_to_page(sk, from, skb, page,
- off, copy);
+ err = skb_copy_to_page_nocache(sk, from, skb,
+ page, off, copy);
if (err) {
/* If this page was new, give it to the
* socket so it does not get leaked.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f87a8eb76f3b..a15c8fb653af 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -909,20 +909,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
rt = (struct rtable *)sk_dst_check(sk, 0);
if (rt == NULL) {
- struct flowi4 fl4 = {
- .flowi4_oif = ipc.oif,
- .flowi4_mark = sk->sk_mark,
- .daddr = faddr,
- .saddr = saddr,
- .flowi4_tos = tos,
- .flowi4_proto = sk->sk_protocol,
- .flowi4_flags = (inet_sk_flowi_flags(sk) |
- FLOWI_FLAG_CAN_SLEEP),
- .fl4_sport = inet->inet_sport,
- .fl4_dport = dport,
- };
+ struct flowi4 fl4;
struct net *net = sock_net(sk);
+ flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+ RT_SCOPE_UNIVERSE, sk->sk_protocol,
+ inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
+ faddr, saddr, dport, inet->inet_sport);
+
security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {