From 2f09a4d5daaa36690d506fafda9c24f2be866f6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Aug 2010 22:38:09 -0700 Subject: xfrm: Use GFP_ATOMIC in xfrm_compile_policy As xfrm_compile_policy runs within a read_lock, we cannot use GFP_KERNEL for memory allocations. Reported-by: Luca Tettamanti Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ba59983aaffe..b14ed4b1f27c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2504,7 +2504,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, if (p->dir > XFRM_POLICY_OUT) return NULL; - xp = xfrm_policy_alloc(net, GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; -- cgit v1.2.3 From f3d3f616e35db2ceeb11564eafd50759bb5bca8a Mon Sep 17 00:00:00 2001 From: Min Zhang Date: Sat, 14 Aug 2010 22:42:51 -0700 Subject: ipv6: remove sysctl jiffies conversion on gc_elasticity and min_adv_mss sysctl output ipv6 gc_elasticity and min_adv_mss as values divided by HZ. However, they are not in unit of jiffies, since ip6_rt_min_advmss refers to packet size and ip6_rt_fc_elasticity is used as scaler as in expire>>ip6_rt_gc_elasticity, so replace the jiffies conversion handler will regular handler for them. This has impact on scripts that are currently working assuming the divide by HZ, will yield different results with this patch in place. Signed-off-by: Min Zhang Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8f2d0400cf8a..d126365ac046 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2580,7 +2580,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, }, { .procname = "mtu_expires", @@ -2594,7 +2594,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, }, { .procname = "gc_min_interval_ms", -- cgit v1.2.3 From daa3766e705c2605bd7f63b80c7742014ef9e04e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 15 Aug 2010 23:21:50 -0700 Subject: Revert "netlink: netlink_recvmsg() fix" This reverts commit 1235f504aaba2ebeabc863fdb3ceac764a317d47. It causes regressions worse than the problem it was trying to fix. Eric will try to solve the problem another way. Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2cbf380377d5..8648a9922aab 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1406,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb; + struct sk_buff *skb, *frag __maybe_unused = NULL; int err; if (flags&MSG_OOB) @@ -1441,21 +1441,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, kfree_skb(skb); skb = compskb; } else { - /* - * Before setting frag_list to NULL, we must get a - * private copy of skb if shared (because of MSG_PEEK) - */ - if (skb_shared(skb)) { - struct sk_buff *nskb; - - nskb = pskb_copy(skb, GFP_KERNEL); - kfree_skb(skb); - skb = nskb; - err = -ENOMEM; - if (!skb) - goto out; - } - kfree_skb(skb_shinfo(skb)->frag_list); + frag = skb_shinfo(skb)->frag_list; skb_shinfo(skb)->frag_list = NULL; } } @@ -1492,6 +1478,10 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (flags & MSG_TRUNC) copied = skb->len; +#ifdef CONFIG_COMPAT_NETLINK_MESSAGES + skb_shinfo(skb)->frag_list = frag; +#endif + skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) -- cgit v1.2.3 From 001389b9581c13fe5fc357a0f89234f85af4215d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Aug 2010 10:22:10 +0000 Subject: netfilter: {ip,ip6,arp}_tables: avoid lockdep false positive After commit 24b36f019 (netfilter: {ip,ip6,arp}_tables: dont block bottom half more than necessary), lockdep can raise a warning because we attempt to lock a spinlock with BH enabled, while the same lock is usually locked by another cpu in a softirq context. Disable again BH to avoid these lockdep warnings. Reported-by: Linus Torvalds Diagnosed-by: David S. Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 2 ++ net/ipv4/netfilter/ip_tables.c | 2 ++ net/ipv6/netfilter/ip6_tables.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 6bccba31d132..51d6c3167975 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -735,6 +735,7 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -742,6 +743,7 @@ static void get_counters(const struct xt_table_info *t, ++i; } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c439721b165a..97b64b22c412 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -909,6 +909,7 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -916,6 +917,7 @@ get_counters(const struct xt_table_info *t, ++i; /* macro does multi eval of i */ } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 5359ef4daac5..29a7bca29e3f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -922,6 +922,7 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -929,6 +930,7 @@ get_counters(const struct xt_table_info *t, ++i; } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } -- cgit v1.2.3 From 1c40be12f7d8ca1d387510d39787b12e512a7ce8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Aug 2010 20:04:22 +0000 Subject: net sched: fix some kernel memory leaks We leak at least 32bits of kernel memory to user land in tc dump, because we dont init all fields (capab ?) of the dumped structure. Use C99 initializers so that holes and non explicit fields are zeroed. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/act_gact.c | 21 ++++++++++++--------- net/sched/act_mirred.c | 15 ++++++++------- net/sched/act_nat.c | 22 +++++++++++----------- net/sched/act_simple.c | 11 ++++++----- net/sched/act_skbedit.c | 11 ++++++----- 5 files changed, 43 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 8406c6654990..c2ed90a4c0b4 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -152,21 +152,24 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); - struct tc_gact opt; struct tcf_gact *gact = a->priv; + struct tc_gact opt = { + .index = gact->tcf_index, + .refcnt = gact->tcf_refcnt - ref, + .bindcnt = gact->tcf_bindcnt - bind, + .action = gact->tcf_action, + }; struct tcf_t t; - opt.index = gact->tcf_index; - opt.refcnt = gact->tcf_refcnt - ref; - opt.bindcnt = gact->tcf_bindcnt - bind; - opt.action = gact->tcf_action; NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); #ifdef CONFIG_GACT_PROB if (gact->tcfg_ptype) { - struct tc_gact_p p_opt; - p_opt.paction = gact->tcfg_paction; - p_opt.pval = gact->tcfg_pval; - p_opt.ptype = gact->tcfg_ptype; + struct tc_gact_p p_opt = { + .paction = gact->tcfg_paction, + .pval = gact->tcfg_pval, + .ptype = gact->tcfg_ptype, + }; + NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); } #endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 11f195af2da0..0c311be92827 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -219,15 +219,16 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = a->priv; - struct tc_mirred opt; + struct tc_mirred opt = { + .index = m->tcf_index, + .action = m->tcf_action, + .refcnt = m->tcf_refcnt - ref, + .bindcnt = m->tcf_bindcnt - bind, + .eaction = m->tcfm_eaction, + .ifindex = m->tcfm_ifindex, + }; struct tcf_t t; - opt.index = m->tcf_index; - opt.action = m->tcf_action; - opt.refcnt = m->tcf_refcnt - ref; - opt.bindcnt = m->tcf_bindcnt - bind; - opt.eaction = m->tcfm_eaction; - opt.ifindex = m->tcfm_ifindex; NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 509a2d53a99d..186eb837e600 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -272,19 +272,19 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_nat *p = a->priv; - struct tc_nat opt; + struct tc_nat opt = { + .old_addr = p->old_addr, + .new_addr = p->new_addr, + .mask = p->mask, + .flags = p->flags, + + .index = p->tcf_index, + .action = p->tcf_action, + .refcnt = p->tcf_refcnt - ref, + .bindcnt = p->tcf_bindcnt - bind, + }; struct tcf_t t; - opt.old_addr = p->old_addr; - opt.new_addr = p->new_addr; - opt.mask = p->mask; - opt.flags = p->flags; - - opt.index = p->tcf_index; - opt.action = p->tcf_action; - opt.refcnt = p->tcf_refcnt - ref; - opt.bindcnt = p->tcf_bindcnt - bind; - NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 4a1d640b0cf1..97e84f3ee775 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -164,13 +164,14 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_defact *d = a->priv; - struct tc_defact opt; + struct tc_defact opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; struct tcf_t t; - opt.index = d->tcf_index; - opt.refcnt = d->tcf_refcnt - ref; - opt.bindcnt = d->tcf_bindcnt - bind; - opt.action = d->tcf_action; NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata); t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e9607fe55b58..66cbf4eb8855 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -159,13 +159,14 @@ static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_skbedit *d = a->priv; - struct tc_skbedit opt; + struct tc_skbedit opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; struct tcf_t t; - opt.index = d->tcf_index; - opt.refcnt = d->tcf_refcnt - ref; - opt.bindcnt = d->tcf_bindcnt - bind; - opt.action = d->tcf_action; NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt); if (d->flags & SKBEDIT_F_PRIORITY) NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), -- cgit v1.2.3 From e5093aec2e6b60c3df2420057ffab9ed4a6d2792 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 11 Aug 2010 02:02:10 +0000 Subject: net: Fix a memmove bug in dev_gro_receive() >Xin Xiaohui wrote: > I looked into the code dev_gro_receive(), found the code here: > if the frags[0] is pulled to 0, then the page will be released, > and memmove() frags left. > Is that right? I'm not sure if memmove do right or not, but > frags[0].size is never set after memove at least. what I think > a simple way is not to do anything if we found frags[0].size == 0. > The patch is as followed. ... This version of the patch fixes the bug directly in memmove. Reported-by: "Xin, Xiaohui" Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1ae654391442..3721fbb9a83c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3143,7 +3143,7 @@ pull: put_page(skb_shinfo(skb)->frags[0].page); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, - --skb_shinfo(skb)->nr_frags); + --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); } } -- cgit v1.2.3