From 5b0daa3474d52bed906c4d5e92b44e10148c6972 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 May 2010 00:12:13 -0700 Subject: skb: make skb_recycle_check() return a bool value Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7cdfb4d52847..bf243fc54959 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -501,7 +501,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, 1, -1); } -extern int skb_recycle_check(struct sk_buff *skb, int skb_size); +extern bool skb_recycle_check(struct sk_buff *skb, int skb_size); extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern struct sk_buff *skb_clone(struct sk_buff *skb, -- cgit v1.2.3 From 7489aec8eed4f2f1eb3b4d35763bd3ea30b32ef5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 16:41:35 +0200 Subject: netfilter: xtables: stackptr should be percpu commit f3c5c1bfd4 (netfilter: xtables: make ip_tables reentrant) introduced a performance regression, because stackptr array is shared by all cpus, adding cache line ping pongs. (16 cpus share a 64 bytes cache line) Fix this using alloc_percpu() Signed-off-by: Eric Dumazet Acked-By: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/netfilter/x_tables.c | 13 +++---------- 4 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c00cc0c4d0b7..24e5d01d27d0 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -397,7 +397,7 @@ struct xt_table_info { * @stacksize jumps (number of user chains) can possibly be made. 
*/ unsigned int stacksize; - unsigned int *stackptr; + unsigned int __percpu *stackptr; void ***jumpstack; /* ipt_entry tables: one per CPU */ /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 63958f3394a5..4b6c5ca610fc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -336,7 +336,7 @@ ipt_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6f517bd83692..9d2d68f0e605 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -363,7 +363,7 @@ ip6t_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; - stackptr = &private->stackptr[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 47b1e7917a9c..e34622fa0003 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -699,10 +699,8 @@ void xt_free_table_info(struct xt_table_info *info) vfree(info->jumpstack); else kfree(info->jumpstack); - if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE) - vfree(info->stackptr); - else - kfree(info->stackptr); + + free_percpu(info->stackptr); kfree(info); } @@ -753,14 +751,9 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) unsigned int size; int cpu; - size = sizeof(unsigned int) * nr_cpu_ids; - if (size > PAGE_SIZE) - i->stackptr = vmalloc(size); - else - i->stackptr = kmalloc(size, GFP_KERNEL); + i->stackptr = 
alloc_percpu(unsigned int); if (i->stackptr == NULL) return -ENOMEM; - memset(i->stackptr, 0, size); size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) -- cgit v1.2.3 From b1faf5666438090a4dc4fceac8502edc7788b7e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 May 2010 23:44:05 -0700 Subject: net: sock_queue_err_skb() dont mess with sk_forward_alloc Correct sk_forward_alloc handling for error_queue would need to use a backlog of frames that the softirq handler could not deliver because the socket is owned by a user thread. Or extend backlog processing to be able to process normal and error packets. Another possibility is to not use mem charge for the error queue; this is what I implemented in this patch. Note: this reverts commit 29030374 (net: fix sk_forward_alloc corruptions), since we don't need to lock the socket anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 15 +-------------- net/core/skbuff.c | 30 ++++++++++++++++++++++++++++-- net/ipv4/udp.c | 6 ++---- net/ipv6/udp.c | 6 ++---- 4 files changed, 33 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ca241ea14875..731150d52799 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1524,20 +1524,7 @@ extern void sk_stop_timer(struct sock *sk, struct timer_list* timer); extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); -static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) -{ - /* Cast skb->rcvbuf to unsigned...
It's pointless, but reduces - number of warnings when compiling with -W --ANK - */ - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned)sk->sk_rcvbuf) - return -ENOMEM; - skb_set_owner_r(skb, sk); - skb_queue_tail(&sk->sk_error_queue, skb); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, skb->len); - return 0; -} +extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb); /* * Recover an error report and clear atomically diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e7ac09c281a..9f07e749d7b1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2965,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) } EXPORT_SYMBOL_GPL(skb_cow_data); +static void sock_rmem_free(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); +} + +/* + * Note: We dont mem charge error packets (no sk_forward_alloc changes) + */ +int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) +{ + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= + (unsigned)sk->sk_rcvbuf) + return -ENOMEM; + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sock_rmem_free; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); + + skb_queue_tail(&sk->sk_error_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, skb->len); + return 0; +} +EXPORT_SYMBOL(sock_queue_err_skb); + void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { @@ -2997,9 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; - bh_lock_sock(sk); err = sock_queue_err_skb(sk, skb); - bh_unlock_sock(sk); if (err) kfree_skb(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 50678f9a2763..eec4ff456e33 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -633,11 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) if (!inet->recverr) { if 
(!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; - } else { - bh_lock_sock(sk); + } else ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3048f906c042..87be58673b55 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -466,11 +466,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state != TCP_ESTABLISHED && !np->recverr) goto out; - if (np->recverr) { - bh_lock_sock(sk); + if (np->recverr) ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1)); - bh_unlock_sock(sk); - } + sk->sk_err = err; sk->sk_error_report(sk); out: -- cgit v1.2.3