summaryrefslogtreecommitdiff
AgeCommit message (Collapse)AuthorFilesLines
2012-02-24x86-64: Improve insn scheduling in SAVE_ARGS_IRQJan Beulich1-4/+3
In one case, use an address register that was computed earlier (and with a simpler instruction), thus reducing the risk of a stall. In the second case, eliminate a branch by using a conditional move (as is already done in call_softirq and xen_do_hypervisor_callback). Signed-off-by: Jan Beulich <jbeulich@suse.com> Link: http://lkml.kernel.org/r/4F4788A50200007800074A26@nat28.tlf.novell.com Reviewed-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2012-02-24bitops: Add missing parentheses to new get_order macroJoerg Roedel1-2/+2
The new get_order macro introcuded in commit d66acc39c7cee323733c8503b9de1821a56dff7e does not use parentheses around all uses of the parameter n. This causes new compile warnings, for example in the amd_iommu_init.c function: drivers/iommu/amd_iommu_init.c:561:6: warning: suggest parentheses around comparison in operand of ‘&’ [-Wparentheses] drivers/iommu/amd_iommu_init.c:561:6: warning: suggest parentheses around comparison in operand of ‘&’ [-Wparentheses] Fix those warnings by adding the missing parentheses. Reported-by: Ingo Molnar <mingo@elte.hu> Cc: David Howells <dhowells@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Joerg Roedel <joerg.roedel@amd.com> Link: http://lkml.kernel.org/r/1330088295-28732-1-git-send-email-joerg.roedel@amd.com Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2012-02-21bitops: Optimise get_order()David Howells1-12/+28
Optimise get_order() to use bit scanning instructions if such exist rather than a loop. Also, make it possible to use get_order() in static initialisations too by building it on top of ilog2() in the constant parameter case. This has been tested for i386 and x86_64 using the following userspace program, and for FRV by making appropriate substitutions for fls() and fls64(). It will abort if the case for get_order() deviates from the original except for the order of 0, for which get_order() produces an undefined result. This program tests both dynamic and static parameters. #include <stdlib.h> #include <stdio.h> #ifdef __x86_64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif #define PAGE_SHIFT 12 typedef unsigned long long __u64, u64; typedef unsigned int __u32, u32; #define noinline __attribute__((noinline)) static inline int fls(int x) { int bitpos = -1; asm("bsrl %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; } static __always_inline int fls64(__u64 x) { #if BITS_PER_LONG == 64 long bitpos = -1; asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; #else __u32 h = x >> 32, l = x; int bitpos = -1; asm("bsrl %1,%0 \n" "subl %2,%0 \n" "bsrl %3,%0 \n" : "+r" (bitpos) : "rm" (l), "i"(32), "rm" (h)); return bitpos + 33; #endif } static inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; } static inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; } extern __attribute__((const, noreturn)) int ____ilog2_NaN(void); #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 1 ? ____ilog2_NaN() : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ (n) & (1ULL << 60) ? 60 : \ (n) & (1ULL << 59) ? 59 : \ (n) & (1ULL << 58) ? 58 : \ (n) & (1ULL << 57) ? 57 : \ (n) & (1ULL << 56) ? 56 : \ (n) & (1ULL << 55) ? 55 : \ (n) & (1ULL << 54) ? 54 : \ (n) & (1ULL << 53) ? 53 : \ (n) & (1ULL << 52) ? 52 : \ (n) & (1ULL << 51) ? 51 : \ (n) & (1ULL << 50) ? 50 : \ (n) & (1ULL << 49) ? 49 : \ (n) & (1ULL << 48) ? 48 : \ (n) & (1ULL << 47) ? 47 : \ (n) & (1ULL << 46) ? 46 : \ (n) & (1ULL << 45) ? 45 : \ (n) & (1ULL << 44) ? 44 : \ (n) & (1ULL << 43) ? 43 : \ (n) & (1ULL << 42) ? 42 : \ (n) & (1ULL << 41) ? 41 : \ (n) & (1ULL << 40) ? 40 : \ (n) & (1ULL << 39) ? 39 : \ (n) & (1ULL << 38) ? 38 : \ (n) & (1ULL << 37) ? 37 : \ (n) & (1ULL << 36) ? 36 : \ (n) & (1ULL << 35) ? 35 : \ (n) & (1ULL << 34) ? 34 : \ (n) & (1ULL << 33) ? 33 : \ (n) & (1ULL << 32) ? 32 : \ (n) & (1ULL << 31) ? 31 : \ (n) & (1ULL << 30) ? 30 : \ (n) & (1ULL << 29) ? 29 : \ (n) & (1ULL << 28) ? 28 : \ (n) & (1ULL << 27) ? 27 : \ (n) & (1ULL << 26) ? 26 : \ (n) & (1ULL << 25) ? 25 : \ (n) & (1ULL << 24) ? 24 : \ (n) & (1ULL << 23) ? 23 : \ (n) & (1ULL << 22) ? 22 : \ (n) & (1ULL << 21) ? 21 : \ (n) & (1ULL << 20) ? 20 : \ (n) & (1ULL << 19) ? 19 : \ (n) & (1ULL << 18) ? 18 : \ (n) & (1ULL << 17) ? 17 : \ (n) & (1ULL << 16) ? 16 : \ (n) & (1ULL << 15) ? 15 : \ (n) & (1ULL << 14) ? 14 : \ (n) & (1ULL << 13) ? 13 : \ (n) & (1ULL << 12) ? 12 : \ (n) & (1ULL << 11) ? 11 : \ (n) & (1ULL << 10) ? 10 : \ (n) & (1ULL << 9) ? 9 : \ (n) & (1ULL << 8) ? 8 : \ (n) & (1ULL << 7) ? 7 : \ (n) & (1ULL << 6) ? 6 : \ (n) & (1ULL << 5) ? 5 : \ (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ (n) & (1ULL << 1) ? 1 : \ (n) & (1ULL << 0) ? 0 : \ ____ilog2_NaN() \ ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ ) static noinline __attribute__((const)) int old_get_order(unsigned long size) { int order; size = (size - 1) >> (PAGE_SHIFT - 1); order = -1; do { size >>= 1; order++; } while (size); return order; } static noinline __attribute__((const)) int __get_order(unsigned long size) { int order; size--; size >>= PAGE_SHIFT; #if BITS_PER_LONG == 32 order = fls(size); #else order = fls64(size); #endif return order; } #define get_order(n) \ ( \ __builtin_constant_p(n) ? ( \ (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ ((n < (1UL << PAGE_SHIFT)) ? 0 : \ ilog2((n) - 1) - PAGE_SHIFT + 1) \ ) : \ __get_order(n) \ ) #define order(N) \ { (1UL << N) - 1, get_order((1UL << N) - 1) }, \ { (1UL << N), get_order((1UL << N)) }, \ { (1UL << N) + 1, get_order((1UL << N) + 1) } struct order { unsigned long n, order; }; static const struct order order_table[] = { order(0), order(1), order(2), order(3), order(4), order(5), order(6), order(7), order(8), order(9), order(10), order(11), order(12), order(13), order(14), order(15), order(16), order(17), order(18), order(19), order(20), order(21), order(22), order(23), order(24), order(25), order(26), order(27), order(28), order(29), order(30), order(31), #if BITS_PER_LONG == 64 order(32), order(33), order(34), order(35), #endif { 0x2929 } }; void check(int loop, unsigned long n) { unsigned long old, new; printf("[%2d]: %09lx | ", loop, n); old = old_get_order(n); new = get_order(n); printf("%3ld, %3ld\n", old, new); if (n != 0 && old != new) abort(); } int main(int argc, char **argv) { const struct order *p; unsigned long n; int loop; for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) { n = 1UL << loop; check(loop, n - 1); check(loop, n); check(loop, n + 1); } for (p = order_table; p->n != 0x2929; p++) { unsigned long old, new; old = old_get_order(p->n); new = p->order; printf("%09lx\t%3ld, %3ld\n", p->n, old, new); if (p->n != 0 && old != new) abort(); } return 0; } Disassembling the x86_64 version of the above code shows: 0000000000400510 <old_get_order>: 400510: 48 83 ef 01 sub $0x1,%rdi 400514: b8 ff ff ff ff mov $0xffffffff,%eax 400519: 48 c1 ef 0b shr $0xb,%rdi 40051d: 0f 1f 00 nopl (%rax) 400520: 83 c0 01 add $0x1,%eax 400523: 48 d1 ef shr %rdi 400526: 75 f8 jne 400520 <old_get_order+0x10> 400528: f3 c3 repz retq 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 0000000000400530 <__get_order>: 400530: 48 83 ef 01 sub $0x1,%rdi 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 40053b: 48 c1 ef 0c shr $0xc,%rdi 40053f: 48 0f bd c7 bsr %rdi,%rax 400543: 83 c0 01 add $0x1,%eax 400546: c3 retq 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 40054e: 00 00 As can be seen, the new __get_order() function is simpler than the old_get_order() function. Signed-off-by: David Howells <dhowells@redhat.com> Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2012-02-21bitops: Adjust the comment on get_order() to describe the size==0 caseDavid Howells1-1/+22
Adjust the comment on get_order() to note that the result of passing a size of 0 results in an undefined value. Signed-off-by: David Howells <dhowells@redhat.com> Link: http://lkml.kernel.org/r/20120220223917.16199.9416.stgit@warthog.procyon.org.uk Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2012-02-07x86/spinlocks: Eliminate TICKET_MASKJan Beulich2-3/+2
The definition of it being questionable already (unnecessarily including a cast), and it being used in a single place that can be written shorter without it, remove this #define. Along the same lines, simplify __ticket_spin_is_locked()'s main expression, which was the more convoluted way because of needs that went away with the recent type changes by Jeremy. This is pure cleanup, no functional change intended. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/4F2C06020200007800071066@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-01-27x86-64: Handle byte-wise tail copying in memcpy() without a loopJan Beulich1-9/+10
While hard to measure, reducing the number of possibly/likely mis-predicted branches can generally be expected to be slightly better. Other than apparent at the first glance, this also doesn't grow the function size (the alignment gap to the next function just gets smaller). Signed-off-by: Jan Beulich <jbeulich@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Link: http://lkml.kernel.org/r/4F218584020000780006F422@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-01-27x86-64: Fix memcpy() to support sizes of 4Gb and aboveJan Beulich1-15/+10
While currently there doesn't appear to be any reachable in-tree case where such large memory blocks may be passed to memcpy(), we already had hit the problem in our Xen kernels. Just like done recently for mmeset(), rather than working around it, prevent others from falling into the same trap by fixing this long standing limitation. Signed-off-by: Jan Beulich <jbeulich@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Link: http://lkml.kernel.org/r/4F21846F020000780006F3FA@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-01-26x86-64: Fix memset() to support sizes of 4Gb and aboveJan Beulich1-18/+15
While currently there doesn't appear to be any reachable in-tree case where such large memory blocks may be passed to memset() (alloc_bootmem() being the primary non-reachable one, as it gets called with suitably large sizes in FLATMEM configurations), we have recently hit the problem a second time in our Xen kernels. Rather than working around it a second time, prevent others from falling into the same trap by fixing this long standing limitation. Signed-off-by: Jan Beulich <jbeulich@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/4F05D992020000780006AA09@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-01-06x86-64: Slightly shorten copy_page()Jan Beulich1-8/+4
%r13 got saved and restored without ever getting touched, so there's no need to do so. Signed-off-by: Jan Beulich <jbeulich@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/4F05D9F9020000780006AA0D@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-01-04x86: Fix atomic64_xxx_cx8() functionsEric Dumazet1-2/+2
It appears about all functions in arch/x86/lib/atomic64_cx8_32.S are wrong in case cmpxchg8b must be restarted, because LOCK_PREFIX macro defines a label "1" clashing with other local labels : 1: some_instructions LOCK_PREFIX cmpxchg8b (%ebp) jne 1b / jumps to beginning of LOCK_PREFIX ! A possible fix is to use a magic label "672" in LOCK_PREFIX asm definition, similar to the "671" one we defined in LOCK_PREFIX_HERE. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Acked-by: Jan Beulich <JBeulich@suse.com> Cc: Christoph Lameter <cl@linux.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/1325608540.2320.103.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-01-04x86: Fix and improve cmpxchg_double{,_local}()Jan Beulich4-91/+25
Just like the per-CPU ones they had several problems/shortcomings: Only the first memory operand was mentioned in the asm() operands, and the 2x64-bit version didn't have a memory clobber while the 2x32-bit one did. The former allowed the compiler to not recognize the need to re-load the data in case it had it cached in some register, while the latter was overly destructive. The types of the local copies of the old and new values were incorrect (the types of the pointed-to variables should be used here, to make sure the respective old/new variable types are compatible). The __dummy/__junk variables were pointless, given that local copies of the inputs already existed (and can hence be used for discarded outputs). The 32-bit variant of cmpxchg_double_local() referenced cmpxchg16b_local(). At once also: - change the return value type to what it really is: 'bool' - unify 32- and 64-bit variants - abstract out the common part of the 'normal' and 'local' variants Signed-off-by: Jan Beulich <jbeulich@suse.com> Cc: Christoph Lameter <cl@linux.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/4F01F12A020000780006A19B@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-01-04Merge commit 'v3.2-rc7' into x86/asmIngo Molnar537-3993/+7130
Merge reason: Update from -rc4 to -rc7. Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-12-24Linux 3.2-rc7Linus Torvalds1-1/+1
2011-12-24Merge branch 'for-linus' of ↵Linus Torvalds1-4/+32
git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: VFS: Fix race between CPU hotplug and lglocks
2011-12-24Merge tag 'writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linuxLinus Torvalds2-13/+13
for linus: writeback reason binary tracing format fix * tag 'writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux: writeback: show writeback reason with __print_symbolic
2011-12-24Merge branch 'rc-fixes' of ↵Linus Torvalds1-3/+2
git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild * 'rc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild: kconfig: adapt update-po-config to new UML layout
2011-12-24Merge branch 'v4l_for_linus' of ↵Linus Torvalds2-2/+2
git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media * 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media: [media] omap3isp: Fix crash caused by subdevs now having a pointer to devnodes
2011-12-24Merge branch 'for-linus' of ↵Linus Torvalds2-5/+7
git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: Btrfs: call d_instantiate after all ops are setup Btrfs: fix worker lock misuse in find_worker
2011-12-24Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparcLinus Torvalds1-2/+2
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc: sparc64: Fix MSIQ HV call ordering in pci_sun4v_msiq_build_irq().
2011-12-24Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/netLinus Torvalds10-20/+26
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: netfilter: xt_connbytes: handle negation correctly net: relax rcvbuf limits rps: fix insufficient bounds checking in store_rps_dev_flow_table_cnt() net: introduce DST_NOPEER dst flag mqprio: Avoid panic if no options are provided bridge: provide a mtu() method for fake_dst_ops
2011-12-23Merge branch 'nf' of git://1984.lsi.us.es/netDavid S. Miller1-3/+3
2011-12-23netfilter: xt_connbytes: handle negation correctlyFlorian Westphal1-3/+3
"! --connbytes 23:42" should match if the packet/byte count is not in range. As there is no explict "invert match" toggle in the match structure, userspace swaps the from and to arguments (i.e., as if "--connbytes 42:23" were given). However, "what <= 23 && what >= 42" will always be false. Change things so we use "||" in case "from" is larger than "to". This change may look like it breaks backwards compatibility when "to" is 0. However, older iptables binaries will refuse "connbytes 42:0", and current releases treat it to mean "! --connbytes 0:42", so we should be fine. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2011-12-23Btrfs: call d_instantiate after all ops are setupAl Viro1-4/+5
This closes races where btrfs is calling d_instantiate too soon during inode creation. All of the callers of btrfs_add_nondir are updated to instantiate after the inode is fully setup in memory. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Chris Mason <chris.mason@oracle.com>
2011-12-23Btrfs: fix worker lock misuse in find_workerChris Mason1-1/+2
Dan Carpenter noticed that we were doing a double unlock on the worker lock, and sometimes picking a worker thread without the lock held. This fixes both errors. Signed-off-by: Chris Mason <chris.mason@oracle.com> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
2011-12-23net: relax rcvbuf limitsEric Dumazet3-10/+6
skb->truesize might be big even for a small packet. Its even bigger after commit 87fb4b7b533 (net: more accurate skb truesize) and big MTU. We should allow queueing at least one packet per receiver, even with a low RCVBUF setting. Reported-by: Michal Simek <monstr@monstr.eu> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-23rps: fix insufficient bounds checking in store_rps_dev_flow_table_cnt()Xi Wang1-2/+5
Setting a large rps_flow_cnt like (1 << 30) on 32-bit platform will cause a kernel oops due to insufficient bounds checking. if (count > 1<<30) { /* Enforce a limit to prevent overflow */ return -EINVAL; } count = roundup_pow_of_two(count); table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); Note that the macro RPS_DEV_FLOW_TABLE_SIZE(count) is defined as: ... + (count * sizeof(struct rps_dev_flow)) where sizeof(struct rps_dev_flow) is 8. (1 << 30) * 8 will overflow 32 bits. This patch replaces the magic number (1 << 30) with a symbolic bound. Suggested-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: Xi Wang <xi.wang@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-23net: introduce DST_NOPEER dst flagEric Dumazet4-4/+5
Chris Boot reported crashes occurring in ipv6_select_ident(). [ 461.457562] RIP: 0010:[<ffffffff812dde61>] [<ffffffff812dde61>] ipv6_select_ident+0x31/0xa7 [ 461.578229] Call Trace: [ 461.580742] <IRQ> [ 461.582870] [<ffffffff812efa7f>] ? udp6_ufo_fragment+0x124/0x1a2 [ 461.589054] [<ffffffff812dbfe0>] ? ipv6_gso_segment+0xc0/0x155 [ 461.595140] [<ffffffff812700c6>] ? skb_gso_segment+0x208/0x28b [ 461.601198] [<ffffffffa03f236b>] ? ipv6_confirm+0x146/0x15e [nf_conntrack_ipv6] [ 461.608786] [<ffffffff81291c4d>] ? nf_iterate+0x41/0x77 [ 461.614227] [<ffffffff81271d64>] ? dev_hard_start_xmit+0x357/0x543 [ 461.620659] [<ffffffff81291cf6>] ? nf_hook_slow+0x73/0x111 [ 461.626440] [<ffffffffa0379745>] ? br_parse_ip_options+0x19a/0x19a [bridge] [ 461.633581] [<ffffffff812722ff>] ? dev_queue_xmit+0x3af/0x459 [ 461.639577] [<ffffffffa03747d2>] ? br_dev_queue_push_xmit+0x72/0x76 [bridge] [ 461.646887] [<ffffffffa03791e3>] ? br_nf_post_routing+0x17d/0x18f [bridge] [ 461.653997] [<ffffffff81291c4d>] ? nf_iterate+0x41/0x77 [ 461.659473] [<ffffffffa0374760>] ? br_flood+0xfa/0xfa [bridge] [ 461.665485] [<ffffffff81291cf6>] ? nf_hook_slow+0x73/0x111 [ 461.671234] [<ffffffffa0374760>] ? br_flood+0xfa/0xfa [bridge] [ 461.677299] [<ffffffffa0379215>] ? nf_bridge_update_protocol+0x20/0x20 [bridge] [ 461.684891] [<ffffffffa03bb0e5>] ? nf_ct_zone+0xa/0x17 [nf_conntrack] [ 461.691520] [<ffffffffa0374760>] ? br_flood+0xfa/0xfa [bridge] [ 461.697572] [<ffffffffa0374812>] ? NF_HOOK.constprop.8+0x3c/0x56 [bridge] [ 461.704616] [<ffffffffa0379031>] ? nf_bridge_push_encap_header+0x1c/0x26 [bridge] [ 461.712329] [<ffffffffa037929f>] ? br_nf_forward_finish+0x8a/0x95 [bridge] [ 461.719490] [<ffffffffa037900a>] ? nf_bridge_pull_encap_header+0x1c/0x27 [bridge] [ 461.727223] [<ffffffffa0379974>] ? br_nf_forward_ip+0x1c0/0x1d4 [bridge] [ 461.734292] [<ffffffff81291c4d>] ? nf_iterate+0x41/0x77 [ 461.739758] [<ffffffffa03748cc>] ? __br_deliver+0xa0/0xa0 [bridge] [ 461.746203] [<ffffffff81291cf6>] ? nf_hook_slow+0x73/0x111 [ 461.751950] [<ffffffffa03748cc>] ? __br_deliver+0xa0/0xa0 [bridge] [ 461.758378] [<ffffffffa037533a>] ? NF_HOOK.constprop.4+0x56/0x56 [bridge] This is caused by bridge netfilter special dst_entry (fake_rtable), a special shared entry, where attaching an inetpeer makes no sense. Problem is present since commit 87c48fa3b46 (ipv6: make fragment identifications less predictable) Introduce DST_NOPEER dst flag and make sure ipv6_select_ident() and __ip_select_ident() fallback to the 'no peer attached' handling. Reported-by: Chris Boot <bootc@bootc.net> Tested-by: Chris Boot <bootc@bootc.net> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-23mqprio: Avoid panic if no options are providedThomas Graf1-1/+1
Userspace may not provide TCA_OPTIONS, in fact tc currently does so not do so if no arguments are specified on the command line. Return EINVAL instead of panicing. Signed-off-by: Thomas Graf <tgraf@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-23bridge: provide a mtu() method for fake_dst_opsEric Dumazet1-0/+6
Commit 618f9bc74a039da76 (net: Move mtu handling down to the protocol depended handlers) forgot the bridge netfilter case, adding a NULL dereference in ip_fragment(). Reported-by: Chris Boot <bootc@bootc.net> CC: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Acked-by: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-23Merge branch 'for-linus' of git://neil.brown.name/mdLinus Torvalds4-10/+13
* 'for-linus' of git://neil.brown.name/md: md/bitmap: It is OK to clear bits during recovery. md: don't give up looking for spares on first failure-to-add md/raid5: ensure correct assessment of drives during degraded reshape. md/linear: fix hot-add of devices to linear arrays.
2011-12-23md/bitmap: It is OK to clear bits during recovery.NeilBrown1-4/+1
commit d0a4bb492772ce5c4bdfba3744a99ed6f6fb238f introduced a regression which is annoying but fairly harmless. When writing to an array that is undergoing recovery (a spare in being integrated into the array), writing to the array will set bits in the bitmap, but they will not be cleared when the write completes. For bits covering areas that have not been recovered yet this is not a problem as the recovery will clear the bits. However bits set in already-recovered region will stay set and never be cleared. This doesn't risk data integrity. The only negatives are: - next time there is a crash, more resyncing than necessary will be done. - the bitmap doesn't look clean, which is confusing. While an array is recovering we don't want to update the 'events_cleared' setting in the bitmap but we do still want to clear bits that have very recently been set - providing they were written to the recovering device. So split those two needs - which previously both depended on 'success' and always clear the bit of the write went to all devices. Signed-off-by: NeilBrown <neilb@suse.de>
2011-12-23md: don't give up looking for spares on first failure-to-addNeilBrown1-2/+1
Before performing a recovery we try to remove any spares that might not be working, then add any that might have become relevant. Currently we abort on the first spare that cannot be added. This is a false optimisation. It is conceivable that - depending on rules in the personality - a subsequent spare might be accepted. Also the loop does other things like count the available spares and reset the 'recovery_offset' value. If we abort early these might not happen properly. So remove the early abort. In particular if you have an array what is undergoing recovery and which has extra spares, then the recovery may not restart after as reboot as the could of 'spares' might end up as zero. Reported-by: Anssi Hannula <anssi.hannula@iki.fi> Signed-off-by: NeilBrown <neilb@suse.de>
2011-12-23md/raid5: ensure correct assessment of drives during degraded reshape.NeilBrown1-4/+10
While reshaping a degraded array (as when reshaping a RAID0 by first converting it to a degraded RAID4) we currently get confused about which devices are in_sync. In most cases we get it right, but in the region that is being reshaped we need to treat non-failed devices as in-sync when we have the data but haven't actually written it out yet. Reported-by: Adam Kwolek <adam.kwolek@intel.com> Signed-off-by: NeilBrown <neilb@suse.de>
2011-12-23md/linear: fix hot-add of devices to linear arrays.NeilBrown1-0/+1
commit d70ed2e4fafdbef0800e73942482bb075c21578b broke hot-add to a linear array. After that commit, metadata if not written to devices until they have been fully integrated into the array as determined by saved_raid_disk. That patch arranged to clear that field after a recovery completed. However for linear arrays, there is no recovery - the integration is instantaneous. So we need to explicitly clear the saved_raid_disk field. Signed-off-by: NeilBrown <neilb@suse.de>
2011-12-23sparc64: Fix MSIQ HV call ordering in pci_sun4v_msiq_build_irq().David S. Miller1-2/+2
This silently was working for many years and stopped working on Niagara-T3 machines. We need to set the MSIQ to VALID before we can set it's state to IDLE. On Niagara-T3, setting the state to IDLE first was causing HV_EINVAL errors. The hypervisor documentation says, rather ambiguously, that the MSIQ must be "initialized" before one can set the state. I previously understood this to mean merely that a successful setconf() operation has been performed on the MSIQ, which we have done at this point. But it seems to also mean that it has been set VALID too. Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-23Merge branch 'usb-linus' of ↵Linus Torvalds4-8/+9
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb * 'usb-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb: USB: Fix usb/isp1760 build on sparc usb: gadget: epautoconf: do not change number of streams usb: dwc3: core: fix cached revision on our structure usb: musb: fix reset issue with full speed device
2011-12-23Merge branch 'upstream-linus' of git://github.com/jgarzik/libata-devLinus Torvalds1-1/+1
* 'upstream-linus' of git://github.com/jgarzik/libata-dev: pata_of_platform: Add missing CONFIG_OF_IRQ dependency.
2011-12-23pata_of_platform: Add missing CONFIG_OF_IRQ dependency.David Miller1-1/+1
Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
2011-12-22ipv4: using prefetch requires including prefetch.hStephen Rothwell1-0/+1
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Acked-by: Eric Dumazet <eric.dumazet@gmail.com> Acked-by: David Miller <davem@davemloft.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-12-22VFS: Fix race between CPU hotplug and lglocksSrivatsa S. Bhat1-4/+32
Currently, the *_global_[un]lock_online() routines are not at all synchronized with CPU hotplug. Soft-lockups detected as a consequence of this race was reported earlier at https://lkml.org/lkml/2011/8/24/185. (Thanks to Cong Meng for finding out that the root-cause of this issue is the race condition between br_write_[un]lock() and CPU hotplug, which results in the lock states getting messed up). Fixing this race by just adding {get,put}_online_cpus() at appropriate places in *_global_[un]lock_online() is not a good option, because, then suddenly br_write_[un]lock() would become blocking, whereas they have been kept as non-blocking all this time, and we would want to keep them that way. So, overall, we want to ensure 3 things: 1. br_write_lock() and br_write_unlock() must remain as non-blocking. 2. The corresponding lock and unlock of the per-cpu spinlocks must not happen for different sets of CPUs. 3. Either prevent any new CPU online operation in between this lock-unlock, or ensure that the newly onlined CPU does not proceed with its corresponding per-cpu spinlock unlocked. To achieve all this: (a) We introduce a new spinlock that is taken by the *_global_lock_online() routine and released by the *_global_unlock_online() routine. (b) We register a callback for CPU hotplug notifications, and this callback takes the same spinlock as above. (c) We maintain a bitmap which is close to the cpu_online_mask, and once it is initialized in the lock_init() code, all future updates to it are done in the callback, under the above spinlock. (d) The above bitmap is used (instead of cpu_online_mask) while locking and unlocking the per-cpu locks. The callback takes the spinlock upon the CPU_UP_PREPARE event. So, if the br_write_lock-unlock sequence is in progress, the callback keeps spinning, thus preventing the CPU online operation till the lock-unlock sequence is complete. This takes care of requirement (3). The bitmap that we maintain remains unmodified throughout the lock-unlock sequence, since all updates to it are managed by the callback, which takes the same spinlock as the one taken by the lock code and released only by the unlock routine. Combining this with (d) above, satisfies requirement (2). Overall, since we use a spinlock (mentioned in (a)) to prevent CPU hotplug operations from racing with br_write_lock-unlock, requirement (1) is also taken care of. By the way, it is to be noted that a CPU offline operation can actually run in parallel with our lock-unlock sequence, because our callback doesn't react to notifications earlier than CPU_DEAD (in order to maintain our bitmap properly). And this means, since we use our own bitmap (which is stale, on purpose) during the lock-unlock sequence, we could end up unlocking the per-cpu lock of an offline CPU (because we had locked it earlier, when the CPU was online), in order to satisfy requirement (2). But this is harmless, though it looks a bit awkward. Debugged-by: Cong Meng <mc@linux.vnet.ibm.com> Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Cc: stable@vger.kernel.org
2011-12-22Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/netLinus Torvalds27-47/+220
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: net: Add a flow_cache_flush_deferred function ipv4: reintroduce route cache garbage collector net: have ipconfig not wait if no dev is available sctp: Do not account for sizeof(struct sk_buff) in estimated rwnd asix: new device id davinci-cpdma: fix locking issue in cpdma_chan_stop sctp: fix incorrect overflow check on autoclose r8169: fix Config2 MSIEnable bit setting. llc: llc_cmsg_rcv was getting called after sk_eat_skb. net: bpf_jit: fix an off-one bug in x86_64 cond jump target iwlwifi: update SCD BC table for all SCD queues Revert "Bluetooth: Revert: Fix L2CAP connection establishment" Bluetooth: Clear RFCOMM session timer when disconnecting last channel Bluetooth: Prevent uninitialized data access in L2CAP configuration iwlwifi: allow to switch to HT40 if not associated iwlwifi: tx_sync only on PAN context mwifiex: avoid double list_del in command cancel path ath9k: fix max phy rate at rate control init nfc: signedness bug in __nci_request() iwlwifi: do not set the sequence control bit is not needed
2011-12-22Merge branch 'for-linus' of ↵Linus Torvalds1-0/+4
git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound: ALSA: atmel/ac97c: using software reset instead hardware reset if not available
2011-12-22Merge branch 'for-linus' of ↵Linus Torvalds10-20/+29
git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-2.6 * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-2.6: mfd: Include linux/io.h to jz4740-adc mfd: Use request_threaded_irq for twl4030-irq instead of irq_set_chained_handler mfd: Base interrupt for twl4030-irq must be one-shot mfd: Handle tps65910 clear-mask correctly mfd: add #ifdef CONFIG_DEBUG_FS guard for ab8500_debug_resources mfd: Fix twl-core oops while calling twl_i2c_* for unbound driver mfd: include linux/module.h for ab5500-debugfs mfd: Update wm8994 active device checks for WM1811 mfd: Set tps6586x bits if new value is different from the old one mfd: Set da903x bits if new value is different from the old one mfd: Set adp5520 bits if new value is different from the old one mfd: Add missed free_irq in da903x_remove
2011-12-22vfs: __read_cache_page should use gfp argument rather than GFP_KERNELDave Kleikamp1-5/+2
lockdep reports a deadlock in jfs because a special inode's rw semaphore is taken recursively. The mapping's gfp mask is GFP_NOFS, but is not used when __read_cache_page() calls add_to_page_cache_lru(). Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com> Acked-by: Hugh Dickins <hughd@google.com> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Cc: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-12-22Merge branch 'for-greg' of ↵Greg Kroah-Hartman3-4/+5
git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into usb-linus * 'for-greg' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb: usb: gadget: epautoconf: do not change number of streams usb: dwc3: core: fix cached revision on our structure usb: musb: fix reset issue with full speed device
2011-12-22USB: Fix usb/isp1760 build on sparcDavid Miller1-4/+4
This commit: commit 8f5d621543cb064d2989fc223d3c2bc61a43981e Author: Joachim Foerster <joachim.foerster@missinglinkelectronics.com> Date: Mon Oct 10 18:06:54 2011 +0200 usb/isp1760: Let OF bindings depend on general CONFIG_OF instead of PPC_OF . To be able to use the driver on other OF-aware architectures, too. And add necessary OF related #includes to fix compilation error. Signed-off-by: Joachim Foerster <joachim.foerster@missinglinkelectronics.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> enabled the build on all CONFIG_OF architectures, but it cannot do this. This driver depends upon CONFIG_OF_IRQ but not all CONFIG_OF platforms support that infrastructure, in particular Sparc does not so the build fails. Please push a patch like the following to Linus so that this code only gets built where it actually should. -------------------- usb/isp1760: Add missing CONFIG_OF_IRQ dependency on OF code. Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2011-12-22net: Add a flow_cache_flush_deferred functionSteffen Klassert3-4/+27
flow_cach_flush() might sleep but can be called from atomic context via the xfrm garbage collector. So add a flow_cache_flush_deferred() function and use this if the xfrm garbage colector is invoked from within the packet path. Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> Acked-by: Timo Teräs <timo.teras@iki.fi> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-22ipv4: reintroduce route cache garbage collectorEric Dumazet1-0/+107
Commit 2c8cec5c10b (ipv4: Cache learned PMTU information in inetpeer) removed IP route cache garbage collector a bit too soon, as this gc was responsible for expired routes cleanup, releasing their neighbour reference. As pointed out by Robert Gladewitz, recent kernels can fill and exhaust their neighbour cache. Reintroduce the garbage collection, since we'll have to wait our neighbour lookups become refcount-less to not depend on this stuff. Reported-by: Robert Gladewitz <gladewitz@gmx.de> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-21Merge branch 'master' of ↵John W. Linville4-5/+14
git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless into for-davem
2011-12-21firmware: Refer to the co-maintained linux-firmware.git repositoryBen Hutchings1-1/+2
David and I are sharing maintenance of this repository. Patches should be sent to both of us. Signed-off-by: Ben Hutchings <ben@decadent.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>