From 6fd92b63d0626a8fe7eb8e2e50d19ecaa18cb412 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Sun, 9 Mar 2008 21:01:04 +0100
Subject: x86: change x86 to use generic find_next_bit

The versions with inline assembly are in fact slower on the machines I
tested them on (in userspace) (Athlon XP 2800+, p4-like Xeon 2.8GHz, AMD
Opteron 270). The i386-version needed a fix similar to 06024f21 to avoid
crashing the benchmark.

Benchmark using: gcc -fomit-frame-pointer -Os. For each bitmap size
1...512, for each possible bitmap with one bit set, for each possible
offset: find the position of the first bit starting at offset. If you
follow ;). Times include setup of the bitmap and checking of the
results.

		Athlon		Xeon		Opteron 32/64bit
x86-specific:	0m3.692s	0m2.820s	0m3.196s / 0m2.480s
generic:	0m2.622s	0m1.662s	0m2.100s / 0m1.572s

If the bitmap size is not a multiple of BITS_PER_LONG, and no set
(cleared) bit is found, find_next_bit (find_next_zero_bit) returns a
value outside of the range [0, size]. The generic version always returns
exactly size. The generic version also uses unsigned long everywhere,
while the x86 versions use a mishmash of int, unsigned (int), long and
unsigned long.

Using the generic version does give a slightly bigger kernel, though.

defconfig:	   text    data     bss     dec     hex filename
x86-specific:	4738555  481232  626688 5846475  5935cb vmlinux (32 bit)
generic:	4738621  481232  626688 5846541  59360d vmlinux (32 bit)
x86-specific:	5392395  846568  724424 6963387  6a40bb vmlinux (64 bit)
generic:	5392458  846568  724424 6963450  6a40fa vmlinux (64 bit)

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/bitops.h    |  6 ++++++
 include/asm-x86/bitops_32.h | 16 ----------------
 include/asm-x86/bitops_64.h |  2 --
 3 files changed, 6 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 1ae7b270a1ef..31e408de90c6 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -306,6 +306,12 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 #undef BIT_ADDR
 #undef ADDR
 
+unsigned long find_next_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset);
+unsigned long find_next_zero_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset);
+
+
 #ifdef CONFIG_X86_32
 # include "bitops_32.h"
 #else
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index 2513a81f82aa..7c9ed759afb2 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -39,14 +39,6 @@ static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
 	return res;
 }
 
-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bit number to start searching at
- * @size: The maximum size to search
- */
-int find_next_zero_bit(const unsigned long *addr, int size, int offset);
-
 /**
  * __ffs - find first bit in word.
  * @word: The word to search
@@ -82,14 +74,6 @@ static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
 	return x;
 }
 
-/**
- * find_next_bit - find the first set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bit number to start searching at
- * @size: The maximum size to search
- */
-int find_next_bit(const unsigned long *addr, int size, int offset);
-
 /**
  * ffz - find first zero in word.
  * @word: The word to search
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 365f8207ea59..65b20fb2ae78 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -6,9 +6,7 @@
  */
 
 extern long find_first_zero_bit(const unsigned long *addr, unsigned long size);
-extern long find_next_zero_bit(const unsigned long *addr, long size, long offset);
 extern long find_first_bit(const unsigned long *addr, unsigned long size);
-extern long find_next_bit(const unsigned long *addr, long size, long offset);
 
 /* return index of first bet set in val or max when no bit is set */
 static inline long __scanbit(unsigned long val, unsigned long max)
-- 
cgit v1.2.3


From 64970b68d2b3ed32b964b0b30b1b98518fde388e Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Tue, 11 Mar 2008 16:17:19 +0100
Subject: x86, generic: optimize find_next_(zero_)bit for small constant-size
 bitmaps

This moves an optimization for searching constant-sized small
bitmaps form x86_64-specific to generic code.

On an i386 defconfig (the x86#testing one), the size of vmlinux hardly
changes with this applied. I have observed only four places where this
optimization avoids a call into find_next_bit:

In the functions return_unused_surplus_pages, alloc_fresh_huge_page,
and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap.
In __next_cpu a call is avoided for a 32-bit bitmap. That's it.

On x86_64, 52 locations are optimized with a minimal increase in
code size:

Current #testing defconfig:
	146 x bsf, 27 x find_next_*bit
   text    data     bss     dec     hex filename
   5392637  846592  724424 6963653  6a41c5 vmlinux

After removing the x86_64 specific optimization for find_next_*bit:
	94 x bsf, 79 x find_next_*bit
   text    data     bss     dec     hex filename
   5392358  846592  724424 6963374  6a40ae vmlinux

After this patch (making the optimization generic):
	146 x bsf, 27 x find_next_*bit
   text    data     bss     dec     hex filename
   5392396  846592  724424 6963412  6a40d4 vmlinux

[ tglx@linutronix.de: build fixes ]

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/bitops/find.h |  2 +
 include/asm-x86/bitops.h          |  6 ---
 include/asm-x86/bitops_64.h       | 10 -----
 include/linux/bitops.h            | 77 +++++++++++++++++++++++++++++++++++++++
 lib/find_next_bit.c               | 25 +++++--------
 5 files changed, 88 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 72a51e5a12ef..1914e9742512 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -1,11 +1,13 @@
 #ifndef _ASM_GENERIC_BITOPS_FIND_H_
 #define _ASM_GENERIC_BITOPS_FIND_H_
 
+#ifndef CONFIG_GENERIC_FIND_NEXT_BIT
 extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
 		size, unsigned long offset);
 
 extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned
 		long size, unsigned long offset);
+#endif
 
 #define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
 #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 31e408de90c6..1ae7b270a1ef 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -306,12 +306,6 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 #undef BIT_ADDR
 #undef ADDR
 
-unsigned long find_next_bit(const unsigned long *addr,
-		unsigned long size, unsigned long offset);
-unsigned long find_next_zero_bit(const unsigned long *addr,
-		unsigned long size, unsigned long offset);
-
-
 #ifdef CONFIG_X86_32
 # include "bitops_32.h"
 #else
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 65b20fb2ae78..7118ef2cc4ec 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -15,16 +15,6 @@ static inline long __scanbit(unsigned long val, unsigned long max)
 	return val;
 }
 
-#define find_next_bit(addr,size,off) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? 	  \
-  ((off) + (__scanbit((*(unsigned long *)addr) >> (off),(size)-(off)))) : \
-	find_next_bit(addr,size,off)))
-
-#define find_next_zero_bit(addr,size,off) \
-((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? 	  \
-  ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off)),(size)-(off)))) : \
-	find_next_zero_bit(addr,size,off)))
-
 #define find_first_bit(addr, size)					\
 	((__builtin_constant_p((size)) && (size) <= BITS_PER_LONG	\
 	  ? (__scanbit(*(unsigned long *)(addr), (size)))		\
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 40d54731de7e..3865f2c93bd8 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -112,4 +112,81 @@ static inline unsigned fls_long(unsigned long l)
 	return fls64(l);
 }
 
+#ifdef __KERNEL__
+#ifdef CONFIG_GENERIC_FIND_NEXT_BIT
+extern unsigned long __find_next_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset);
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ */
+static __always_inline unsigned long
+find_next_bit(const unsigned long *addr, unsigned long size,
+		unsigned long offset)
+{
+	unsigned long value;
+
+	/* Avoid a function call if the bitmap size is a constant */
+	/* and not bigger than BITS_PER_LONG. */
+
+	/* insert a sentinel so that __ffs returns size if there */
+	/* are no set bits in the bitmap */
+	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
+		value = (*addr) & ((~0ul) << offset);
+		value |= (1ul << size);
+		return __ffs(value);
+	}
+
+	/* the result of __ffs(0) is undefined, so it needs to be */
+	/* handled separately */
+	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
+		value = (*addr) & ((~0ul) << offset);
+		return (value == 0) ? BITS_PER_LONG : __ffs(value);
+	}
+
+	/* size is not constant or too big */
+	return __find_next_bit(addr, size, offset);
+}
+
+extern unsigned long __find_next_zero_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset);
+
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ */
+static __always_inline unsigned long
+find_next_zero_bit(const unsigned long *addr, unsigned long size,
+		unsigned long offset)
+{
+	unsigned long value;
+
+	/* Avoid a function call if the bitmap size is a constant */
+	/* and not bigger than BITS_PER_LONG. */
+
+	/* insert a sentinel so that __ffs returns size if there */
+	/* are no set bits in the bitmap */
+	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
+		value = (~(*addr)) & ((~0ul) << offset);
+		value |= (1ul << size);
+		return __ffs(value);
+	}
+
+	/* the result of __ffs(0) is undefined, so it needs to be */
+	/* handled separately */
+	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
+		value = (~(*addr)) & ((~0ul) << offset);
+		return (value == 0) ? BITS_PER_LONG : __ffs(value);
+	}
+
+	/* size is not constant or too big */
+	return __find_next_zero_bit(addr, size, offset);
+}
+#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */
+#endif /* __KERNEL__ */
 #endif
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
index 5820e072b890..ce94c4c92d10 100644
--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -15,17 +15,12 @@
 #include <asm/byteorder.h>
 
 #define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-#undef find_next_bit
-#undef find_next_zero_bit
-
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
+
+/*
+ * Find the next set bit in a memory region.
  */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-		unsigned long offset)
+unsigned long __find_next_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset)
 {
 	const unsigned long *p = addr + BITOP_WORD(offset);
 	unsigned long result = offset & ~(BITS_PER_LONG-1);
@@ -62,15 +57,14 @@ found_first:
 found_middle:
 	return result + __ffs(tmp);
 }
-
-EXPORT_SYMBOL(find_next_bit);
+EXPORT_SYMBOL(__find_next_bit);
 
 /*
  * This implementation of find_{first,next}_zero_bit was stolen from
  * Linus' asm-alpha/bitops.h.
  */
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
-		unsigned long offset)
+unsigned long __find_next_zero_bit(const unsigned long *addr,
+		unsigned long size, unsigned long offset)
 {
 	const unsigned long *p = addr + BITOP_WORD(offset);
 	unsigned long result = offset & ~(BITS_PER_LONG-1);
@@ -107,8 +101,7 @@ found_first:
 found_middle:
 	return result + ffz(tmp);
 }
-
-EXPORT_SYMBOL(find_next_zero_bit);
+EXPORT_SYMBOL(__find_next_zero_bit);
 
 #ifdef __BIG_ENDIAN
 
-- 
cgit v1.2.3


From 12d9c8420b9daa1da3d9e090640fb24bcd0deba2 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Sat, 15 Mar 2008 13:04:42 +0100
Subject: x86: merge the simple bitops and move them to bitops.h

Some of those can be written in such a way that the same
inline assembly can be used to generate both 32 bit and
64 bit code.

For ffs and fls, x86_64 unconditionally used the cmov
instruction and i386 unconditionally used a conditional
branch over a mov instruction. In the current patch I
chose to select the version based on the availability
of the cmov instruction instead. A small detail here is
that x86_64 did not previously set CONFIG_X86_CMOV=y.

Improved comments for ffs, ffz, fls and variations.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.cpu        |  2 +-
 include/asm-x86/bitops.h    | 99 ++++++++++++++++++++++++++++++++++++++++++++-
 include/asm-x86/bitops_32.h | 64 -----------------------------
 include/asm-x86/bitops_64.h | 76 ----------------------------------
 4 files changed, 99 insertions(+), 142 deletions(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 4da3cdb9c1b1..cf3ff2c5cef2 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -398,7 +398,7 @@ config X86_TSC
 # generates cmov.
 config X86_CMOV
 	def_bool y
-	depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)
+	depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64)
 
 config X86_MINIMUM_CPU_FAMILY
 	int
diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 1ae7b270a1ef..1b6f547cb6bd 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -67,7 +67,6 @@ static inline void __set_bit(int nr, volatile void *addr)
 		     : "Ir" (nr) : "memory");
 }
 
-
 /**
  * clear_bit - Clears a bit in memory
  * @nr: Bit to clear
@@ -304,6 +303,104 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 
 #undef BASE_ADDR
 #undef BIT_ADDR
+/**
+ * __ffs - find first set bit in word
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	__asm__("bsf %1,%0"
+		:"=r" (word)
+		:"rm" (word));
+	return word;
+}
+
+/**
+ * ffz - find first zero bit in word
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+	__asm__("bsf %1,%0"
+		:"=r" (word)
+		:"r" (~word));
+	return word;
+}
+
+/*
+ * __fls: find last set bit in word
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	__asm__("bsr %1,%0"
+		:"=r" (word)
+		:"rm" (word));
+	return word;
+}
+
+#ifdef __KERNEL__
+/**
+ * ffs - find first set bit in word
+ * @x: the word to search
+ *
+ * This is defined the same way as the libc and compiler builtin ffs
+ * routines, therefore differs in spirit from the other bitops.
+ *
+ * ffs(value) returns 0 if value is 0 or the position of the first
+ * set bit if value is nonzero. The first (least significant) bit
+ * is at position 1.
+ */
+static inline int ffs(int x)
+{
+	int r;
+#ifdef CONFIG_X86_CMOV
+	__asm__("bsfl %1,%0\n\t"
+		"cmovzl %2,%0"
+		: "=r" (r) : "rm" (x), "r" (-1));
+#else
+	__asm__("bsfl %1,%0\n\t"
+		"jnz 1f\n\t"
+		"movl $-1,%0\n"
+		"1:" : "=r" (r) : "rm" (x));
+#endif
+	return r + 1;
+}
+
+/**
+ * fls - find last set bit in word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffs, but returns the position of the most significant set bit.
+ *
+ * fls(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 32.
+ */
+static inline int fls(int x)
+{
+	int r;
+#ifdef CONFIG_X86_CMOV
+	__asm__("bsrl %1,%0\n\t"
+		"cmovzl %2,%0"
+		: "=&r" (r) : "rm" (x), "rm" (-1));
+#else
+	__asm__("bsrl %1,%0\n\t"
+		"jnz 1f\n\t"
+		"movl $-1,%0\n"
+		"1:" : "=r" (r) : "rm" (x));
+#endif
+	return r + 1;
+}
+#endif /* __KERNEL__ */
+
 #undef ADDR
 
 #ifdef CONFIG_X86_32
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index 7c9ed759afb2..3ed64b21b765 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -39,20 +39,6 @@ static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
 	return res;
 }
 
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
-	__asm__("bsfl %1,%0"
-		:"=r" (word)
-		:"rm" (word));
-	return word;
-}
-
 /**
  * find_first_bit - find the first set bit in a memory region
  * @addr: The address to start the search at
@@ -74,60 +60,10 @@ static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
 	return x;
 }
 
-/**
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static inline unsigned long ffz(unsigned long word)
-{
-	__asm__("bsfl %1,%0"
-		:"=r" (word)
-		:"r" (~word));
-	return word;
-}
-
 #ifdef __KERNEL__
 
 #include <asm-generic/bitops/sched.h>
 
-/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz() (man ffs).
- */
-static inline int ffs(int x)
-{
-	int r;
-
-	__asm__("bsfl %1,%0\n\t"
-		"jnz 1f\n\t"
-		"movl $-1,%0\n"
-		"1:" : "=r" (r) : "rm" (x));
-	return r+1;
-}
-
-/**
- * fls - find last bit set
- * @x: the word to search
- *
- * This is defined the same way as ffs().
- */
-static inline int fls(int x)
-{
-	int r;
-
-	__asm__("bsrl %1,%0\n\t"
-		"jnz 1f\n\t"
-		"movl $-1,%0\n"
-		"1:" : "=r" (r) : "rm" (x));
-	return r+1;
-}
-
 #include <asm-generic/bitops/hweight.h>
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 7118ef2cc4ec..a5fbe7a02a3f 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -35,70 +35,10 @@ static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
 	}
 }
 
-/**
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static inline unsigned long ffz(unsigned long word)
-{
-	__asm__("bsfq %1,%0"
-		:"=r" (word)
-		:"r" (~word));
-	return word;
-}
-
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
-	__asm__("bsfq %1,%0"
-		:"=r" (word)
-		:"rm" (word));
-	return word;
-}
-
-/*
- * __fls: find last bit set.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-static inline unsigned long __fls(unsigned long word)
-{
-	__asm__("bsrq %1,%0"
-		:"=r" (word)
-		:"rm" (word));
-	return word;
-}
-
 #ifdef __KERNEL__
 
 #include <asm-generic/bitops/sched.h>
 
-/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static inline int ffs(int x)
-{
-	int r;
-
-	__asm__("bsfl %1,%0\n\t"
-		"cmovzl %2,%0" 
-		: "=r" (r) : "rm" (x), "r" (-1));
-	return r+1;
-}
-
 /**
  * fls64 - find last bit set in 64 bit word
  * @x: the word to search
@@ -112,22 +52,6 @@ static inline int fls64(__u64 x)
 	return __fls(x) + 1;
 }
 
-/**
- * fls - find last bit set
- * @x: the word to search
- *
- * This is defined the same way as ffs.
- */
-static inline int fls(int x)
-{
-	int r;
-
-	__asm__("bsrl %1,%0\n\t"
-		"cmovzl %2,%0"
-		: "=&r" (r) : "rm" (x), "rm" (-1));
-	return r+1;
-}
-
 #define ARCH_HAS_FAST_MULTIPLIER 1
 
 #include <asm-generic/bitops/hweight.h>
-- 
cgit v1.2.3


From 7d9dff22e8ad06ad330968c9e3d3a2fb55a5f9c3 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Sat, 15 Mar 2008 18:30:57 +0100
Subject: generic: introduce a generic __fls implementation

Add a generic __fls implementation in the same spirit as
the generic __ffs one. It finds the last (most significant)
set bit in the given long value.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/bitops/__fls.h | 43 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 include/asm-generic/bitops/__fls.h

(limited to 'include')

diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h
new file mode 100644
index 000000000000..be24465403d6
--- /dev/null
+++ b/include/asm-generic/bitops/__fls.h
@@ -0,0 +1,43 @@
+#ifndef _ASM_GENERIC_BITOPS___FLS_H_
+#define _ASM_GENERIC_BITOPS___FLS_H_
+
+#include <asm/types.h>
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	int num = BITS_PER_LONG - 1;
+
+#if BITS_PER_LONG == 64
+	if (!(word & (~0ul << 32))) {
+		num -= 32;
+		word <<= 32;
+	}
+#endif
+	if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
+		num -= 16;
+		word <<= 16;
+	}
+	if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
+		num -= 8;
+		word <<= 8;
+	}
+	if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
+		num -= 4;
+		word <<= 4;
+	}
+	if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
+		num -= 2;
+		word <<= 2;
+	}
+	if (!(word & (~0ul << (BITS_PER_LONG-1))))
+		num -= 1;
+	return num;
+}
+
+#endif /* _ASM_GENERIC_BITOPS___FLS_H_ */
-- 
cgit v1.2.3


From 56a6b1eb7bfb5ace0b5cb9c149f502fbd101b8ab Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Sat, 15 Mar 2008 18:31:49 +0100
Subject: generic: implement __fls on all 64-bit archs

Implement __fls on all 64-bit archs:

alpha has an implementation of fls64.
	Added __fls(x) = fls64(x) - 1.

ia64 has fls, but not __fls.
	Added __fls based on code of fls.

mips and powerpc have __ilog2, which is the same as __fls.
	Added __fls = __ilog2.

parisc, s390, sh and sparc64:
	Include generic __fls.

x86_64 already has __fls.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-alpha/bitops.h   |  5 +++++
 include/asm-ia64/bitops.h    | 16 ++++++++++++++++
 include/asm-mips/bitops.h    |  5 +++++
 include/asm-parisc/bitops.h  |  1 +
 include/asm-powerpc/bitops.h |  5 +++++
 include/asm-s390/bitops.h    |  1 +
 include/asm-sh/bitops.h      |  1 +
 include/asm-sparc64/bitops.h |  1 +
 8 files changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 9e19a704d484..15f3ae25c511 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -388,6 +388,11 @@ static inline int fls64(unsigned long x)
 }
 #endif
 
+static inline unsigned long __fls(unsigned long x)
+{
+	return fls64(x) - 1;
+}
+
 static inline int fls(int x)
 {
 	return fls64((unsigned int) x);
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 953d3df9dd22..e2ca80037335 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -407,6 +407,22 @@ fls (int t)
 	return ia64_popcnt(x);
 }
 
+/*
+ * Find the last (most significant) bit set.  Undefined for x==0.
+ * Bits are numbered from 0..63 (e.g., __fls(9) == 3).
+ */
+static inline unsigned long
+__fls (unsigned long x)
+{
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	x |= x >> 32;
+	return ia64_popcnt(x) - 1;
+}
+
 #include <asm-generic/bitops/fls64.h>
 
 /*
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index ec75ce4cdb8c..c2bd126c3b4e 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -591,6 +591,11 @@ static inline int __ilog2(unsigned long x)
 	return 63 - lz;
 }
 
+static inline unsigned long __fls(unsigned long x)
+{
+	return __ilog2(x);
+}
+
 #if defined(CONFIG_CPU_MIPS32) || defined(CONFIG_CPU_MIPS64)
 
 /*
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
index f8eebcbad01f..7a6ea10bd231 100644
--- a/include/asm-parisc/bitops.h
+++ b/include/asm-parisc/bitops.h
@@ -210,6 +210,7 @@ static __inline__ int fls(int x)
 	return ret;
 }
 
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/hweight.h>
 #include <asm-generic/bitops/lock.h>
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index a99a74929475..897eade3afbe 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -313,6 +313,11 @@ static __inline__ int fls(unsigned int x)
 	return 32 - lz;
 }
 
+static __inline__ unsigned long __fls(unsigned long x)
+{
+	return __ilog2(x);
+}
+
 /*
  * 64-bit can do this using one cntlzd (count leading zeroes doubleword)
  * instruction; for 32-bit we use the generic version, which does two
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index 965394e69452..b4eb24ab5af9 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -769,6 +769,7 @@ static inline int sched_find_first_bit(unsigned long *b)
 }
 
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h
index b6ba5a60dec2..d7d382f63ee5 100644
--- a/include/asm-sh/bitops.h
+++ b/include/asm-sh/bitops.h
@@ -95,6 +95,7 @@ static inline unsigned long ffz(unsigned long word)
 #include <asm-generic/bitops/ext2-atomic.h>
 #include <asm-generic/bitops/minix.h>
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index 982ce8992b91..11f9d8146cdf 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -34,6 +34,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr);
 #include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From d57594c203b1e7b54373080a797f0cbfa4aade68 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Sat, 15 Mar 2008 18:32:36 +0100
Subject: bitops: use __fls for fls64 on 64-bit archs

Use __fls for fls64 on 64-bit archs. The implementation for
64-bit archs is moved from x86_64 to asm-generic.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/bitops/fls64.h | 22 ++++++++++++++++++++++
 include/asm-x86/bitops_64.h        | 15 ++-------------
 2 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bitops/fls64.h b/include/asm-generic/bitops/fls64.h
index 1b6b17ce2428..86d403f8b256 100644
--- a/include/asm-generic/bitops/fls64.h
+++ b/include/asm-generic/bitops/fls64.h
@@ -3,6 +3,18 @@
 
 #include <asm/types.h>
 
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#if BITS_PER_LONG == 32
 static inline int fls64(__u64 x)
 {
 	__u32 h = x >> 32;
@@ -10,5 +22,15 @@ static inline int fls64(__u64 x)
 		return fls(h) + 32;
 	return fls(x);
 }
+#elif BITS_PER_LONG == 64
+static inline int fls64(__u64 x)
+{
+	if (x == 0)
+		return 0;
+	return __fls(x) + 1;
+}
+#else
+#error BITS_PER_LONG not 32 or 64
+#endif
 
 #endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index a5fbe7a02a3f..d13352087191 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -39,25 +39,14 @@ static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
 
 #include <asm-generic/bitops/sched.h>
 
-/**
- * fls64 - find last bit set in 64 bit word
- * @x: the word to search
- *
- * This is defined the same way as fls.
- */
-static inline int fls64(__u64 x)
-{
-	if (x == 0)
-		return 0;
-	return __fls(x) + 1;
-}
-
 #define ARCH_HAS_FAST_MULTIPLIER 1
 
 #include <asm-generic/bitops/hweight.h>
 
 #endif /* __KERNEL__ */
 
+#include <asm-generic/bitops/fls64.h>
+
 #ifdef __KERNEL__
 
 #include <asm-generic/bitops/ext2-non-atomic.h>
-- 
cgit v1.2.3


From 77b9bd9c49442407804c37bcc82021a35277f83c Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Tue, 1 Apr 2008 11:46:19 +0200
Subject: x86: generic versions of find_first_(zero_)bit, convert i386

Generic versions of __find_first_bit and __find_first_zero_bit
are introduced as simplified versions of __find_next_bit and
__find_next_zero_bit. Their compilation and use are guarded by
a new config variable GENERIC_FIND_FIRST_BIT.

The generic versions of find_first_bit and find_first_zero_bit
are implemented in terms of the newly introduced __find_first_bit
and __find_first_zero_bit.

This patch does not remove the i386-specific implementation,
but it does switch i386 to use the generic functions by setting
GENERIC_FIND_FIRST_BIT=y for X86_32.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig            |  3 +++
 include/asm-x86/bitops_32.h |  2 ++
 include/linux/bitops.h      | 34 ++++++++++++++++++++++++++
 lib/Makefile                |  1 +
 lib/find_next_bit.c         | 58 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 98 insertions(+)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5639de47ed45..1a69b68ff6cc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -77,6 +77,9 @@ config GENERIC_BUG
 	def_bool y
 	depends on BUG
 
+config GENERIC_FIND_FIRST_BIT
+	def_bool X86_32
+
 config GENERIC_FIND_NEXT_BIT
 	def_bool y
 
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index 3ed64b21b765..ba2c0defafa8 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -5,6 +5,7 @@
  * Copyright 1992, Linus Torvalds.
  */
 
+#ifndef CONFIG_GENERIC_FIND_FIRST_BIT
 /**
  * find_first_zero_bit - find the first zero bit in a memory region
  * @addr: The address to start the search at
@@ -59,6 +60,7 @@ static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
 	}
 	return x;
 }
+#endif
 
 #ifdef __KERNEL__
 
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 3865f2c93bd8..355d67ba3bdc 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -113,6 +113,40 @@ static inline unsigned fls_long(unsigned long l)
 }
 
 #ifdef __KERNEL__
+#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
+extern unsigned long __find_first_bit(const unsigned long *addr,
+		unsigned long size);
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first set bit.
+ */
+static __always_inline unsigned long
+find_first_bit(const unsigned long *addr, unsigned long size)
+{
+	return __find_first_bit(addr, size);
+}
+
+extern unsigned long __find_first_zero_bit(const unsigned long *addr,
+		unsigned long size);
+
+/**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first cleared bit.
+ */
+static __always_inline unsigned long
+find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+	return __find_first_zero_bit(addr, size);
+}
+#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
+
 #ifdef CONFIG_GENERIC_FIND_NEXT_BIT
 extern unsigned long __find_next_bit(const unsigned long *addr,
 		unsigned long size, unsigned long offset);
diff --git a/lib/Makefile b/lib/Makefile
index bf8000fc7d48..2d7001b7f5a4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
index ce94c4c92d10..d3f5784807b4 100644
--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -16,6 +16,7 @@
 
 #define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
 
+#ifdef CONFIG_GENERIC_FIND_NEXT_BIT
 /*
  * Find the next set bit in a memory region.
  */
@@ -102,6 +103,63 @@ found_middle:
 	return result + ffz(tmp);
 }
 EXPORT_SYMBOL(__find_next_zero_bit);
+#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */
+
+#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
+/*
+ * Find the first set bit in a memory region.
+ */
+unsigned long __find_first_bit(const unsigned long *addr,
+		unsigned long size)
+{
+	const unsigned long *p = addr;
+	unsigned long result = 0;
+	unsigned long tmp;
+
+	while (size & ~(BITS_PER_LONG-1)) {
+		if ((tmp = *(p++)))
+			goto found;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+
+	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
+	if (tmp == 0UL)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found:
+	return result + __ffs(tmp);
+}
+EXPORT_SYMBOL(__find_first_bit);
+
+/*
+ * Find the first cleared bit in a memory region.
+ */
+unsigned long __find_first_zero_bit(const unsigned long *addr,
+		unsigned long size)
+{
+	const unsigned long *p = addr;
+	unsigned long result = 0;
+	unsigned long tmp;
+
+	while (size & ~(BITS_PER_LONG-1)) {
+		if (~(tmp = *(p++)))
+			goto found;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+
+	tmp = (*p) | (~0UL << size);
+	if (tmp == ~0UL)	/* Are any bits zero? */
+		return result + size;	/* Nope. */
+found:
+	return result + ffz(tmp);
+}
+EXPORT_SYMBOL(__find_first_zero_bit);
+#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
 
 #ifdef __BIG_ENDIAN
 
-- 
cgit v1.2.3


From 2aba6925fdb96428d1129a61b1233597a03a387b Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Tue, 1 Apr 2008 17:41:26 +0200
Subject: x86: switch 64-bit to generic find_first_bit

Switch x86_64 to generic find_first_bit. The x86_64-specific
implementation is not removed.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig            | 2 +-
 arch/x86/lib/bitops_64.c    | 2 ++
 include/asm-x86/bitops_64.h | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a69b68ff6cc..700447738e73 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -78,7 +78,7 @@ config GENERIC_BUG
 	depends on BUG
 
 config GENERIC_FIND_FIRST_BIT
-	def_bool X86_32
+	def_bool y
 
 config GENERIC_FIND_NEXT_BIT
 	def_bool y
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
index 0eeb704d2513..568467d390c0 100644
--- a/arch/x86/lib/bitops_64.c
+++ b/arch/x86/lib/bitops_64.c
@@ -1,3 +1,4 @@
+#ifndef CONFIG_GENERIC_FIND_FIRST_BIT
 #include <linux/bitops.h>
 
 #undef find_first_zero_bit
@@ -105,3 +106,4 @@ long find_first_bit(const unsigned long * addr, unsigned long size)
 
 EXPORT_SYMBOL(find_first_bit);
 EXPORT_SYMBOL(find_first_zero_bit);
+#endif
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index d13352087191..4081d7ecc2bd 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -5,6 +5,7 @@
  * Copyright 1992, Linus Torvalds.
  */
 
+#ifndef CONFIG_GENERIC_FIND_FIRST_BIT
 extern long find_first_zero_bit(const unsigned long *addr, unsigned long size);
 extern long find_first_bit(const unsigned long *addr, unsigned long size);
 
@@ -24,6 +25,7 @@ static inline long __scanbit(unsigned long val, unsigned long max)
 	((__builtin_constant_p((size)) && (size) <= BITS_PER_LONG	\
 	  ? (__scanbit(~*(unsigned long *)(addr), (size)))		\
 	  : find_first_zero_bit((addr), (size))))
+#endif
 
 static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
 				  int len)
-- 
cgit v1.2.3


From 3a48305028aa38afba93fc05066c71a6ee668ad8 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Tue, 1 Apr 2008 17:42:21 +0200
Subject: x86: optimize find_first_bit for small bitmaps

Avoid a call to find_first_bit if the bitmap size is know at
compile time and small enough to fit in a single long integer.
Modeled after an optimization in the original x86_64-specific
code.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/bitops.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 355d67ba3bdc..48bde600a2db 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -127,6 +127,20 @@ extern unsigned long __find_first_bit(const unsigned long *addr,
 static __always_inline unsigned long
 find_first_bit(const unsigned long *addr, unsigned long size)
 {
+	/* Avoid a function call if the bitmap size is a constant */
+	/* and not bigger than BITS_PER_LONG. */
+
+	/* insert a sentinel so that __ffs returns size if there */
+	/* are no set bits in the bitmap */
+	if (__builtin_constant_p(size) && (size < BITS_PER_LONG))
+		return __ffs((*addr) | (1ul << size));
+
+	/* the result of __ffs(0) is undefined, so it needs to be */
+	/* handled separately */
+	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
+		return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr);
+
+	/* size is not constant or too big */
 	return __find_first_bit(addr, size);
 }
 
@@ -143,6 +157,21 @@ extern unsigned long __find_first_zero_bit(const unsigned long *addr,
 static __always_inline unsigned long
 find_first_zero_bit(const unsigned long *addr, unsigned long size)
 {
+	/* Avoid a function call if the bitmap size is a constant */
+	/* and not bigger than BITS_PER_LONG. */
+
+	/* insert a sentinel so that __ffs returns size if there */
+	/* are no set bits in the bitmap */
+	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
+		return __ffs(~(*addr) | (1ul << size));
+	}
+
+	/* the result of __ffs(0) is undefined, so it needs to be */
+	/* handled separately */
+	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
+		return (~(*addr) == 0) ? BITS_PER_LONG : __ffs(~(*addr));
+
+	/* size is not constant or too big */
 	return __find_first_zero_bit(addr, size);
 }
 #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
-- 
cgit v1.2.3


From 5245698f665c4b7a533dcc47a5afdf33095d436a Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Tue, 1 Apr 2008 17:47:57 +0200
Subject: x86, UML: remove x86-specific implementations of find_first_bit

x86 has been switched to the generic versions of find_first_bit
and find_first_zero_bit, but the original versions were retained.
This patch just removes the now unused x86-specific versions.

also update UML.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/um/Kconfig.i386        |   4 ++
 arch/um/Kconfig.x86_64      |   4 ++
 arch/um/sys-i386/Makefile   |   2 +-
 arch/um/sys-x86_64/Makefile |   2 +-
 arch/x86/lib/Makefile       |   1 -
 arch/x86/lib/bitops_64.c    | 109 --------------------------------------------
 include/asm-x86/bitops_32.h |  58 -----------------------
 include/asm-x86/bitops_64.h |  23 ----------
 8 files changed, 10 insertions(+), 193 deletions(-)
 delete mode 100644 arch/x86/lib/bitops_64.c

(limited to 'include')

diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386
index be7556028291..49990ea422e4 100644
--- a/arch/um/Kconfig.i386
+++ b/arch/um/Kconfig.i386
@@ -39,6 +39,10 @@ config ARCH_REUSE_HOST_VSYSCALL_AREA
 	bool
 	default y
 
+config GENERIC_FIND_FIRST_BIT
+	bool
+	default y
+
 config GENERIC_FIND_NEXT_BIT
 	bool
 	default y
diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64
index 4ab5aa62ff3f..cc42e59585d2 100644
--- a/arch/um/Kconfig.x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -34,6 +34,10 @@ config SMP_BROKEN
 	bool
 	default y
 
+config GENERIC_FIND_FIRST_BIT
+	bool
+	default y
+
 config GENERIC_FIND_NEXT_BIT
 	bool
 	default y
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 964dc1a04c37..598b5c1903af 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -6,7 +6,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
 	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
 	sys_call_table.o tls.o
 
-subarch-obj-y = lib/bitops_32.o lib/semaphore_32.o lib/string_32.o
+subarch-obj-y = lib/semaphore_32.o lib/string_32.o
 subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
 subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o
 
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index 3c22de532088..c8b4cce9cfe1 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -10,7 +10,7 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
 
 obj-$(CONFIG_MODULES) += um_module.o
 
-subarch-obj-y = lib/bitops_64.o lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
+subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
 subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o
 
 ldt-y = ../sys-i386/ldt.o
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 436093299bd3..76f60f52a885 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -21,7 +21,6 @@ else
 
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
         lib-y += thunk_64.o clear_page_64.o copy_page_64.o
-        lib-y += bitops_64.o
         lib-y += memmove_64.o memset_64.o
         lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
 endif
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
deleted file mode 100644
index 568467d390c0..000000000000
--- a/arch/x86/lib/bitops_64.c
+++ /dev/null
@@ -1,109 +0,0 @@
-#ifndef CONFIG_GENERIC_FIND_FIRST_BIT
-#include <linux/bitops.h>
-
-#undef find_first_zero_bit
-#undef find_first_bit
-
-static inline long
-__find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
-	long d0, d1, d2;
-	long res;
-
-	/*
-	 * We must test the size in words, not in bits, because
-	 * otherwise incoming sizes in the range -63..-1 will not run
-	 * any scasq instructions, and then the flags used by the je
-	 * instruction will have whatever random value was in place
-	 * before.  Nobody should call us like that, but
-	 * find_next_zero_bit() does when offset and size are at the
-	 * same word and it fails to find a zero itself.
-	 */
-	size += 63;
-	size >>= 6;
-	if (!size)
-		return 0;
-	asm volatile(
-		"  repe; scasq\n"
-		"  je 1f\n"
-		"  xorq -8(%%rdi),%%rax\n"
-		"  subq $8,%%rdi\n"
-		"  bsfq %%rax,%%rdx\n"
-		"1:  subq %[addr],%%rdi\n"
-		"  shlq $3,%%rdi\n"
-		"  addq %%rdi,%%rdx"
-		:"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
-		:"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
-		 [addr] "S" (addr) : "memory");
-	/*
-	 * Any register would do for [addr] above, but GCC tends to
-	 * prefer rbx over rsi, even though rsi is readily available
-	 * and doesn't have to be saved.
-	 */
-	return res;
-}
-
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit.
- */
-long find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
-	return __find_first_zero_bit (addr, size);
-}
-
-static inline long
-__find_first_bit(const unsigned long * addr, unsigned long size)
-{
-	long d0, d1;
-	long res;
-
-	/*
-	 * We must test the size in words, not in bits, because
-	 * otherwise incoming sizes in the range -63..-1 will not run
-	 * any scasq instructions, and then the flags used by the jz
-	 * instruction will have whatever random value was in place
-	 * before.  Nobody should call us like that, but
-	 * find_next_bit() does when offset and size are at the same
-	 * word and it fails to find a one itself.
-	 */
-	size += 63;
-	size >>= 6;
-	if (!size)
-		return 0;
-	asm volatile(
-		"   repe; scasq\n"
-		"   jz 1f\n"
-		"   subq $8,%%rdi\n"
-		"   bsfq (%%rdi),%%rax\n"
-		"1: subq %[addr],%%rdi\n"
-		"   shlq $3,%%rdi\n"
-		"   addq %%rdi,%%rax"
-		:"=a" (res), "=&c" (d0), "=&D" (d1)
-		:"0" (0ULL), "1" (size), "2" (addr),
-		 [addr] "r" (addr) : "memory");
-	return res;
-}
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first set bit, not the number of the byte
- * containing a bit.
- */
-long find_first_bit(const unsigned long * addr, unsigned long size)
-{
-	return __find_first_bit(addr,size);
-}
-
-#include <linux/module.h>
-
-EXPORT_SYMBOL(find_first_bit);
-EXPORT_SYMBOL(find_first_zero_bit);
-#endif
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index ba2c0defafa8..2e863021bf81 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -4,64 +4,6 @@
 /*
  * Copyright 1992, Linus Torvalds.
  */
-
-#ifndef CONFIG_GENERIC_FIND_FIRST_BIT
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit number of the first zero bit, not the number of the byte
- * containing a bit.
- */
-static inline int find_first_zero_bit(const unsigned long *addr, unsigned size)
-{
-	int d0, d1, d2;
-	int res;
-
-	if (!size)
-		return 0;
-	/* This looks at memory.
-	 * Mark it volatile to tell gcc not to move it around
-	 */
-	asm volatile("movl $-1,%%eax\n\t"
-		     "xorl %%edx,%%edx\n\t"
-		     "repe; scasl\n\t"
-		     "je 1f\n\t"
-		     "xorl -4(%%edi),%%eax\n\t"
-		     "subl $4,%%edi\n\t"
-		     "bsfl %%eax,%%edx\n"
-		     "1:\tsubl %%ebx,%%edi\n\t"
-		     "shll $3,%%edi\n\t"
-		     "addl %%edi,%%edx"
-		     : "=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
-		     : "1" ((size + 31) >> 5), "2" (addr),
-		       "b" (addr) : "memory");
-	return res;
-}
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit number of the first set bit, not the number of the byte
- * containing a bit.
- */
-static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
-{
-	unsigned x = 0;
-
-	while (x < size) {
-		unsigned long val = *addr++;
-		if (val)
-			return __ffs(val) + x;
-		x += sizeof(*addr) << 3;
-	}
-	return x;
-}
-#endif
-
 #ifdef __KERNEL__
 
 #include <asm-generic/bitops/sched.h>
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 4081d7ecc2bd..cb23122d23f1 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -4,29 +4,6 @@
 /*
  * Copyright 1992, Linus Torvalds.
  */
-
-#ifndef CONFIG_GENERIC_FIND_FIRST_BIT
-extern long find_first_zero_bit(const unsigned long *addr, unsigned long size);
-extern long find_first_bit(const unsigned long *addr, unsigned long size);
-
-/* return index of first bet set in val or max when no bit is set */
-static inline long __scanbit(unsigned long val, unsigned long max)
-{
-	asm("bsfq %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max));
-	return val;
-}
-
-#define find_first_bit(addr, size)					\
-	((__builtin_constant_p((size)) && (size) <= BITS_PER_LONG	\
-	  ? (__scanbit(*(unsigned long *)(addr), (size)))		\
-	  : find_first_bit((addr), (size))))
-
-#define find_first_zero_bit(addr, size)					\
-	((__builtin_constant_p((size)) && (size) <= BITS_PER_LONG	\
-	  ? (__scanbit(~*(unsigned long *)(addr), (size)))		\
-	  : find_first_zero_bit((addr), (size))))
-#endif
-
 static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
 				  int len)
 {
-- 
cgit v1.2.3


From d66462f5314b0e70ddad8032eb76099475ca5571 Mon Sep 17 00:00:00 2001
From: Alexander van Heukelum <heukelum@mailshack.com>
Date: Fri, 4 Apr 2008 20:49:30 +0200
Subject: x86: finalize bitops unification

include/asm-x86/bitops_32.h and include/asm-x86/bitops_64.h are now
almost identical. The 64-bit version sets ARCH_HAS_FAST_MULTIPLIER
and has an extra inline function set_bit_string. The define currently
has no influence on the generated code, but it can be argued that
setting it on i386 is the right thing to do anyhow. The addition
of the extra inline function on i386 does not hurt either.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/bitops.h    | 38 +++++++++++++++++++++++++++++++++-----
 include/asm-x86/bitops_32.h | 30 ------------------------------
 include/asm-x86/bitops_64.h | 42 ------------------------------------------
 3 files changed, 33 insertions(+), 77 deletions(-)
 delete mode 100644 include/asm-x86/bitops_32.h
 delete mode 100644 include/asm-x86/bitops_64.h

(limited to 'include')

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index 1b6f547cb6bd..daf1a72bdc4b 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -403,10 +403,38 @@ static inline int fls(int x)
 
 #undef ADDR
 
-#ifdef CONFIG_X86_32
-# include "bitops_32.h"
-#else
-# include "bitops_64.h"
-#endif
+static inline void set_bit_string(unsigned long *bitmap,
+		unsigned long i, int len)
+{
+	unsigned long end = i + len;
+	while (i < end) {
+		__set_bit(i, bitmap);
+		i++;
+	}
+}
+
+#ifdef __KERNEL__
+
+#include <asm-generic/bitops/sched.h>
+
+#define ARCH_HAS_FAST_MULTIPLIER 1
+
+#include <asm-generic/bitops/hweight.h>
 
+#endif /* __KERNEL__ */
+
+#include <asm-generic/bitops/fls64.h>
+
+#ifdef __KERNEL__
+
+#include <asm-generic/bitops/ext2-non-atomic.h>
+
+#define ext2_set_bit_atomic(lock, nr, addr)			\
+	test_and_set_bit((nr), (unsigned long *)(addr))
+#define ext2_clear_bit_atomic(lock, nr, addr)			\
+	test_and_clear_bit((nr), (unsigned long *)(addr))
+
+#include <asm-generic/bitops/minix.h>
+
+#endif /* __KERNEL__ */
 #endif	/* _ASM_X86_BITOPS_H */
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
deleted file mode 100644
index 2e863021bf81..000000000000
--- a/include/asm-x86/bitops_32.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _I386_BITOPS_H
-#define _I386_BITOPS_H
-
-/*
- * Copyright 1992, Linus Torvalds.
- */
-#ifdef __KERNEL__
-
-#include <asm-generic/bitops/sched.h>
-
-#include <asm-generic/bitops/hweight.h>
-
-#endif /* __KERNEL__ */
-
-#include <asm-generic/bitops/fls64.h>
-
-#ifdef __KERNEL__
-
-#include <asm-generic/bitops/ext2-non-atomic.h>
-
-#define ext2_set_bit_atomic(lock, nr, addr)			\
-	test_and_set_bit((nr), (unsigned long *)(addr))
-#define ext2_clear_bit_atomic(lock, nr, addr)			\
-	test_and_clear_bit((nr), (unsigned long *)(addr))
-
-#include <asm-generic/bitops/minix.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* _I386_BITOPS_H */
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
deleted file mode 100644
index cb23122d23f1..000000000000
--- a/include/asm-x86/bitops_64.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _X86_64_BITOPS_H
-#define _X86_64_BITOPS_H
-
-/*
- * Copyright 1992, Linus Torvalds.
- */
-static inline void set_bit_string(unsigned long *bitmap, unsigned long i,
-				  int len)
-{
-	unsigned long end = i + len;
-	while (i < end) {
-		__set_bit(i, bitmap);
-		i++;
-	}
-}
-
-#ifdef __KERNEL__
-
-#include <asm-generic/bitops/sched.h>
-
-#define ARCH_HAS_FAST_MULTIPLIER 1
-
-#include <asm-generic/bitops/hweight.h>
-
-#endif /* __KERNEL__ */
-
-#include <asm-generic/bitops/fls64.h>
-
-#ifdef __KERNEL__
-
-#include <asm-generic/bitops/ext2-non-atomic.h>
-
-#define ext2_set_bit_atomic(lock, nr, addr)			\
-	test_and_set_bit((nr), (unsigned long *)(addr))
-#define ext2_clear_bit_atomic(lock, nr, addr)			\
-	test_and_clear_bit((nr), (unsigned long *)(addr))
-
-#include <asm-generic/bitops/minix.h>
-
-#endif /* __KERNEL__ */
-
-#endif /* _X86_64_BITOPS_H */
-- 
cgit v1.2.3


From f19dcf4a61ea4a3d155acb239348d09cb264f6a0 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 23 Mar 2008 01:03:07 -0700
Subject: x86: include/asm-x86/pgalloc.h/bitops.h: checkpatch cleanups -
 formatting only

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/bitops.h | 62 +++++++++++++++++++++++-------------------------
 1 file changed, 30 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h
index daf1a72bdc4b..b81a4d4d3337 100644
--- a/include/asm-x86/bitops.h
+++ b/include/asm-x86/bitops.h
@@ -62,9 +62,7 @@ static inline void set_bit(int nr, volatile void *addr)
  */
 static inline void __set_bit(int nr, volatile void *addr)
 {
-	asm volatile("bts %1,%0"
-		     : ADDR
-		     : "Ir" (nr) : "memory");
+	asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
 }
 
 /**
@@ -296,13 +294,11 @@ static inline int variable_test_bit(int nr, volatile const void *addr)
 static int test_bit(int nr, const volatile unsigned long *addr);
 #endif
 
-#define test_bit(nr,addr)			\
-	(__builtin_constant_p(nr) ?		\
-	 constant_test_bit((nr),(addr)) :	\
-	 variable_test_bit((nr),(addr)))
+#define test_bit(nr, addr)			\
+	(__builtin_constant_p((nr))		\
+	 ? constant_test_bit((nr), (addr))	\
+	 : variable_test_bit((nr), (addr)))
 
-#undef BASE_ADDR
-#undef BIT_ADDR
 /**
  * __ffs - find first set bit in word
  * @word: The word to search
@@ -311,9 +307,9 @@ static int test_bit(int nr, const volatile unsigned long *addr);
  */
 static inline unsigned long __ffs(unsigned long word)
 {
-	__asm__("bsf %1,%0"
-		:"=r" (word)
-		:"rm" (word));
+	asm("bsf %1,%0"
+		: "=r" (word)
+		: "rm" (word));
 	return word;
 }
 
@@ -325,9 +321,9 @@ static inline unsigned long __ffs(unsigned long word)
  */
 static inline unsigned long ffz(unsigned long word)
 {
-	__asm__("bsf %1,%0"
-		:"=r" (word)
-		:"r" (~word));
+	asm("bsf %1,%0"
+		: "=r" (word)
+		: "r" (~word));
 	return word;
 }
 
@@ -339,9 +335,9 @@ static inline unsigned long ffz(unsigned long word)
  */
 static inline unsigned long __fls(unsigned long word)
 {
-	__asm__("bsr %1,%0"
-		:"=r" (word)
-		:"rm" (word));
+	asm("bsr %1,%0"
+	    : "=r" (word)
+	    : "rm" (word));
 	return word;
 }
 
@@ -361,14 +357,14 @@ static inline int ffs(int x)
 {
 	int r;
 #ifdef CONFIG_X86_CMOV
-	__asm__("bsfl %1,%0\n\t"
-		"cmovzl %2,%0"
-		: "=r" (r) : "rm" (x), "r" (-1));
+	asm("bsfl %1,%0\n\t"
+	    "cmovzl %2,%0"
+	    : "=r" (r) : "rm" (x), "r" (-1));
 #else
-	__asm__("bsfl %1,%0\n\t"
-		"jnz 1f\n\t"
-		"movl $-1,%0\n"
-		"1:" : "=r" (r) : "rm" (x));
+	asm("bsfl %1,%0\n\t"
+	    "jnz 1f\n\t"
+	    "movl $-1,%0\n"
+	    "1:" : "=r" (r) : "rm" (x));
 #endif
 	return r + 1;
 }
@@ -388,19 +384,21 @@ static inline int fls(int x)
 {
 	int r;
 #ifdef CONFIG_X86_CMOV
-	__asm__("bsrl %1,%0\n\t"
-		"cmovzl %2,%0"
-		: "=&r" (r) : "rm" (x), "rm" (-1));
+	asm("bsrl %1,%0\n\t"
+	    "cmovzl %2,%0"
+	    : "=&r" (r) : "rm" (x), "rm" (-1));
 #else
-	__asm__("bsrl %1,%0\n\t"
-		"jnz 1f\n\t"
-		"movl $-1,%0\n"
-		"1:" : "=r" (r) : "rm" (x));
+	asm("bsrl %1,%0\n\t"
+	    "jnz 1f\n\t"
+	    "movl $-1,%0\n"
+	    "1:" : "=r" (r) : "rm" (x));
 #endif
 	return r + 1;
 }
 #endif /* __KERNEL__ */
 
+#undef BASE_ADDR
+#undef BIT_ADDR
 #undef ADDR
 
 static inline void set_bit_string(unsigned long *bitmap,
-- 
cgit v1.2.3