Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 into common/serial-rework

Conflicts: arch/sh/kernel/cpu/sh2/setup-sh7619.c arch/sh/kernel/cpu/sh2a/setup-mxg.c arch/sh/kernel/cpu/sh2a/setup-sh7201.c arch/sh/kernel/cpu/sh2a/setup-sh7203.c arch/sh/kernel/cpu/sh2a/setup-sh7206.c arch/sh/kernel/cpu/sh3/setup-sh7705.c arch/sh/kernel/cpu/sh3/setup-sh770x.c arch/sh/kernel/cpu/sh3/setup-sh7710.c arch/sh/kernel/cpu/sh3/setup-sh7720.c arch/sh/kernel/cpu/sh4/setup-sh4-202.c arch/sh/kernel/cpu/sh4/setup-sh7750.c arch/sh/kernel/cpu/sh4/setup-sh7760.c arch/sh/kernel/cpu/sh4a/setup-sh7343.c arch/sh/kernel/cpu/sh4a/setup-sh7366.c arch/sh/kernel/cpu/sh4a/setup-sh7722.c arch/sh/kernel/cpu/sh4a/setup-sh7723.c arch/sh/kernel/cpu/sh4a/setup-sh7724.c arch/sh/kernel/cpu/sh4a/setup-sh7763.c arch/sh/kernel/cpu/sh4a/setup-sh7770.c arch/sh/kernel/cpu/sh4a/setup-sh7780.c arch/sh/kernel/cpu/sh4a/setup-sh7785.c arch/sh/kernel/cpu/sh4a/setup-sh7786.c arch/sh/kernel/cpu/sh4a/setup-shx3.c arch/sh/kernel/cpu/sh5/setup-sh5.c drivers/serial/sh-sci.c drivers/serial/sh-sci.h include/linux/serial_sci.h
author: Paul Mundt <lethal@linux-sh.org> 2011-01-13 09:06:28 +0300
committer: Paul Mundt <lethal@linux-sh.org> 2011-01-13 09:06:28 +0300
commit: f43dc23d5ea91fca257be02138a255f02d98e806 (patch)
tree: b29722f6e965316e90ac97abf79923ced250dc21 /lib
parent: f8e53553f452dcbf67cb89c8cba63a1cd6eb4cc0 (diff)
parent: 4162cf64973df51fc885825bc9ca4d055891c49f (diff)
download: linux-f43dc23d5ea91fca257be02138a255f02d98e806.tar.xz
86 files changed, 7260 insertions, 1936 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index bb1326d3839c..3116aa631af6 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -7,6 +7,9 @@ config BINARY_PRINTF
 
 menu "Library routines"
 
+config RAID6_PQ
+	tristate
+
 config BITREVERSE
 	tristate
 
@@ -117,6 +120,10 @@ config DECOMPRESS_BZIP2
 config DECOMPRESS_LZMA
 	tristate
 
+config DECOMPRESS_LZO
+	select LZO_DECOMPRESS
+	tristate
+
 #
 # Generic allocator support is selected if needed
 #
@@ -156,6 +163,9 @@ config TEXTSEARCH_BM
 config TEXTSEARCH_FSM
 	tristate
 
+config BTREE
+	boolean
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
@@ -174,9 +184,6 @@ config HAS_DMA
 config CHECK_SIGNATURE
 	bool
 
-config HAVE_LMB
-	boolean
-
 config CPUMASK_OFFSTACK
 	bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
 	help
@@ -200,4 +207,10 @@ config NLATTR
 config GENERIC_ATOMIC64
        bool
 
+config LRU_CACHE
+	tristate
+
+config AVERAGE
+	bool
+
 endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 23067ab1a73c..2d05adb98401 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -50,6 +50,14 @@ config MAGIC_SYSRQ
 	  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
 	  unless you really know what this hack does.
 
+config STRIP_ASM_SYMS
+	bool "Strip assembler-generated symbols during link"
+	default n
+	help
+	  Strip internal assembler-generated symbols during a link (symbols
+	  that look like '.Lxxx') so they don't pollute the output of
+	  get_wchan() and suchlike.
+
 config UNUSED_SYMBOLS
 	bool "Enable unused/obsolete exported symbols"
 	default y if X86
@@ -68,7 +76,6 @@ config UNUSED_SYMBOLS
 
 config DEBUG_FS
 	bool "Debug Filesystem"
-	depends on SYSFS
 	help
 	  debugfs is a virtual file system that kernel developers use to put
 	  debugging files into.  Enable this option to be able to read and
@@ -95,9 +102,10 @@ config HEADERS_CHECK
 
 config DEBUG_SECTION_MISMATCH
 	bool "Enable full Section mismatch analysis"
-	depends on UNDEFINED
+	depends on UNDEFINED || (BLACKFIN)
+	default y
 	# This option is on purpose disabled for now.
-	# It will be enabled when we are down to a resonable number
+	# It will be enabled when we are down to a reasonable number
 	# of section mismatch warnings (< 10 for an allyesconfig build)
 	help
 	  The section mismatch analysis checks if there are illegal
@@ -143,28 +151,34 @@ config DEBUG_SHIRQ
 	  Drivers ought to be able to handle interrupts coming in at those
 	  points; some don't and need to be caught.
 
-config DETECT_SOFTLOCKUP
-	bool "Detect Soft Lockups"
+config LOCKUP_DETECTOR
+	bool "Detect Hard and Soft Lockups"
 	depends on DEBUG_KERNEL && !S390
-	default y
 	help
-	  Say Y here to enable the kernel to detect "soft lockups",
-	  which are bugs that cause the kernel to loop in kernel
+	  Say Y here to enable the kernel to act as a watchdog to detect
+	  hard and soft lockups.
+
+	  Softlockups are bugs that cause the kernel to loop in kernel
 	  mode for more than 60 seconds, without giving other tasks a
-	  chance to run.
+	  chance to run.  The current stack trace is displayed upon
+	  detection and the system will stay locked up.
 
-	  When a soft-lockup is detected, the kernel will print the
-	  current stack trace (which you should report), but the
-	  system will stay locked up. This feature has negligible
-	  overhead.
+	  Hardlockups are bugs that cause the CPU to loop in kernel mode
+	  for more than 60 seconds, without letting other interrupts have a
+	  chance to run.  The current stack trace is displayed upon detection
+	  and the system will stay locked up.
 
-	  (Note that "hard lockups" are separate type of bugs that
-	   can be detected via the NMI-watchdog, on platforms that
-	   support it.)
+	  The overhead should be minimal.  A periodic hrtimer runs to
+	  generate interrupts and kick the watchdog task every 10-12 seconds.
+	  An NMI is generated every 60 seconds or so to check for hardlockups.
+
+config HARDLOCKUP_DETECTOR
+	def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
+		 !ARCH_HAS_NMI_WATCHDOG
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
 	bool "Panic (Reboot) On Soft Lockups"
-	depends on DETECT_SOFTLOCKUP
+	depends on LOCKUP_DETECTOR
 	help
 	  Say Y here to enable the kernel to panic on "soft lockups",
 	  which are bugs that cause the kernel to loop in kernel
@@ -181,7 +195,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
 
 config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
 	int
-	depends on DETECT_SOFTLOCKUP
+	depends on LOCKUP_DETECTOR
 	range 0 1
 	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
 	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -290,6 +304,28 @@ config DEBUG_OBJECTS_TIMERS
 	  timer routines to track the life time of timer objects and
 	  validate the timer operations.
 
+config DEBUG_OBJECTS_WORK
+	bool "Debug work objects"
+	depends on DEBUG_OBJECTS
+	help
+	  If you say Y here, additional code will be inserted into the
+	  work queue routines to track the life time of work objects and
+	  validate the work operations.
+
+config DEBUG_OBJECTS_RCU_HEAD
+	bool "Debug RCU callbacks objects"
+	depends on DEBUG_OBJECTS && PREEMPT
+	help
+	  Enable this to turn on debugging of RCU list heads (call_rcu() usage).
+
+config DEBUG_OBJECTS_PERCPU_COUNTER
+	bool "Debug percpu counter objects"
+	depends on DEBUG_OBJECTS
+	help
+	  If you say Y here, additional code will be inserted into the
+	  percpu counter routines to track the life time of percpu counter
+	  objects and validate the percpu counter operations.
+
 config DEBUG_OBJECTS_ENABLE_DEFAULT
 	int "debug_objects bootup default value (0-1)"
         range 0 1
@@ -326,7 +362,7 @@ config SLUB_DEBUG_ON
 config SLUB_STATS
 	default n
 	bool "Enable SLUB performance statistics"
-	depends on SLUB && SLUB_DEBUG && SYSFS
+	depends on SLUB && SYSFS
 	help
 	  SLUB statistics are useful to debug SLUBs allocation behavior in
 	  order find ways to optimize the allocator. This should never be
@@ -338,13 +374,13 @@ config SLUB_STATS
 
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
-	depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \
-		!MEMORY_HOTPLUG
-	select DEBUG_SLAB if SLAB
-	select SLUB_DEBUG if SLUB
+	depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
+		(X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
+
 	select DEBUG_FS if SYSFS
 	select STACKTRACE if STACKTRACE_SUPPORT
 	select KALLSYMS
+	select CRC32
 	help
 	  Say Y here if you want to enable the memory leak
 	  detector. The memory allocation/freeing is traced in a way
@@ -355,9 +391,24 @@ config DEBUG_KMEMLEAK
 	  allocations. See Documentation/kmemleak.txt for more
 	  details.
 
+	  Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
+	  of finding leaks due to the slab objects poisoning.
+
 	  In order to access the kmemleak file, debugfs needs to be
 	  mounted (usually at /sys/kernel/debug).
 
+config DEBUG_KMEMLEAK_EARLY_LOG_SIZE
+	int "Maximum kmemleak early log entries"
+	depends on DEBUG_KMEMLEAK
+	range 200 40000
+	default 400
+	help
+	  Kmemleak must track all the memory allocations to avoid
+	  reporting false positives. Since memory may be allocated or
+	  freed before kmemleak is initialised, an early log buffer is
+	  used to store these actions. If kmemleak reports "early log
+	  buffer exceeded", please increase this value.
+
 config DEBUG_KMEMLEAK_TEST
 	tristate "Simple test for the kernel memory leak detector"
 	depends on DEBUG_KMEMLEAK
@@ -368,9 +419,16 @@ config DEBUG_KMEMLEAK_TEST
 
 	  If unsure, say N.
 
+config DEBUG_KMEMLEAK_DEFAULT_OFF
+	bool "Default kmemleak to off"
+	depends on DEBUG_KMEMLEAK
+	help
+	  Say Y here to disable kmemleak by default. It can then be enabled
+	  on the command line via kmemleak=on.
+
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
-	depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64)
+	depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
 	default y
 	help
 	  If you say Y here then the kernel will use a debug variant of the
@@ -412,6 +470,15 @@ config DEBUG_MUTEXES
 	 This feature allows mutex semantics violations to be detected and
 	 reported.
 
+config BKL
+	bool "Big Kernel Lock" if (SMP || PREEMPT)
+	default y
+	help
+	  This is the traditional lock that is used in old code instead
+	  of proper locking. All drivers that use the BKL should depend
+	  on this symbol.
+	  Say Y here unless you are working on removing the BKL.
+
 config DEBUG_LOCK_ALLOC
 	bool "Lock debugging: detect incorrect freeing of live locks"
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -433,6 +500,7 @@ config PROVE_LOCKING
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
 	select DEBUG_LOCK_ALLOC
+	select TRACE_IRQFLAGS
 	default n
 	help
 	 This feature enables the kernel to prove that all locking
@@ -468,11 +536,52 @@ config PROVE_LOCKING
 
 	 For more details, see Documentation/lockdep-design.txt.
 
+config PROVE_RCU
+	bool "RCU debugging: prove RCU correctness"
+	depends on PROVE_LOCKING
+	default n
+	help
+	 This feature enables lockdep extensions that check for correct
+	 use of RCU APIs.  This is currently under development.  Say Y
+	 if you want to debug RCU usage or help work on the PROVE_RCU
+	 feature.
+
+	 Say N if you are unsure.
+
+config PROVE_RCU_REPEATEDLY
+	bool "RCU debugging: don't disable PROVE_RCU on first splat"
+	depends on PROVE_RCU
+	default n
+	help
+	 By itself, PROVE_RCU will disable checking upon issuing the
+	 first warning (or "splat").  This feature prevents such
+	 disabling, allowing multiple RCU-lockdep warnings to be printed
+	 on a single reboot.
+
+	 Say Y to allow multiple RCU-lockdep warnings per boot.
+
+	 Say N if you are unsure.
+
+config SPARSE_RCU_POINTER
+	bool "RCU debugging: sparse-based checks for pointer usage"
+	default n
+	help
+	 This feature enables the __rcu sparse annotation for
+	 RCU-protected pointers.  This annotation will cause sparse
+	 to flag any non-RCU used of annotated pointers.  This can be
+	 helpful when debugging RCU usage.  Please note that this feature
+	 is not intended to enforce code cleanliness; it is instead merely
+	 a debugging aid.
+
+	 Say Y to make sparse flag questionable use of RCU-protected pointers
+
+	 Say N if you are unsure.
+
 config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select STACKTRACE
-	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390
+	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE
 	select KALLSYMS
 	select KALLSYMS_ALL
 
@@ -489,6 +598,14 @@ config LOCK_STAT
 
 	 For more details, see Documentation/lockstat.txt
 
+	 This also enables lock events required by "perf lock",
+	 subcommand of perf.
+	 If you want to use "perf lock", you also need to turn on
+	 CONFIG_EVENT_TRACING.
+
+	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
+	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
+
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
@@ -498,11 +615,10 @@ config DEBUG_LOCKDEP
 	  of more runtime overhead.
 
 config TRACE_IRQFLAGS
-	depends on DEBUG_KERNEL
 	bool
-	default y
-	depends on TRACE_IRQFLAGS_SUPPORT
-	depends on PROVE_LOCKING
+	help
+	  Enables hooks to interrupt enabling and disabling for
+	  either tracing or lock debugging.
 
 config DEBUG_SPINLOCK_SLEEP
 	bool "Spinlock debugging: sleep-inside-spinlock checking"
@@ -545,7 +661,7 @@ config DEBUG_BUGVERBOSE
 	depends on BUG
 	depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \
 		   FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300
-	default !EMBEDDED
+	default y
 	help
 	  Say Y here to make BUG() panics output the file name and line number
 	  of the BUG call as well as the EIP and oops trace.  This aids
@@ -564,6 +680,19 @@ config DEBUG_INFO
 
 	  If unsure, say N.
 
+config DEBUG_INFO_REDUCED
+	bool "Reduce debugging information"
+	depends on DEBUG_INFO
+	help
+	  If you say Y here gcc is instructed to generate less debugging
+	  information for structure types. This means that tools that
+	  need full debugging information (like kgdb or systemtap) won't
+	  be happy. But if you merely need debugging information to
+	  resolve line numbers there is no loss. Advantage is that
+	  build directory object sizes shrink dramatically over a full
+	  DEBUG_INFO build and compile times are reduced too.
+	  Only works with newer gcc versions.
+
 config DEBUG_VM
 	bool "Debug VM"
 	depends on DEBUG_KERNEL
@@ -620,6 +749,15 @@ config DEBUG_LIST
 
 	  If unsure, say N.
 
+config TEST_LIST_SORT
+	bool "Linked list sorting test"
+	depends on DEBUG_KERNEL
+	help
+	  Enable this to turn on 'list_sort()' function test. This test is
+	  executed only once during system boot, so affects only boot time.
+
+	  If unsure, say N.
+
 config DEBUG_SG
 	bool "Debug SG table operations"
 	depends on DEBUG_KERNEL
@@ -640,6 +778,21 @@ config DEBUG_NOTIFIERS
 	  This is a relatively cheap check but if you care about maximum
 	  performance, say N.
 
+config DEBUG_CREDENTIALS
+	bool "Debug credential management"
+	depends on DEBUG_KERNEL
+	help
+	  Enable this to turn on some debug checking for credential
+	  management.  The additional code keeps track of the number of
+	  pointers from task_structs to any given cred struct, and checks to
+	  see that this number never exceeds the usage count of the cred
+	  struct.
+
+	  Furthermore, if SELinux is enabled, this also checks that the
+	  security pointer in the cred struct is never seen to be invalid.
+
+	  If unsure, say N.
+
 #
 # Select this config option from the architecture Kconfig, if it
 # it is preferred to always offer frame pointers as a config
@@ -712,17 +865,53 @@ config RCU_TORTURE_TEST_RUNNABLE
 
 config RCU_CPU_STALL_DETECTOR
 	bool "Check for stalled CPUs delaying RCU grace periods"
-	depends on CLASSIC_RCU || TREE_RCU
-	default n
+	depends on TREE_RCU || TREE_PREEMPT_RCU
+	default y
 	help
 	  This option causes RCU to printk information on which
 	  CPUs are delaying the current grace period, but only when
 	  the grace period extends for excessive time periods.
 
-	  Say Y if you want RCU to perform such checks.
+	  Say N if you want to disable such checks.
+
+	  Say Y if you are unsure.
+
+config RCU_CPU_STALL_TIMEOUT
+	int "RCU CPU stall timeout in seconds"
+	depends on RCU_CPU_STALL_DETECTOR
+	range 3 300
+	default 60
+	help
+	  If a given RCU grace period extends more than the specified
+	  number of seconds, a CPU stall warning is printed.  If the
+	  RCU grace period persists, additional CPU stall warnings are
+	  printed at more widely spaced intervals.
+
+config RCU_CPU_STALL_DETECTOR_RUNNABLE
+	bool "RCU CPU stall checking starts automatically at boot"
+	depends on RCU_CPU_STALL_DETECTOR
+	default y
+	help
+	  If set, start checking for RCU CPU stalls immediately on
+	  boot.  Otherwise, RCU CPU stall checking must be manually
+	  enabled.
+
+	  Say Y if you are unsure.
+
+	  Say N if you wish to suppress RCU CPU stall checking during boot.
+
+config RCU_CPU_STALL_VERBOSE
+	bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
+	depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
+	default y
+	help
+	  This option causes RCU to printk detailed per-task information
+	  for any tasks that are stalling the current RCU grace period.
 
 	  Say N if you are unsure.
 
+	  Say Y if you want to enable such checks.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL
@@ -777,10 +966,24 @@ config DEBUG_BLOCK_EXT_DEVT
 
 	  Say N if you are unsure.
 
+config DEBUG_FORCE_WEAK_PER_CPU
+	bool "Force weak per-cpu definitions"
+	depends on DEBUG_KERNEL
+	help
+	  s390 and alpha require percpu variables in modules to be
+	  defined weak to work around addressing range issue which
+	  puts the following two restrictions on percpu variable
+	  definitions.
+
+	  1. percpu symbols must be unique whether static or not
+	  2. percpu variables can't be defined inside a function
+
+	  To ensure that generic code follows the above rules, this
+	  option forces all percpu variables to be defined as weak.
+
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
-	depends on DEBUG_KERNEL
-	depends on KPROBES
+	depends on DEBUG_FS
 	depends on BLOCK
 	default n
 	help
@@ -791,7 +994,19 @@ config LKDTM
 	called lkdtm.
 
 	Documentation on how to use the module can be found in
-	drivers/misc/lkdtm.c
+	Documentation/fault-injection/provoke-crashes.txt
+
+config CPU_NOTIFIER_ERROR_INJECT
+	tristate "CPU notifier error injection module"
+	depends on HOTPLUG_CPU && DEBUG_KERNEL
+	help
+	  This option provides a kernel module that can be used to test
+	  the error handling of the cpu notifiers
+
+	  To compile this code as a module, choose M here: the module will
+	  be called cpu-notifier-error-inject.
+
+	  If unsure, say N.
 
 config FAULT_INJECTION
 	bool "Fault-injection framework"
@@ -820,7 +1035,7 @@ config FAIL_MAKE_REQUEST
 	  Provide fault-injection capability for disk IO.
 
 config FAIL_IO_TIMEOUT
-	bool "Faul-injection capability for faking disk interrupts"
+	bool "Fault-injection capability for faking disk interrupts"
 	depends on FAULT_INJECTION && BLOCK
 	help
 	  Provide fault-injection capability on end IO handling. This
@@ -841,26 +1056,29 @@ config FAULT_INJECTION_STACKTRACE_FILTER
 	depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
 	depends on !X86_64
 	select STACKTRACE
-	select FRAME_POINTER if !PPC && !S390
+	select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE
 	help
 	  Provide stacktrace filter for fault-injection capabilities
 
 config LATENCYTOP
 	bool "Latency measuring infrastructure"
-	select FRAME_POINTER if !MIPS && !PPC && !S390
+	depends on HAVE_LATENCYTOP_SUPPORT
+	depends on DEBUG_KERNEL
+	depends on STACKTRACE_SUPPORT
+	depends on PROC_FS
+	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE
 	select KALLSYMS
 	select KALLSYMS_ALL
 	select STACKTRACE
 	select SCHEDSTATS
 	select SCHED_DEBUG
-	depends on HAVE_LATENCYTOP_SUPPORT
 	help
 	  Enable this option if you want to use the LatencyTOP tool
 	  to find out which userspace is blocking on what kernel operations.
 
 config SYSCTL_SYSCALL_CHECK
 	bool "Sysctl checks"
-	depends on SYSCTL_SYSCALL
+	depends on SYSCTL
 	---help---
 	  sys_sysctl uses binary paths that have been found challenging
 	  to properly maintain and use. This enables checks that help
@@ -934,10 +1152,10 @@ config DYNAMIC_DEBUG
 
 	  Usage:
 
-	  Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file,
+	  Dynamic debugging is controlled via the 'dynamic_debug/control' file,
 	  which is contained in the 'debugfs' filesystem. Thus, the debugfs
 	  filesystem must first be mounted before making use of this feature.
-	  We refer the control file as: <debugfs>/dynamic_debug/ddebug. This
+	  We refer the control file as: <debugfs>/dynamic_debug/control. This
 	  file contains a list of the debug statements that can be enabled. The
 	  format for each line of the file is:
 
@@ -952,7 +1170,7 @@ config DYNAMIC_DEBUG
 
 	  From a live system:
 
-		nullarbor:~ # cat <debugfs>/dynamic_debug/ddebug
+		nullarbor:~ # cat <debugfs>/dynamic_debug/control
 		# filename:lineno [module]function flags format
 		fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012"
 		fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012"
@@ -962,23 +1180,23 @@ config DYNAMIC_DEBUG
 
 		// enable the message at line 1603 of file svcsock.c
 		nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 		// enable all the messages in file svcsock.c
 		nullarbor:~ # echo -n 'file svcsock.c +p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 		// enable all the messages in the NFS server module
 		nullarbor:~ # echo -n 'module nfsd +p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 		// enable all 12 messages in the function svc_process()
 		nullarbor:~ # echo -n 'func svc_process +p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 		// disable all 12 messages in the function svc_process()
 		nullarbor:~ # echo -n 'func svc_process -p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 	  See Documentation/dynamic-debug-howto.txt for additional information.
 
@@ -993,6 +1211,26 @@ config DMA_API_DEBUG
 	  This option causes a performance degredation.  Use only if you want
 	  to debug device drivers. If unsure, say N.
 
+config ATOMIC64_SELFTEST
+	bool "Perform an atomic64_t self-test at boot"
+	help
+	  Enable this option to test the atomic64_t functions at boot.
+
+	  If unsure, say N.
+
+config ASYNC_RAID6_TEST
+	tristate "Self test for hardware accelerated raid6 recovery"
+	depends on ASYNC_RAID6_RECOV
+	select ASYNC_MEMCPY
+	---help---
+	  This is a one-shot self test that permutes through the
+	  recovery of all the possible two disk failure scenarios for a
+	  N-disk array.  Recovery is performed with the asynchronous
+	  raid6 recovery routines, and will optionally use an offload
+	  engine if one is available.
+
+	  If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 9b5d1d7f2ef7..43cb93fa2651 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB
 	bool
 
 menuconfig KGDB
-	bool "KGDB: kernel debugging with remote gdb"
+	bool "KGDB: kernel debugger"
 	depends on HAVE_ARCH_KGDB
 	depends on DEBUG_KERNEL && EXPERIMENTAL
 	help
@@ -57,4 +57,26 @@ config KGDB_TESTS_BOOT_STRING
 	  information about other strings you could use beyond the
 	  default of V1F100.
 
+config KGDB_LOW_LEVEL_TRAP
+       bool "KGDB: Allow debugging with traps in notifiers"
+       depends on X86 || MIPS
+       default n
+       help
+         This will add an extra call back to kgdb for the breakpoint
+         exception handler on which will will allow kgdb to step
+         through a notify handler.
+
+config KGDB_KDB
+	bool "KGDB_KDB: include kdb frontend for kgdb"
+	default n
+	help
+	  KDB frontend for kernel
+
+config KDB_KEYBOARD
+	bool "KGDB_KDB: keyboard as input device"
+	depends on VT && KGDB_KDB
+	default n
+	help
+	  KDB can use a PS/2 type keyboard for an input device
+
 endif # KGDB
diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck
index 603c81b66549..846e039a86b4 100644
--- a/lib/Kconfig.kmemcheck
+++ b/lib/Kconfig.kmemcheck
@@ -1,6 +1,8 @@
 config HAVE_ARCH_KMEMCHECK
 	bool
 
+if HAVE_ARCH_KMEMCHECK
+
 menuconfig KMEMCHECK
 	bool "kmemcheck: trap use of uninitialized memory"
 	depends on DEBUG_KERNEL
@@ -89,3 +91,4 @@ config KMEMCHECK_BITOPS_OK
 	  accesses where not all the bits are initialized at the same time.
 	  This may also hide some real bugs.
 
+endif
diff --git a/lib/Makefile b/lib/Makefile
index b6d1857bbf08..d7b6e30a3a1e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,11 +8,11 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o dump_stack.o \
+	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o decompress.o
+	 is_single_threaded.o plist.o decompress.o flex_array.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o
+	 string_helpers.o gcd.o lcm.o list_sort.o uuid.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -39,8 +39,12 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
+
+CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
@@ -65,10 +69,12 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
 obj-$(CONFIG_LZO_COMPRESS) += lzo/
 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
+obj-$(CONFIG_RAID6_PQ) += raid6/
 
 lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
 lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
 lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
+lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
 
 obj-$(CONFIG_TEXTSEARCH) += textsearch.o
 obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
@@ -80,23 +86,28 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
+obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
-obj-$(CONFIG_HAVE_LMB) += lmb.o
-
 obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
 
 obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
 
 obj-$(CONFIG_NLATTR) += nlattr.o
 
+obj-$(CONFIG_LRU_CACHE) += lru_cache.o
+
 obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
 
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
 obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
 
+obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
+
+obj-$(CONFIG_AVERAGE) += average.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/argv_split.c b/lib/argv_split.c
index 5205a8dae5bc..4b1b083f219c 100644
--- a/lib/argv_split.c
+++ b/lib/argv_split.c
@@ -4,17 +4,10 @@
 
 #include <linux/kernel.h>
 #include <linux/ctype.h>
+#include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 
-static const char *skip_sep(const char *cp)
-{
-	while (*cp && isspace(*cp))
-		cp++;
-
-	return cp;
-}
-
 static const char *skip_arg(const char *cp)
 {
 	while (*cp && !isspace(*cp))
@@ -28,7 +21,7 @@ static int count_argc(const char *str)
 	int count = 0;
 
 	while (*str) {
-		str = skip_sep(str);
+		str = skip_spaces(str);
 		if (*str) {
 			count++;
 			str = skip_arg(str);
@@ -82,7 +75,7 @@ char **argv_split(gfp_t gfp, const char *str, int *argcp)
 	argvp = argv;
 
 	while (*str) {
-		str = skip_sep(str);
+		str = skip_spaces(str);
 
 		if (*str) {
 			const char *p = str;
diff --git a/lib/atomic64.c b/lib/atomic64.c
index c5e725562416..a21c12bc727c 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -13,6 +13,7 @@
 #include <linux/cache.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
+#include <linux/module.h>
 #include <asm/atomic.h>
 
 /*
@@ -52,6 +53,7 @@ long long atomic64_read(const atomic64_t *v)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_read);
 
 void atomic64_set(atomic64_t *v, long long i)
 {
@@ -62,6 +64,7 @@ void atomic64_set(atomic64_t *v, long long i)
 	v->counter = i;
 	spin_unlock_irqrestore(lock, flags);
 }
+EXPORT_SYMBOL(atomic64_set);
 
 void atomic64_add(long long a, atomic64_t *v)
 {
@@ -72,6 +75,7 @@ void atomic64_add(long long a, atomic64_t *v)
 	v->counter += a;
 	spin_unlock_irqrestore(lock, flags);
 }
+EXPORT_SYMBOL(atomic64_add);
 
 long long atomic64_add_return(long long a, atomic64_t *v)
 {
@@ -84,6 +88,7 @@ long long atomic64_add_return(long long a, atomic64_t *v)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_add_return);
 
 void atomic64_sub(long long a, atomic64_t *v)
 {
@@ -94,6 +99,7 @@ void atomic64_sub(long long a, atomic64_t *v)
 	v->counter -= a;
 	spin_unlock_irqrestore(lock, flags);
 }
+EXPORT_SYMBOL(atomic64_sub);
 
 long long atomic64_sub_return(long long a, atomic64_t *v)
 {
@@ -106,6 +112,7 @@ long long atomic64_sub_return(long long a, atomic64_t *v)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_sub_return);
 
 long long atomic64_dec_if_positive(atomic64_t *v)
 {
@@ -120,6 +127,7 @@ long long atomic64_dec_if_positive(atomic64_t *v)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_dec_if_positive);
 
 long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
 {
@@ -134,6 +142,7 @@ long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_cmpxchg);
 
 long long atomic64_xchg(atomic64_t *v, long long new)
 {
@@ -147,21 +156,23 @@ long long atomic64_xchg(atomic64_t *v, long long new)
 	spin_unlock_irqrestore(lock, flags);
 	return val;
 }
+EXPORT_SYMBOL(atomic64_xchg);
 
 int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
 	unsigned long flags;
 	spinlock_t *lock = lock_addr(v);
-	int ret = 1;
+	int ret = 0;
 
 	spin_lock_irqsave(lock, flags);
 	if (v->counter != u) {
 		v->counter += a;
-		ret = 0;
+		ret = 1;
 	}
 	spin_unlock_irqrestore(lock, flags);
 	return ret;
 }
+EXPORT_SYMBOL(atomic64_add_unless);
 
 static int init_atomic64_lock(void)
 {
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
new file mode 100644
index 000000000000..44524cc8c32a
--- /dev/null
+++ b/lib/atomic64_test.c
@@ -0,0 +1,166 @@
+/*
+ * Testsuite for atomic64_t functions
+ *
+ * Copyright © 2010  Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/atomic.h>
+
+#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
+static __init int test_atomic64(void)
+{
+	long long v0 = 0xaaa31337c001d00dLL;
+	long long v1 = 0xdeadbeefdeafcafeLL;
+	long long v2 = 0xfaceabadf00df001LL;
+	long long onestwos = 0x1111111122222222LL;
+	long long one = 1LL;
+
+	atomic64_t v = ATOMIC64_INIT(v0);
+	long long r = v0;
+	BUG_ON(v.counter != r);
+
+	atomic64_set(&v, v1);
+	r = v1;
+	BUG_ON(v.counter != r);
+	BUG_ON(atomic64_read(&v) != r);
+
+	INIT(v0);
+	atomic64_add(onestwos, &v);
+	r += onestwos;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_add(-one, &v);
+	r += -one;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r += onestwos;
+	BUG_ON(atomic64_add_return(onestwos, &v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r += -one;
+	BUG_ON(atomic64_add_return(-one, &v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_sub(onestwos, &v);
+	r -= onestwos;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_sub(-one, &v);
+	r -= -one;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r -= onestwos;
+	BUG_ON(atomic64_sub_return(onestwos, &v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r -= -one;
+	BUG_ON(atomic64_sub_return(-one, &v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_inc(&v);
+	r += one;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r += one;
+	BUG_ON(atomic64_inc_return(&v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_dec(&v);
+	r -= one;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r -= one;
+	BUG_ON(atomic64_dec_return(&v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(atomic64_xchg(&v, v1) != v0);
+	r = v1;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0);
+	r = v1;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(atomic64_add_unless(&v, one, v0));
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(!atomic64_add_unless(&v, one, v1));
+	r += one;
+	BUG_ON(v.counter != r);
+
+#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
+    defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM)
+	INIT(onestwos);
+	BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
+	r -= one;
+	BUG_ON(v.counter != r);
+
+	INIT(0);
+	BUG_ON(atomic64_dec_if_positive(&v) != -one);
+	BUG_ON(v.counter != r);
+
+	INIT(-one);
+	BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
+	BUG_ON(v.counter != r);
+#else
+#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above
+#endif
+
+	INIT(onestwos);
+	BUG_ON(!atomic64_inc_not_zero(&v));
+	r += one;
+	BUG_ON(v.counter != r);
+
+	INIT(0);
+	BUG_ON(atomic64_inc_not_zero(&v));
+	BUG_ON(v.counter != r);
+
+	INIT(-one);
+	BUG_ON(!atomic64_inc_not_zero(&v));
+	r += one;
+	BUG_ON(v.counter != r);
+
+#ifdef CONFIG_X86
+	printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n",
+#ifdef CONFIG_X86_64
+	       "x86-64",
+#elif defined(CONFIG_X86_CMPXCHG64)
+	       "i586+",
+#else
+	       "i386+",
+#endif
+	       boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
+	       boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
+#else
+	printk(KERN_INFO "atomic64 test passed\n");
+#endif
+
+	return 0;
+}
+
+core_initcall(test_atomic64);
diff --git a/lib/average.c b/lib/average.c
new file mode 100644
index 000000000000..5576c2841496
--- /dev/null
+++ b/lib/average.c
@@ -0,0 +1,61 @@
+/*
+ * lib/average.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/average.h>
+#include <linux/bug.h>
+#include <linux/log2.h>
+
+/**
+ * DOC: Exponentially Weighted Moving Average (EWMA)
+ *
+ * These are generic functions for calculating Exponentially Weighted Moving
+ * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled
+ * up internal representation of the average value to prevent rounding errors.
+ * The factor for scaling up and the exponential weight (or decay rate) have to
+ * be specified thru the init fuction. The structure should not be accessed
+ * directly but only thru the helper functions.
+ */
+
+/**
+ * ewma_init() - Initialize EWMA parameters
+ * @avg: Average structure
+ * @factor: Factor to use for the scaled up internal value. The maximum value
+ *	of averages can be ULONG_MAX/(factor*weight). For performance reasons
+ *	factor has to be a power of 2.
+ * @weight: Exponential weight, or decay rate. This defines how fast the
+ *	influence of older values decreases. For performance reasons weight has
+ *	to be a power of 2.
+ *
+ * Initialize the EWMA parameters for a given struct ewma @avg.
+ */
+void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
+{
+	WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor));
+
+	avg->weight = ilog2(weight);
+	avg->factor = ilog2(factor);
+	avg->internal = 0;
+}
+EXPORT_SYMBOL(ewma_init);
+
+/**
+ * ewma_add() - Exponentially weighted moving average (EWMA)
+ * @avg: Average structure
+ * @val: Current value
+ *
+ * Add a sample to the average.
+ */
+struct ewma *ewma_add(struct ewma *avg, unsigned long val)
+{
+	avg->internal = avg->internal  ?
+		(((avg->internal << avg->weight) - avg->internal) +
+			(val << avg->factor)) >> avg->weight :
+		(val << avg->factor);
+	return avg;
+}
+EXPORT_SYMBOL(ewma_add);
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 35a1f7ff4149..741fae905ae3 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -179,14 +179,16 @@ void __bitmap_shift_left(unsigned long *dst,
 }
 EXPORT_SYMBOL(__bitmap_shift_left);
 
-void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 				const unsigned long *bitmap2, int bits)
 {
 	int k;
 	int nr = BITS_TO_LONGS(bits);
+	unsigned long result = 0;
 
 	for (k = 0; k < nr; k++)
-		dst[k] = bitmap1[k] & bitmap2[k];
+		result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_and);
 
@@ -212,14 +214,16 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_xor);
 
-void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 				const unsigned long *bitmap2, int bits)
 {
 	int k;
 	int nr = BITS_TO_LONGS(bits);
+	unsigned long result = 0;
 
 	for (k = 0; k < nr; k++)
-		dst[k] = bitmap1[k] & ~bitmap2[k];
+		result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_andnot);
 
@@ -267,6 +271,87 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
 }
 EXPORT_SYMBOL(__bitmap_weight);
 
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+
+void bitmap_set(unsigned long *map, int start, int nr)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const int size = start + nr;
+	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+	while (nr - bits_to_set >= 0) {
+		*p |= mask_to_set;
+		nr -= bits_to_set;
+		bits_to_set = BITS_PER_LONG;
+		mask_to_set = ~0UL;
+		p++;
+	}
+	if (nr) {
+		mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+		*p |= mask_to_set;
+	}
+}
+EXPORT_SYMBOL(bitmap_set);
+
+void bitmap_clear(unsigned long *map, int start, int nr)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const int size = start + nr;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (nr - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		nr -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (nr) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
+EXPORT_SYMBOL(bitmap_clear);
+
+/*
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds is multiples of that
+ * power of 2. A @align_mask of 0 means no alignment is required.
+ */
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+					 unsigned long size,
+					 unsigned long start,
+					 unsigned int nr,
+					 unsigned long align_mask)
+{
+	unsigned long index, end, i;
+again:
+	index = find_next_zero_bit(map, size, start);
+
+	/* Align allocation */
+	index = __ALIGN_MASK(index, align_mask);
+
+	end = index + nr;
+	if (end > size)
+		return end;
+	i = find_next_bit(map, end, index);
+	if (i < end) {
+		start = i + 1;
+		goto again;
+	}
+	return index;
+}
+EXPORT_SYMBOL(bitmap_find_next_zero_area);
+
 /*
  * Bitmap printing & parsing functions: first version by Bill Irwin,
  * second version by Paul Jackson, third by Joe Korty.
@@ -274,7 +359,6 @@ EXPORT_SYMBOL(__bitmap_weight);
 
 #define CHUNKSZ				32
 #define nbits_to_hold_value(val)	fls(val)
-#define unhex(c)			(isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
 #define BASEDEC 10		/* fancier cpuset lists input in decimal */
 
 /**
@@ -381,7 +465,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
 			if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1))
 				return -EOVERFLOW;
 
-			chunk = (chunk << 4) | unhex(c);
+			chunk = (chunk << 4) | hex_to_bin(c);
 			ndigits++; totaldigits++;
 		}
 		if (ndigits == 0)
@@ -402,7 +486,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
 EXPORT_SYMBOL(__bitmap_parse);
 
 /**
- * bitmap_parse_user()
+ * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap
  *
  * @ubuf: pointer to user buffer containing string.
  * @ulen: buffer size in bytes.  If string is smaller than this
@@ -534,7 +618,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 EXPORT_SYMBOL(bitmap_parselist);
 
 /**
- * bitmap_pos_to_ord(buf, pos, bits)
+ * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
  *	@buf: pointer to a bitmap
  *	@pos: a bit position in @buf (0 <= @pos < @bits)
  *	@bits: number of valid bit positions in @buf
@@ -570,7 +654,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
 }
 
 /**
- * bitmap_ord_to_pos(buf, ord, bits)
+ * bitmap_ord_to_pos - find position of n-th set bit in bitmap
  *	@buf: pointer to bitmap
  *	@ord: ordinal bit position (n-th set bit, n >= 0)
  *	@bits: number of valid bit positions in @buf
@@ -648,10 +732,9 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src,
 	bitmap_zero(dst, bits);
 
 	w = bitmap_weight(new, bits);
-	for (oldbit = find_first_bit(src, bits);
-	     oldbit < bits;
-	     oldbit = find_next_bit(src, bits, oldbit + 1)) {
+	for_each_set_bit(oldbit, src, bits) {
 	     	int n = bitmap_pos_to_ord(old, oldbit, bits);
+
 		if (n < 0 || w == 0)
 			set_bit(oldbit, dst);	/* identity map */
 		else
@@ -818,9 +901,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
 	 */
 
 	m = 0;
-	for (n = find_first_bit(relmap, bits);
-	     n < bits;
-	     n = find_next_bit(relmap, bits, n + 1)) {
+	for_each_set_bit(n, relmap, bits) {
 		/* m == bitmap_pos_to_ord(relmap, n, bits) */
 		if (test_bit(m, orig))
 			set_bit(n, dst);
@@ -849,9 +930,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
 		return;
 	bitmap_zero(dst, bits);
 
-	for (oldbit = find_first_bit(orig, bits);
-	     oldbit < bits;
-	     oldbit = find_next_bit(orig, bits, oldbit + 1))
+	for_each_set_bit(oldbit, orig, bits)
 		set_bit(oldbit % sz, dst);
 }
 EXPORT_SYMBOL(bitmap_fold);
diff --git a/lib/btree.c b/lib/btree.c
new file mode 100644
index 000000000000..c9c6f0351526
--- /dev/null
+++ b/lib/btree.c
@@ -0,0 +1,798 @@
+/*
+ * lib/btree.c	- Simple In-memory B+Tree
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
+ * Bits and pieces stolen from Peter Zijlstra's code, which is
+ * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
+ * GPLv2
+ *
+ * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
+ *
+ * A relatively simple B+Tree implementation.  I have written it as a learning
+ * excercise to understand how B+Trees work.  Turned out to be useful as well.
+ *
+ * B+Trees can be used similar to Linux radix trees (which don't have anything
+ * in common with textbook radix trees, beware).  Prerequisite for them working
+ * well is that access to a random tree node is much faster than a large number
+ * of operations within each node.
+ *
+ * Disks have fulfilled the prerequisite for a long time.  More recently DRAM
+ * has gained similar properties, as memory access times, when measured in cpu
+ * cycles, have increased.  Cacheline sizes have increased as well, which also
+ * helps B+Trees.
+ *
+ * Compared to radix trees, B+Trees are more efficient when dealing with a
+ * sparsely populated address space.  Between 25% and 50% of the memory is
+ * occupied with valid pointers.  When densely populated, radix trees contain
+ * ~98% pointers - hard to beat.  Very sparse radix trees contain only ~2%
+ * pointers.
+ *
+ * This particular implementation stores pointers identified by a long value.
+ * Storing NULL pointers is illegal, lookup will return NULL when no entry
+ * was found.
+ *
+ * A tricks was used that is not commonly found in textbooks.  The lowest
+ * values are to the right, not to the left.  All used slots within a node
+ * are on the left, all unused slots contain NUL values.  Most operations
+ * simply loop once over all slots and terminate on the first NUL.
+ */
+
+#include <linux/btree.h>
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define NODESIZE MAX(L1_CACHE_BYTES, 128)
+
+struct btree_geo {
+	int keylen;
+	int no_pairs;
+	int no_longs;
+};
+
+struct btree_geo btree_geo32 = {
+	.keylen = 1,
+	.no_pairs = NODESIZE / sizeof(long) / 2,
+	.no_longs = NODESIZE / sizeof(long) / 2,
+};
+EXPORT_SYMBOL_GPL(btree_geo32);
+
+#define LONG_PER_U64 (64 / BITS_PER_LONG)
+struct btree_geo btree_geo64 = {
+	.keylen = LONG_PER_U64,
+	.no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64),
+	.no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo64);
+
+struct btree_geo btree_geo128 = {
+	.keylen = 2 * LONG_PER_U64,
+	.no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64),
+	.no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo128);
+
+static struct kmem_cache *btree_cachep;
+
+void *btree_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	return kmem_cache_alloc(btree_cachep, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(btree_alloc);
+
+void btree_free(void *element, void *pool_data)
+{
+	kmem_cache_free(btree_cachep, element);
+}
+EXPORT_SYMBOL_GPL(btree_free);
+
+static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
+{
+	unsigned long *node;
+
+	node = mempool_alloc(head->mempool, gfp);
+	if (likely(node))
+		memset(node, 0, NODESIZE);
+	return node;
+}
+
+static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++) {
+		if (l1[i] < l2[i])
+			return -1;
+		if (l1[i] > l2[i])
+			return 1;
+	}
+	return 0;
+}
+
+static unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
+		size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		dest[i] = src[i];
+	return dest;
+}
+
+static unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		s[i] = c;
+	return s;
+}
+
+static void dec_key(struct btree_geo *geo, unsigned long *key)
+{
+	unsigned long val;
+	int i;
+
+	for (i = geo->keylen - 1; i >= 0; i--) {
+		val = key[i];
+		key[i] = val - 1;
+		if (val)
+			break;
+	}
+}
+
+static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n)
+{
+	return &node[n * geo->keylen];
+}
+
+static void *bval(struct btree_geo *geo, unsigned long *node, int n)
+{
+	return (void *)node[geo->no_longs + n];
+}
+
+static void setkey(struct btree_geo *geo, unsigned long *node, int n,
+		   unsigned long *key)
+{
+	longcpy(bkey(geo, node, n), key, geo->keylen);
+}
+
+static void setval(struct btree_geo *geo, unsigned long *node, int n,
+		   void *val)
+{
+	node[geo->no_longs + n] = (unsigned long) val;
+}
+
+static void clearpair(struct btree_geo *geo, unsigned long *node, int n)
+{
+	longset(bkey(geo, node, n), 0, geo->keylen);
+	node[geo->no_longs + n] = 0;
+}
+
+static inline void __btree_init(struct btree_head *head)
+{
+	head->node = NULL;
+	head->height = 0;
+}
+
+void btree_init_mempool(struct btree_head *head, mempool_t *mempool)
+{
+	__btree_init(head);
+	head->mempool = mempool;
+}
+EXPORT_SYMBOL_GPL(btree_init_mempool);
+
+int btree_init(struct btree_head *head)
+{
+	__btree_init(head);
+	head->mempool = mempool_create(0, btree_alloc, btree_free, NULL);
+	if (!head->mempool)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btree_init);
+
+void btree_destroy(struct btree_head *head)
+{
+	mempool_destroy(head->mempool);
+	head->mempool = NULL;
+}
+EXPORT_SYMBOL_GPL(btree_destroy);
+
+void *btree_last(struct btree_head *head, struct btree_geo *geo,
+		 unsigned long *key)
+{
+	int height = head->height;
+	unsigned long *node = head->node;
+
+	if (height == 0)
+		return NULL;
+
+	for ( ; height > 1; height--)
+		node = bval(geo, node, 0);
+
+	longcpy(key, bkey(geo, node, 0), geo->keylen);
+	return bval(geo, node, 0);
+}
+EXPORT_SYMBOL_GPL(btree_last);
+
+static int keycmp(struct btree_geo *geo, unsigned long *node, int pos,
+		  unsigned long *key)
+{
+	return longcmp(bkey(geo, node, pos), key, geo->keylen);
+}
+
+static int keyzero(struct btree_geo *geo, unsigned long *key)
+{
+	int i;
+
+	for (i = 0; i < geo->keylen; i++)
+		if (key[i])
+			return 0;
+
+	return 1;
+}
+
+void *btree_lookup(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key)
+{
+	int i, height = head->height;
+	unsigned long *node = head->node;
+
+	if (height == 0)
+		return NULL;
+
+	for ( ; height > 1; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+		if (i == geo->no_pairs)
+			return NULL;
+		node = bval(geo, node, i);
+		if (!node)
+			return NULL;
+	}
+
+	if (!node)
+		return NULL;
+
+	for (i = 0; i < geo->no_pairs; i++)
+		if (keycmp(geo, node, i, key) == 0)
+			return bval(geo, node, i);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(btree_lookup);
+
+int btree_update(struct btree_head *head, struct btree_geo *geo,
+		 unsigned long *key, void *val)
+{
+	int i, height = head->height;
+	unsigned long *node = head->node;
+
+	if (height == 0)
+		return -ENOENT;
+
+	for ( ; height > 1; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+		if (i == geo->no_pairs)
+			return -ENOENT;
+		node = bval(geo, node, i);
+		if (!node)
+			return -ENOENT;
+	}
+
+	if (!node)
+		return -ENOENT;
+
+	for (i = 0; i < geo->no_pairs; i++)
+		if (keycmp(geo, node, i, key) == 0) {
+			setval(geo, node, i, val);
+			return 0;
+		}
+	return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(btree_update);
+
+/*
+ * Usually this function is quite similar to normal lookup.  But the key of
+ * a parent node may be smaller than the smallest key of all its siblings.
+ * In such a case we cannot just return NULL, as we have only proven that no
+ * key smaller than __key, but larger than this parent key exists.
+ * So we set __key to the parent key and retry.  We have to use the smallest
+ * such parent key, which is the last parent key we encountered.
+ */
+void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
+		     unsigned long *__key)
+{
+	int i, height;
+	unsigned long *node, *oldnode;
+	unsigned long *retry_key = NULL, key[geo->keylen];
+
+	if (keyzero(geo, __key))
+		return NULL;
+
+	if (head->height == 0)
+		return NULL;
+retry:
+	longcpy(key, __key, geo->keylen);
+	dec_key(geo, key);
+
+	node = head->node;
+	for (height = head->height ; height > 1; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+		if (i == geo->no_pairs)
+			goto miss;
+		oldnode = node;
+		node = bval(geo, node, i);
+		if (!node)
+			goto miss;
+		retry_key = bkey(geo, oldnode, i);
+	}
+
+	if (!node)
+		goto miss;
+
+	for (i = 0; i < geo->no_pairs; i++) {
+		if (keycmp(geo, node, i, key) <= 0) {
+			if (bval(geo, node, i)) {
+				longcpy(__key, bkey(geo, node, i), geo->keylen);
+				return bval(geo, node, i);
+			} else
+				goto miss;
+		}
+	}
+miss:
+	if (retry_key) {
+		__key = retry_key;
+		retry_key = NULL;
+		goto retry;
+	}
+	return NULL;
+}
+
+static int getpos(struct btree_geo *geo, unsigned long *node,
+		unsigned long *key)
+{
+	int i;
+
+	for (i = 0; i < geo->no_pairs; i++) {
+		if (keycmp(geo, node, i, key) <= 0)
+			break;
+	}
+	return i;
+}
+
+static int getfill(struct btree_geo *geo, unsigned long *node, int start)
+{
+	int i;
+
+	for (i = start; i < geo->no_pairs; i++)
+		if (!bval(geo, node, i))
+			break;
+	return i;
+}
+
+/*
+ * locate the correct leaf node in the btree
+ */
+static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level)
+{
+	unsigned long *node = head->node;
+	int i, height;
+
+	for (height = head->height; height > level; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+
+		if ((i == geo->no_pairs) || !bval(geo, node, i)) {
+			/* right-most key is too large, update it */
+			/* FIXME: If the right-most key on higher levels is
+			 * always zero, this wouldn't be necessary. */
+			i--;
+			setkey(geo, node, i, key);
+		}
+		BUG_ON(i < 0);
+		node = bval(geo, node, i);
+	}
+	BUG_ON(!node);
+	return node;
+}
+
+static int btree_grow(struct btree_head *head, struct btree_geo *geo,
+		      gfp_t gfp)
+{
+	unsigned long *node;
+	int fill;
+
+	node = btree_node_alloc(head, gfp);
+	if (!node)
+		return -ENOMEM;
+	if (head->node) {
+		fill = getfill(geo, head->node, 0);
+		setkey(geo, node, 0, bkey(geo, head->node, fill - 1));
+		setval(geo, node, 0, head->node);
+	}
+	head->node = node;
+	head->height++;
+	return 0;
+}
+
+static void btree_shrink(struct btree_head *head, struct btree_geo *geo)
+{
+	unsigned long *node;
+	int fill;
+
+	if (head->height <= 1)
+		return;
+
+	node = head->node;
+	fill = getfill(geo, node, 0);
+	BUG_ON(fill > 1);
+	head->node = bval(geo, node, 0);
+	head->height--;
+	mempool_free(node, head->mempool);
+}
+
+static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
+			      unsigned long *key, void *val, int level,
+			      gfp_t gfp)
+{
+	unsigned long *node;
+	int i, pos, fill, err;
+
+	BUG_ON(!val);
+	if (head->height < level) {
+		err = btree_grow(head, geo, gfp);
+		if (err)
+			return err;
+	}
+
+retry:
+	node = find_level(head, geo, key, level);
+	pos = getpos(geo, node, key);
+	fill = getfill(geo, node, pos);
+	/* two identical keys are not allowed */
+	BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0);
+
+	if (fill == geo->no_pairs) {
+		/* need to split node */
+		unsigned long *new;
+
+		new = btree_node_alloc(head, gfp);
+		if (!new)
+			return -ENOMEM;
+		err = btree_insert_level(head, geo,
+				bkey(geo, node, fill / 2 - 1),
+				new, level + 1, gfp);
+		if (err) {
+			mempool_free(new, head->mempool);
+			return err;
+		}
+		for (i = 0; i < fill / 2; i++) {
+			setkey(geo, new, i, bkey(geo, node, i));
+			setval(geo, new, i, bval(geo, node, i));
+			setkey(geo, node, i, bkey(geo, node, i + fill / 2));
+			setval(geo, node, i, bval(geo, node, i + fill / 2));
+			clearpair(geo, node, i + fill / 2);
+		}
+		if (fill & 1) {
+			setkey(geo, node, i, bkey(geo, node, fill - 1));
+			setval(geo, node, i, bval(geo, node, fill - 1));
+			clearpair(geo, node, fill - 1);
+		}
+		goto retry;
+	}
+	BUG_ON(fill >= geo->no_pairs);
+
+	/* shift and insert */
+	for (i = fill; i > pos; i--) {
+		setkey(geo, node, i, bkey(geo, node, i - 1));
+		setval(geo, node, i, bval(geo, node, i - 1));
+	}
+	setkey(geo, node, pos, key);
+	setval(geo, node, pos, val);
+
+	return 0;
+}
+
+int btree_insert(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, void *val, gfp_t gfp)
+{
+	return btree_insert_level(head, geo, key, val, 1, gfp);
+}
+EXPORT_SYMBOL_GPL(btree_insert);
+
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level);
+static void merge(struct btree_head *head, struct btree_geo *geo, int level,
+		unsigned long *left, int lfill,
+		unsigned long *right, int rfill,
+		unsigned long *parent, int lpos)
+{
+	int i;
+
+	for (i = 0; i < rfill; i++) {
+		/* Move all keys to the left */
+		setkey(geo, left, lfill + i, bkey(geo, right, i));
+		setval(geo, left, lfill + i, bval(geo, right, i));
+	}
+	/* Exchange left and right child in parent */
+	setval(geo, parent, lpos, right);
+	setval(geo, parent, lpos + 1, left);
+	/* Remove left (formerly right) child from parent */
+	btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1);
+	mempool_free(right, head->mempool);
+}
+
+static void rebalance(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level, unsigned long *child, int fill)
+{
+	unsigned long *parent, *left = NULL, *right = NULL;
+	int i, no_left, no_right;
+
+	if (fill == 0) {
+		/* Because we don't steal entries from a neigbour, this case
+		 * can happen.  Parent node contains a single child, this
+		 * node, so merging with a sibling never happens.
+		 */
+		btree_remove_level(head, geo, key, level + 1);
+		mempool_free(child, head->mempool);
+		return;
+	}
+
+	parent = find_level(head, geo, key, level + 1);
+	i = getpos(geo, parent, key);
+	BUG_ON(bval(geo, parent, i) != child);
+
+	if (i > 0) {
+		left = bval(geo, parent, i - 1);
+		no_left = getfill(geo, left, 0);
+		if (fill + no_left <= geo->no_pairs) {
+			merge(head, geo, level,
+					left, no_left,
+					child, fill,
+					parent, i - 1);
+			return;
+		}
+	}
+	if (i + 1 < getfill(geo, parent, i)) {
+		right = bval(geo, parent, i + 1);
+		no_right = getfill(geo, right, 0);
+		if (fill + no_right <= geo->no_pairs) {
+			merge(head, geo, level,
+					child, fill,
+					right, no_right,
+					parent, i);
+			return;
+		}
+	}
+	/*
+	 * We could also try to steal one entry from the left or right
+	 * neighbor.  By not doing so we changed the invariant from
+	 * "all nodes are at least half full" to "no two neighboring
+	 * nodes can be merged".  Which means that the average fill of
+	 * all nodes is still half or better.
+	 */
+}
+
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level)
+{
+	unsigned long *node;
+	int i, pos, fill;
+	void *ret;
+
+	if (level > head->height) {
+		/* we recursed all the way up */
+		head->height = 0;
+		head->node = NULL;
+		return NULL;
+	}
+
+	node = find_level(head, geo, key, level);
+	pos = getpos(geo, node, key);
+	fill = getfill(geo, node, pos);
+	if ((level == 1) && (keycmp(geo, node, pos, key) != 0))
+		return NULL;
+	ret = bval(geo, node, pos);
+
+	/* remove and shift */
+	for (i = pos; i < fill - 1; i++) {
+		setkey(geo, node, i, bkey(geo, node, i + 1));
+		setval(geo, node, i, bval(geo, node, i + 1));
+	}
+	clearpair(geo, node, fill - 1);
+
+	if (fill - 1 < geo->no_pairs / 2) {
+		if (level < head->height)
+			rebalance(head, geo, key, level, node, fill - 1);
+		else if (fill - 1 == 1)
+			btree_shrink(head, geo);
+	}
+
+	return ret;
+}
+
+void *btree_remove(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key)
+{
+	if (head->height == 0)
+		return NULL;
+
+	return btree_remove_level(head, geo, key, 1);
+}
+EXPORT_SYMBOL_GPL(btree_remove);
+
+int btree_merge(struct btree_head *target, struct btree_head *victim,
+		struct btree_geo *geo, gfp_t gfp)
+{
+	unsigned long key[geo->keylen];
+	unsigned long dup[geo->keylen];
+	void *val;
+	int err;
+
+	BUG_ON(target == victim);
+
+	if (!(target->node)) {
+		/* target is empty, just copy fields over */
+		target->node = victim->node;
+		target->height = victim->height;
+		__btree_init(victim);
+		return 0;
+	}
+
+	/* TODO: This needs some optimizations.  Currently we do three tree
+	 * walks to remove a single object from the victim.
+	 */
+	for (;;) {
+		if (!btree_last(victim, geo, key))
+			break;
+		val = btree_lookup(victim, geo, key);
+		err = btree_insert(target, geo, key, val, gfp);
+		if (err)
+			return err;
+		/* We must make a copy of the key, as the original will get
+		 * mangled inside btree_remove. */
+		longcpy(dup, key, geo->keylen);
+		btree_remove(victim, geo, dup);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btree_merge);
+
+static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo,
+			       unsigned long *node, unsigned long opaque,
+			       void (*func)(void *elem, unsigned long opaque,
+					    unsigned long *key, size_t index,
+					    void *func2),
+			       void *func2, int reap, int height, size_t count)
+{
+	int i;
+	unsigned long *child;
+
+	for (i = 0; i < geo->no_pairs; i++) {
+		child = bval(geo, node, i);
+		if (!child)
+			break;
+		if (height > 1)
+			count = __btree_for_each(head, geo, child, opaque,
+					func, func2, reap, height - 1, count);
+		else
+			func(child, opaque, bkey(geo, node, i), count++,
+					func2);
+	}
+	if (reap)
+		mempool_free(node, head->mempool);
+	return count;
+}
+
+static void empty(void *elem, unsigned long opaque, unsigned long *key,
+		  size_t index, void *func2)
+{
+}
+
+void visitorl(void *elem, unsigned long opaque, unsigned long *key,
+	      size_t index, void *__func)
+{
+	visitorl_t func = __func;
+
+	func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitorl);
+
+void visitor32(void *elem, unsigned long opaque, unsigned long *__key,
+	       size_t index, void *__func)
+{
+	visitor32_t func = __func;
+	u32 *key = (void *)__key;
+
+	func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor32);
+
+void visitor64(void *elem, unsigned long opaque, unsigned long *__key,
+	       size_t index, void *__func)
+{
+	visitor64_t func = __func;
+	u64 *key = (void *)__key;
+
+	func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor64);
+
+void visitor128(void *elem, unsigned long opaque, unsigned long *__key,
+		size_t index, void *__func)
+{
+	visitor128_t func = __func;
+	u64 *key = (void *)__key;
+
+	func(elem, opaque, key[0], key[1], index);
+}
+EXPORT_SYMBOL_GPL(visitor128);
+
+size_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
+		     unsigned long opaque,
+		     void (*func)(void *elem, unsigned long opaque,
+		     		  unsigned long *key,
+		     		  size_t index, void *func2),
+		     void *func2)
+{
+	size_t count = 0;
+
+	if (!func2)
+		func = empty;
+	if (head->node)
+		count = __btree_for_each(head, geo, head->node, opaque, func,
+				func2, 0, head->height, 0);
+	return count;
+}
+EXPORT_SYMBOL_GPL(btree_visitor);
+
+size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
+			  unsigned long opaque,
+			  void (*func)(void *elem, unsigned long opaque,
+				       unsigned long *key,
+				       size_t index, void *func2),
+			  void *func2)
+{
+	size_t count = 0;
+
+	if (!func2)
+		func = empty;
+	if (head->node)
+		count = __btree_for_each(head, geo, head->node, opaque, func,
+				func2, 1, head->height, 0);
+	__btree_init(head);
+	return count;
+}
+EXPORT_SYMBOL_GPL(btree_grim_visitor);
+
+static int __init btree_module_init(void)
+{
+	btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0,
+			SLAB_HWCACHE_ALIGN, NULL);
+	return 0;
+}
+
+static void __exit btree_module_exit(void)
+{
+	kmem_cache_destroy(btree_cachep);
+}
+
+/* If core code starts using btree, initialization should happen even earlier */
+module_init(btree_module_init);
+module_exit(btree_module_exit);
+
+MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
+MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
+MODULE_LICENSE("GPL");
diff --git a/lib/bug.c b/lib/bug.c
index 300e41afbf97..19552096d16b 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
 	return NULL;
 }
 
-int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
-			struct module *mod)
+void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+			 struct module *mod)
 {
 	char *secstrings;
 	unsigned int i;
@@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
 	 * could potentially lead to deadlock and thus be counter-productive.
 	 */
 	list_add(&mod->bug_list, &module_bug_list);
-
-	return 0;
 }
 
 void module_bug_cleanup(struct module *mod)
@@ -136,8 +134,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 
 	bug = find_bug(bugaddr);
 
-	printk(KERN_EMERG "------------[ cut here ]------------\n");
-
 	file = NULL;
 	line = 0;
 	warning = 0;
@@ -156,19 +152,25 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 
 	if (warning) {
 		/* this is a WARN_ON rather than BUG/BUG_ON */
+		printk(KERN_WARNING "------------[ cut here ]------------\n");
+
 		if (file)
-			printk(KERN_ERR "Badness at %s:%u\n",
+			printk(KERN_WARNING "WARNING: at %s:%u\n",
 			       file, line);
 		else
-			printk(KERN_ERR "Badness at %p "
+			printk(KERN_WARNING "WARNING: at %p "
 			       "[verbose debug info unavailable]\n",
 			       (void *)bugaddr);
 
+		print_modules();
 		show_regs(regs);
-		add_taint(TAINT_WARN);
+		print_oops_end_marker();
+		add_taint(BUG_GET_TAINT(bug));
 		return BUG_TRAP_TYPE_WARN;
 	}
 
+	printk(KERN_EMERG "------------[ cut here ]------------\n");
+
 	if (file)
 		printk(KERN_CRIT "kernel BUG at %s:%u!\n",
 		       file, line);
diff --git a/lib/checksum.c b/lib/checksum.c
index 12e5a1c91cda..097508732f34 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -37,7 +37,8 @@
 
 #include <asm/byteorder.h>
 
-static inline unsigned short from32to16(unsigned long x)
+#ifndef do_csum
+static inline unsigned short from32to16(unsigned int x)
 {
 	/* add up 16-bit and 16-bit for 16+c bit */
 	x = (x & 0xffff) + (x >> 16);
@@ -49,13 +50,17 @@ static inline unsigned short from32to16(unsigned long x)
 static unsigned int do_csum(const unsigned char *buff, int len)
 {
 	int odd, count;
-	unsigned long result = 0;
+	unsigned int result = 0;
 
 	if (len <= 0)
 		goto out;
 	odd = 1 & (unsigned long) buff;
 	if (odd) {
+#ifdef __LITTLE_ENDIAN
+		result += (*buff << 8);
+#else
 		result = *buff;
+#endif
 		len--;
 		buff++;
 	}
@@ -69,9 +74,9 @@ static unsigned int do_csum(const unsigned char *buff, int len)
 		}
 		count >>= 1;		/* nr of 32-bit words.. */
 		if (count) {
-			unsigned long carry = 0;
+			unsigned int carry = 0;
 			do {
-				unsigned long w = *(unsigned long *) buff;
+				unsigned int w = *(unsigned int *) buff;
 				count--;
 				buff += 4;
 				result += carry;
@@ -87,13 +92,18 @@ static unsigned int do_csum(const unsigned char *buff, int len)
 		}
 	}
 	if (len & 1)
+#ifdef __LITTLE_ENDIAN
+		result += *buff;
+#else
 		result += (*buff << 8);
+#endif
 	result = from32to16(result);
 	if (odd)
 		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
 out:
 	return result;
 }
+#endif
 
 /*
  *	This is a version of ip_compute_csum() optimized for IP headers,
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
new file mode 100644
index 000000000000..4dc20321b0d5
--- /dev/null
+++ b/lib/cpu-notifier-error-inject.c
@@ -0,0 +1,63 @@
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+
+static int priority;
+static int cpu_up_prepare_error;
+static int cpu_down_prepare_error;
+
+module_param(priority, int, 0);
+MODULE_PARM_DESC(priority, "specify cpu notifier priority");
+
+module_param(cpu_up_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_up_prepare_error,
+		"specify error code to inject CPU_UP_PREPARE action");
+
+module_param(cpu_down_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_down_prepare_error,
+		"specify error code to inject CPU_DOWN_PREPARE action");
+
+static int err_inject_cpu_callback(struct notifier_block *nfb,
+				unsigned long action, void *hcpu)
+{
+	int err = 0;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		err = cpu_up_prepare_error;
+		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		err = cpu_down_prepare_error;
+		break;
+	}
+	if (err)
+		printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err);
+
+	return notifier_from_errno(err);
+}
+
+static struct notifier_block err_inject_cpu_notifier = {
+	.notifier_call = err_inject_cpu_callback,
+};
+
+static int err_inject_init(void)
+{
+	err_inject_cpu_notifier.priority = priority;
+
+	return register_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+static void err_inject_exit(void)
+{
+	unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_DESCRIPTION("CPU notifier error injection module");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 7bb4142a502f..05d6aca7fc19 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -1,3 +1,4 @@
+#include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/cpumask.h>
diff --git a/lib/crc32.c b/lib/crc32.c
index 49d1c9e3ce38..4855995fcde9 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -25,16 +25,19 @@
 #include <linux/module.h>
 #include <linux/compiler.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/init.h>
 #include <asm/atomic.h>
 #include "crc32defs.h"
 #if CRC_LE_BITS == 8
-#define tole(x) __constant_cpu_to_le32(x)
-#define tobe(x) __constant_cpu_to_be32(x)
+# define tole(x) __constant_cpu_to_le32(x)
 #else
-#define tole(x) (x)
-#define tobe(x) (x)
+# define tole(x) (x)
+#endif
+
+#if CRC_BE_BITS == 8
+# define tobe(x) __constant_cpu_to_be32(x)
+#else
+# define tobe(x) (x)
 #endif
 #include "crc32table.h"
 
@@ -42,6 +45,54 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
 MODULE_DESCRIPTION("Ethernet CRC32 calculations");
 MODULE_LICENSE("GPL");
 
+#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
+
+static inline u32
+crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
+{
+# ifdef __LITTLE_ENDIAN
+#  define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
+#  define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
+		tab[2][(crc >> 8) & 255] ^ \
+		tab[1][(crc >> 16) & 255] ^ \
+		tab[0][(crc >> 24) & 255]
+# else
+#  define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
+#  define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
+		tab[1][(crc >> 8) & 255] ^ \
+		tab[2][(crc >> 16) & 255] ^ \
+		tab[3][(crc >> 24) & 255]
+# endif
+	const u32 *b;
+	size_t    rem_len;
+
+	/* Align it */
+	if (unlikely((long)buf & 3 && len)) {
+		do {
+			DO_CRC(*buf++);
+		} while ((--len) && ((long)buf)&3);
+	}
+	rem_len = len & 3;
+	/* load data 32 bits wide, xor data 32 bits wide. */
+	len = len >> 2;
+	b = (const u32 *)buf;
+	for (--b; len; --len) {
+		crc ^= *++b; /* use pre increment for speed */
+		DO_CRC4;
+	}
+	len = rem_len;
+	/* And the last few bytes */
+	if (len) {
+		u8 *p = (u8 *)(b + 1) - 1;
+		do {
+			DO_CRC(*++p); /* use pre increment for speed */
+		} while (--len);
+	}
+	return crc;
+#undef DO_CRC
+#undef DO_CRC4
+}
+#endif
 /**
  * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
  * @crc: seed value for computation.  ~0 for Ethernet, sometimes 0 for
@@ -72,52 +123,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
 # if CRC_LE_BITS == 8
-	const u32      *b =(u32 *)p;
-	const u32      *tab = crc32table_le;
-
-# ifdef __LITTLE_ENDIAN
-#  define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
-# else
-#  define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
-# endif
+	const u32      (*tab)[] = crc32table_le;
 
 	crc = __cpu_to_le32(crc);
-	/* Align it */
-	if(unlikely(((long)b)&3 && len)){
-		do {
-			u8 *p = (u8 *)b;
-			DO_CRC(*p++);
-			b = (void *)p;
-		} while ((--len) && ((long)b)&3 );
-	}
-	if(likely(len >= 4)){
-		/* load data 32 bits wide, xor data 32 bits wide. */
-		size_t save_len = len & 3;
-	        len = len >> 2;
-		--b; /* use pre increment below(*++b) for speed */
-		do {
-			crc ^= *++b;
-			DO_CRC(0);
-			DO_CRC(0);
-			DO_CRC(0);
-			DO_CRC(0);
-		} while (--len);
-		b++; /* point to next byte(s) */
-		len = save_len;
-	}
-	/* And the last few bytes */
-	if(len){
-		do {
-			u8 *p = (u8 *)b;
-			DO_CRC(*p++);
-			b = (void *)p;
-		} while (--len);
-	}
-
+	crc = crc32_body(crc, p, len, tab);
 	return __le32_to_cpu(crc);
-#undef ENDIAN_SHIFT
-#undef DO_CRC
-
 # elif CRC_LE_BITS == 4
 	while (len--) {
 		crc ^= *p++;
@@ -170,51 +180,11 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
 # if CRC_BE_BITS == 8
-	const u32      *b =(u32 *)p;
-	const u32      *tab = crc32table_be;
-
-# ifdef __LITTLE_ENDIAN
-#  define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
-# else
-#  define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
-# endif
+	const u32      (*tab)[] = crc32table_be;
 
 	crc = __cpu_to_be32(crc);
-	/* Align it */
-	if(unlikely(((long)b)&3 && len)){
-		do {
-			u8 *p = (u8 *)b;
-			DO_CRC(*p++);
-			b = (u32 *)p;
-		} while ((--len) && ((long)b)&3 );
-	}
-	if(likely(len >= 4)){
-		/* load data 32 bits wide, xor data 32 bits wide. */
-		size_t save_len = len & 3;
-	        len = len >> 2;
-		--b; /* use pre increment below(*++b) for speed */
-		do {
-			crc ^= *++b;
-			DO_CRC(0);
-			DO_CRC(0);
-			DO_CRC(0);
-			DO_CRC(0);
-		} while (--len);
-		b++; /* point to next byte(s) */
-		len = save_len;
-	}
-	/* And the last few bytes */
-	if(len){
-		do {
-			u8 *p = (u8 *)b;
-			DO_CRC(*p++);
-			b = (void *)p;
-		} while (--len);
-	}
+	crc = crc32_body(crc, p, len, tab);
 	return __be32_to_cpu(crc);
-#undef ENDIAN_SHIFT
-#undef DO_CRC
-
 # elif CRC_BE_BITS == 4
 	while (len--) {
 		crc ^= *p++ << 24;
diff --git a/lib/ctype.c b/lib/ctype.c
index d02ace14a322..26baa620e95b 100644
--- a/lib/ctype.c
+++ b/lib/ctype.c
@@ -7,30 +7,30 @@
 #include <linux/ctype.h>
 #include <linux/module.h>
 
-unsigned char _ctype[] = {
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 0-7 */
-_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,		/* 8-15 */
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 16-23 */
-_C,_C,_C,_C,_C,_C,_C,_C,			/* 24-31 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,			/* 32-39 */
-_P,_P,_P,_P,_P,_P,_P,_P,			/* 40-47 */
-_D,_D,_D,_D,_D,_D,_D,_D,			/* 48-55 */
-_D,_D,_P,_P,_P,_P,_P,_P,			/* 56-63 */
-_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,	/* 64-71 */
-_U,_U,_U,_U,_U,_U,_U,_U,			/* 72-79 */
-_U,_U,_U,_U,_U,_U,_U,_U,			/* 80-87 */
-_U,_U,_U,_P,_P,_P,_P,_P,			/* 88-95 */
-_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,	/* 96-103 */
-_L,_L,_L,_L,_L,_L,_L,_L,			/* 104-111 */
-_L,_L,_L,_L,_L,_L,_L,_L,			/* 112-119 */
-_L,_L,_L,_P,_P,_P,_P,_C,			/* 120-127 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,		/* 128-143 */
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,		/* 144-159 */
-_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,   /* 160-175 */
-_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,       /* 176-191 */
-_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,       /* 192-207 */
-_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,       /* 208-223 */
-_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,       /* 224-239 */
-_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};      /* 240-255 */
+const unsigned char _ctype[] = {
+_C,_C,_C,_C,_C,_C,_C,_C,				/* 0-7 */
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,			/* 8-15 */
+_C,_C,_C,_C,_C,_C,_C,_C,				/* 16-23 */
+_C,_C,_C,_C,_C,_C,_C,_C,				/* 24-31 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,				/* 32-39 */
+_P,_P,_P,_P,_P,_P,_P,_P,				/* 40-47 */
+_D,_D,_D,_D,_D,_D,_D,_D,				/* 48-55 */
+_D,_D,_P,_P,_P,_P,_P,_P,				/* 56-63 */
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,		/* 64-71 */
+_U,_U,_U,_U,_U,_U,_U,_U,				/* 72-79 */
+_U,_U,_U,_U,_U,_U,_U,_U,				/* 80-87 */
+_U,_U,_U,_P,_P,_P,_P,_P,				/* 88-95 */
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,		/* 96-103 */
+_L,_L,_L,_L,_L,_L,_L,_L,				/* 104-111 */
+_L,_L,_L,_L,_L,_L,_L,_L,				/* 112-119 */
+_L,_L,_L,_P,_P,_P,_P,_C,				/* 120-127 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,			/* 128-143 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,			/* 144-159 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,	/* 160-175 */
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,	/* 176-191 */
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,	/* 192-207 */
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,	/* 208-223 */
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,	/* 224-239 */
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};	/* 240-255 */
 
 EXPORT_SYMBOL(_ctype);
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index bc3b11731b9c..b1c177307677 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -8,7 +8,6 @@
  *
  *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  */
-#include <linux/kernel.h>
 #include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
@@ -23,6 +22,7 @@
  * shut up after that.
  */
 int debug_locks = 1;
+EXPORT_SYMBOL_GPL(debug_locks);
 
 /*
  * The locking-testsuite uses <debug_locks_silent> to get a
@@ -38,7 +38,6 @@ int debug_locks_off(void)
 {
 	if (__debug_locks_off()) {
 		if (!debug_locks_silent) {
-			oops_in_progress = 1;
 			console_verbose();
 			return 1;
 		}
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 2755a3bd16a1..deebcc57d4e6 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -9,8 +9,10 @@
  */
 #include <linux/debugobjects.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/slab.h>
 #include <linux/hash.h>
 
 #define ODEBUG_HASH_BITS	14
@@ -25,14 +27,14 @@
 
 struct debug_bucket {
 	struct hlist_head	list;
-	spinlock_t		lock;
+	raw_spinlock_t		lock;
 };
 
 static struct debug_bucket	obj_hash[ODEBUG_HASH_SIZE];
 
 static struct debug_obj		obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
 
-static DEFINE_SPINLOCK(pool_lock);
+static DEFINE_RAW_SPINLOCK(pool_lock);
 
 static HLIST_HEAD(obj_pool);
 
@@ -95,10 +97,10 @@ static int fill_pool(void)
 		if (!new)
 			return obj_pool_free;
 
-		spin_lock_irqsave(&pool_lock, flags);
+		raw_spin_lock_irqsave(&pool_lock, flags);
 		hlist_add_head(&new->node, &obj_pool);
 		obj_pool_free++;
-		spin_unlock_irqrestore(&pool_lock, flags);
+		raw_spin_unlock_irqrestore(&pool_lock, flags);
 	}
 	return obj_pool_free;
 }
@@ -132,13 +134,14 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 {
 	struct debug_obj *obj = NULL;
 
-	spin_lock(&pool_lock);
+	raw_spin_lock(&pool_lock);
 	if (obj_pool.first) {
 		obj	    = hlist_entry(obj_pool.first, typeof(*obj), node);
 
 		obj->object = addr;
 		obj->descr  = descr;
 		obj->state  = ODEBUG_STATE_NONE;
+		obj->astate = 0;
 		hlist_del(&obj->node);
 
 		hlist_add_head(&obj->node, &b->list);
@@ -151,7 +154,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 		if (obj_pool_free < obj_pool_min_free)
 			obj_pool_min_free = obj_pool_free;
 	}
-	spin_unlock(&pool_lock);
+	raw_spin_unlock(&pool_lock);
 
 	return obj;
 }
@@ -164,7 +167,7 @@ static void free_obj_work(struct work_struct *work)
 	struct debug_obj *obj;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pool_lock, flags);
+	raw_spin_lock_irqsave(&pool_lock, flags);
 	while (obj_pool_free > ODEBUG_POOL_SIZE) {
 		obj = hlist_entry(obj_pool.first, typeof(*obj), node);
 		hlist_del(&obj->node);
@@ -173,11 +176,11 @@ static void free_obj_work(struct work_struct *work)
 		 * We release pool_lock across kmem_cache_free() to
 		 * avoid contention on pool_lock.
 		 */
-		spin_unlock_irqrestore(&pool_lock, flags);
+		raw_spin_unlock_irqrestore(&pool_lock, flags);
 		kmem_cache_free(obj_cache, obj);
-		spin_lock_irqsave(&pool_lock, flags);
+		raw_spin_lock_irqsave(&pool_lock, flags);
 	}
-	spin_unlock_irqrestore(&pool_lock, flags);
+	raw_spin_unlock_irqrestore(&pool_lock, flags);
 }
 
 /*
@@ -189,7 +192,7 @@ static void free_object(struct debug_obj *obj)
 	unsigned long flags;
 	int sched = 0;
 
-	spin_lock_irqsave(&pool_lock, flags);
+	raw_spin_lock_irqsave(&pool_lock, flags);
 	/*
 	 * schedule work when the pool is filled and the cache is
 	 * initialized:
@@ -199,7 +202,7 @@ static void free_object(struct debug_obj *obj)
 	hlist_add_head(&obj->node, &obj_pool);
 	obj_pool_free++;
 	obj_pool_used--;
-	spin_unlock_irqrestore(&pool_lock, flags);
+	raw_spin_unlock_irqrestore(&pool_lock, flags);
 	if (sched)
 		schedule_work(&debug_obj_work);
 }
@@ -220,9 +223,9 @@ static void debug_objects_oom(void)
 	printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n");
 
 	for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
-		spin_lock_irqsave(&db->lock, flags);
+		raw_spin_lock_irqsave(&db->lock, flags);
 		hlist_move_list(&db->list, &freelist);
-		spin_unlock_irqrestore(&db->lock, flags);
+		raw_spin_unlock_irqrestore(&db->lock, flags);
 
 		/* Now free them */
 		hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
@@ -250,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
 
 	if (limit < 5 && obj->descr != descr_test) {
 		limit++;
-		WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg,
-		       obj_states[obj->state], obj->descr->name);
+		WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
+				 "object type: %s\n",
+			msg, obj_states[obj->state], obj->astate,
+			obj->descr->name);
 	}
 	debug_objects_warnings++;
 }
@@ -302,14 +307,14 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
 
 	db = get_bucket((unsigned long) addr);
 
-	spin_lock_irqsave(&db->lock, flags);
+	raw_spin_lock_irqsave(&db->lock, flags);
 
 	obj = lookup_object(addr, db);
 	if (!obj) {
 		obj = alloc_object(addr, db, descr);
 		if (!obj) {
 			debug_objects_enabled = 0;
-			spin_unlock_irqrestore(&db->lock, flags);
+			raw_spin_unlock_irqrestore(&db->lock, flags);
 			debug_objects_oom();
 			return;
 		}
@@ -326,7 +331,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
 	case ODEBUG_STATE_ACTIVE:
 		debug_print_object(obj, "init");
 		state = obj->state;
-		spin_unlock_irqrestore(&db->lock, flags);
+		raw_spin_unlock_irqrestore(&db->lock, flags);
 		debug_object_fixup(descr->fixup_init, addr, state);
 		return;
 
@@ -337,7 +342,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
 		break;
 	}
 
-	spin_unlock_irqrestore(&db->lock, flags);
+	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
 
 /**
@@ -384,7 +389,7 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
 
 	db = get_bucket((unsigned long) addr);
 
-	spin_lock_irqsave(&db->lock, flags);
+	raw_spin_lock_irqsave(&db->lock, flags);
 
 	obj = lookup_object(addr, db);
 	if (obj) {
@@ -397,7 +402,7 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
 		case ODEBUG_STATE_ACTIVE:
 			debug_print_object(obj, "activate");
 			state = obj->state;
-			spin_unlock_irqrestore(&db->lock, flags);
+			raw_spin_unlock_irqrestore(&db->lock, flags);
 			debug_object_fixup(descr->fixup_activate, addr, state);
 			return;
 
@@ -407,11 +412,11 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
 		default:
 			break;
 		}
-		spin_unlock_irqrestore(&db->lock, flags);
+		raw_spin_unlock_irqrestore(&db->lock, flags);
 		return;
 	}
 
-	spin_unlock_irqrestore(&db->lock, flags);
+	raw_spin_unlock_irqrestore(&db->lock, flags);
 	/*
 	 * This happens when a static object is activated. We
 	 * let the type specific code decide whether this is
@@ -437,7 +442,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
 
 	db = get_bucket((unsigned long) addr);
 
-	spin_lock_irqsave(&db->lock, flags);
+	raw_spin_lock_irqsave(&db->lock, flags);
 
 	obj = lookup_object(addr, db);
 	if (obj) {
@@ -445,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
 		case ODEBUG_STATE_INIT:
 		case ODEBUG_STATE_INACTIVE:
 		case ODEBUG_STATE_ACTIVE:
-			obj->state = ODEBUG_STATE_INACTIVE;
+			if (!obj->astate)
+				obj->state = ODEBUG_STATE_INACTIVE;
+			else
+				debug_print_object(obj, "deactivate");
 			break;
 
 		case ODEBUG_STATE_DESTROYED:
@@ -462,7 +470,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
 		debug_print_object(&o, "deactivate");
 	}
 
-	spin_unlock_irqrestore(&db->lock, flags);
+	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
 
 /**
@@ -482,7 +490,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
 
 	db = get_bucket((unsigned long) addr);
 
-	spin_lock_irqsave(&db->lock, flags);
+	raw_spin_lock_irqsave(&db->lock, flags);
 
 	obj = lookup_object(addr, db);
 	if (!obj)
@@ -497,7 +505,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
 	case ODEBUG_STATE_ACTIVE:
 		debug_print_object(obj, "destroy");
 		state = obj->state;
-		spin_unlock_irqrestore(&db->lock, flags);
+		raw_spin_unlock_irqrestore(&db->lock, flags);
 		debug_object_fixup(descr->fixup_destroy, addr, state);
 		return;
 
@@ -508,7 +516,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
 		break;
 	}
 out_unlock:
-	spin_unlock_irqrestore(&db->lock, flags);
+	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
 
 /**
@@ -528,7 +536,7 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr)
 
 	db = get_bucket((unsigned long) addr);
 
-	spin_lock_irqsave(&db->lock, flags);
+	raw_spin_lock_irqsave(&db->lock, flags);
 
 	obj = lookup_object(addr, db);
 	if (!obj)
@@ -538,17 +546,64 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr)
 	case ODEBUG_STATE_ACTIVE:
 		debug_print_object(obj, "free");
 		state = obj->state;
-		spin_unlock_irqrestore(&db->lock, flags);
+		raw_spin_unlock_irqrestore(&db->lock, flags);
 		debug_object_fixup(descr->fixup_free, addr, state);
 		return;
 	default:
 		hlist_del(&obj->node);
-		spin_unlock_irqrestore(&db->lock, flags);
+		raw_spin_unlock_irqrestore(&db->lock, flags);
 		free_object(obj);
 		return;
 	}
 out_unlock:
-	spin_unlock_irqrestore(&db->lock, flags);
+	raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
+/**
+ * debug_object_active_state - debug checks object usage state machine
+ * @addr:	address of the object
+ * @descr:	pointer to an object specific debug description structure
+ * @expect:	expected state
+ * @next:	state to move to if expected state is found
+ */
+void
+debug_object_active_state(void *addr, struct debug_obj_descr *descr,
+			  unsigned int expect, unsigned int next)
+{
+	struct debug_bucket *db;
+	struct debug_obj *obj;
+	unsigned long flags;
+
+	if (!debug_objects_enabled)
+		return;
+
+	db = get_bucket((unsigned long) addr);
+
+	raw_spin_lock_irqsave(&db->lock, flags);
+
+	obj = lookup_object(addr, db);
+	if (obj) {
+		switch (obj->state) {
+		case ODEBUG_STATE_ACTIVE:
+			if (obj->astate == expect)
+				obj->astate = next;
+			else
+				debug_print_object(obj, "active_state");
+			break;
+
+		default:
+			debug_print_object(obj, "active_state");
+			break;
+		}
+	} else {
+		struct debug_obj o = { .object = addr,
+				       .state = ODEBUG_STATE_NOTAVAILABLE,
+				       .descr = descr };
+
+		debug_print_object(&o, "active_state");
+	}
+
+	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
 
 #ifdef CONFIG_DEBUG_OBJECTS_FREE
@@ -574,7 +629,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 
 repeat:
 		cnt = 0;
-		spin_lock_irqsave(&db->lock, flags);
+		raw_spin_lock_irqsave(&db->lock, flags);
 		hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) {
 			cnt++;
 			oaddr = (unsigned long) obj->object;
@@ -586,7 +641,7 @@ repeat:
 				debug_print_object(obj, "free");
 				descr = obj->descr;
 				state = obj->state;
-				spin_unlock_irqrestore(&db->lock, flags);
+				raw_spin_unlock_irqrestore(&db->lock, flags);
 				debug_object_fixup(descr->fixup_free,
 						   (void *) oaddr, state);
 				goto repeat;
@@ -596,7 +651,7 @@ repeat:
 				break;
 			}
 		}
-		spin_unlock_irqrestore(&db->lock, flags);
+		raw_spin_unlock_irqrestore(&db->lock, flags);
 
 		/* Now free them */
 		hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
@@ -772,7 +827,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state)
 	}
 }
 
-static int
+static int __init
 check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
 {
 	struct debug_bucket *db;
@@ -782,7 +837,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
 
 	db = get_bucket((unsigned long) addr);
 
-	spin_lock_irqsave(&db->lock, flags);
+	raw_spin_lock_irqsave(&db->lock, flags);
 
 	obj = lookup_object(addr, db);
 	if (!obj && state != ODEBUG_STATE_NONE) {
@@ -806,7 +861,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
 	}
 	res = 0;
 out:
-	spin_unlock_irqrestore(&db->lock, flags);
+	raw_spin_unlock_irqrestore(&db->lock, flags);
 	if (res)
 		debug_objects_enabled = 0;
 	return res;
@@ -906,7 +961,7 @@ void __init debug_objects_early_init(void)
 	int i;
 
 	for (i = 0; i < ODEBUG_HASH_SIZE; i++)
-		spin_lock_init(&obj_hash[i].lock);
+		raw_spin_lock_init(&obj_hash[i].lock);
 
 	for (i = 0; i < ODEBUG_POOL_SIZE; i++)
 		hlist_add_head(&obj_static_pool[i].node, &obj_pool);
@@ -915,7 +970,7 @@ void __init debug_objects_early_init(void)
 /*
  * Convert the statically allocated objects to dynamic ones:
  */
-static int debug_objects_replace_static_objects(void)
+static int __init debug_objects_replace_static_objects(void)
 {
 	struct debug_bucket *db = obj_hash;
 	struct hlist_node *node, *tmp;
diff --git a/lib/decompress.c b/lib/decompress.c
index d2842f571674..a7606815541f 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -9,6 +9,7 @@
 #include <linux/decompress/bunzip2.h>
 #include <linux/decompress/unlzma.h>
 #include <linux/decompress/inflate.h>
+#include <linux/decompress/unlzo.h>
 
 #include <linux/types.h>
 #include <linux/string.h>
@@ -22,6 +23,9 @@
 #ifndef CONFIG_DECOMPRESS_LZMA
 # define unlzma NULL
 #endif
+#ifndef CONFIG_DECOMPRESS_LZO
+# define unlzo NULL
+#endif
 
 static const struct compress_format {
 	unsigned char magic[2];
@@ -32,6 +36,7 @@ static const struct compress_format {
 	{ {037, 0236}, "gzip", gunzip },
 	{ {0x42, 0x5a}, "bzip2", bunzip2 },
 	{ {0x5d, 0x00}, "lzma", unlzma },
+	{ {0x89, 0x4c}, "lzo", unlzo },
 	{ {0, 0}, NULL, NULL }
 };
 
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 708e2a86d87b..81c8bb1cc6aa 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -45,12 +45,14 @@
 */
 
 
-#ifndef STATIC
+#ifdef STATIC
+#define PREBOOT
+#else
 #include <linux/decompress/bunzip2.h>
-#endif /* !STATIC */
+#include <linux/slab.h>
+#endif /* STATIC */
 
 #include <linux/decompress/mm.h>
-#include <linux/slab.h>
 
 #ifndef INT_MAX
 #define INT_MAX 0x7fffffff
@@ -105,6 +107,8 @@ struct bunzip_data {
 	unsigned char selectors[32768];		/* nSelectors = 15 bits */
 	struct group_data groups[MAX_GROUPS];	/* Huffman coding tables */
 	int io_error;			/* non-zero if we have IO error */
+	int byteCount[256];
+	unsigned char symToByte[256], mtfSymbol[256];
 };
 
 
@@ -156,14 +160,16 @@ static int INIT get_next_block(struct bunzip_data *bd)
 	int *base = NULL;
 	int *limit = NULL;
 	int dbufCount, nextSym, dbufSize, groupCount, selector,
-		i, j, k, t, runPos, symCount, symTotal, nSelectors,
-		byteCount[256];
-	unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
+		i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
+	unsigned char uc, *symToByte, *mtfSymbol, *selectors;
 	unsigned int *dbuf, origPtr;
 
 	dbuf = bd->dbuf;
 	dbufSize = bd->dbufSize;
 	selectors = bd->selectors;
+	byteCount = bd->byteCount;
+	symToByte = bd->symToByte;
+	mtfSymbol = bd->mtfSymbol;
 
 	/* Read in header signature and CRC, then validate signature.
 	   (last block signature means CRC is for whole file, return now) */
@@ -297,7 +303,7 @@ static int INIT get_next_block(struct bunzip_data *bd)
 		   again when using them (during symbol decoding).*/
 		base = hufGroup->base-1;
 		limit = hufGroup->limit-1;
-		/* Calculate permute[].  Concurently, initialize
+		/* Calculate permute[].  Concurrently, initialize
 		 * temp[] and limit[]. */
 		pp = 0;
 		for (i = minLen; i <= maxLen; i++) {
@@ -635,6 +641,8 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
 
 	/* Allocate bunzip_data.  Most fields initialize to zero. */
 	bd = *bdp = malloc(i);
+	if (!bd)
+		return RETVAL_OUT_OF_MEMORY;
 	memset(bd, 0, sizeof(struct bunzip_data));
 	/* Setup input buffer */
 	bd->inbuf = inbuf;
@@ -662,6 +670,8 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
 	bd->dbufSize = 100000*(i-BZh0);
 
 	bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
+	if (!bd->dbuf)
+		return RETVAL_OUT_OF_MEMORY;
 	return RETVAL_OK;
 }
 
@@ -681,12 +691,10 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
 	set_error_fn(error_fn);
 	if (flush)
 		outbuf = malloc(BZIP2_IOBUF_SIZE);
-	else
-		len -= 4; /* Uncompressed size hack active in pre-boot
-			     environment */
+
 	if (!outbuf) {
 		error("Could not allocate output bufer");
-		return -1;
+		return RETVAL_OUT_OF_MEMORY;
 	}
 	if (buf)
 		inbuf = buf;
@@ -694,6 +702,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
 		inbuf = malloc(BZIP2_IOBUF_SIZE);
 	if (!inbuf) {
 		error("Could not allocate input bufer");
+		i = RETVAL_OUT_OF_MEMORY;
 		goto exit_0;
 	}
 	i = start_bunzip(&bd, inbuf, len, fill);
@@ -720,11 +729,14 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
 	} else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
 		error("Compressed file ends unexpectedly");
 	}
+	if (!bd)
+		goto exit_1;
 	if (bd->dbuf)
 		large_free(bd->dbuf);
 	if (pos)
 		*pos = bd->inbufPos;
 	free(bd);
+exit_1:
 	if (!buf)
 		free(inbuf);
 exit_0:
@@ -733,4 +745,14 @@ exit_0:
 	return i;
 }
 
-#define decompress bunzip2
+#ifdef PREBOOT
+STATIC int INIT decompress(unsigned char *buf, int len,
+			int(*fill)(void*, unsigned int),
+			int(*flush)(void*, unsigned int),
+			unsigned char *outbuf,
+			int *pos,
+			void(*error_fn)(char *x))
+{
+	return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn);
+}
+#endif
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index e36b296fc9f8..fc686c7a0a0d 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -19,13 +19,18 @@
 #include "zlib_inflate/inflate.h"
 
 #include "zlib_inflate/infutil.h"
+#include <linux/slab.h>
 
 #endif /* STATIC */
 
 #include <linux/decompress/mm.h>
-#include <linux/slab.h>
 
-#define INBUF_LEN (16*1024)
+#define GZIP_IOBUF_SIZE (16*1024)
+
+static int nofill(void *buffer, unsigned int len)
+{
+	return -1;
+}
 
 /* Included from initramfs et al code */
 STATIC int INIT gunzip(unsigned char *buf, int len,
@@ -55,7 +60,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 	if (buf)
 		zbuf = buf;
 	else {
-		zbuf = malloc(INBUF_LEN);
+		zbuf = malloc(GZIP_IOBUF_SIZE);
 		len = 0;
 	}
 	if (!zbuf) {
@@ -76,8 +81,11 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 		goto gunzip_nomem4;
 	}
 
+	if (!fill)
+		fill = nofill;
+
 	if (len == 0)
-		len = fill(zbuf, INBUF_LEN);
+		len = fill(zbuf, GZIP_IOBUF_SIZE);
 
 	/* verify the gzip header */
 	if (len < 10 ||
@@ -113,7 +121,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
 	while (rc == Z_OK) {
 		if (strm->avail_in == 0) {
 			/* TODO: handle case where both pos and fill are set */
-			len = fill(zbuf, INBUF_LEN);
+			len = fill(zbuf, GZIP_IOBUF_SIZE);
 			if (len < 0) {
 				rc = -1;
 				error("read error");
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32123a1340e6..ca82fde81c8f 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -29,12 +29,14 @@
  *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-#ifndef STATIC
+#ifdef STATIC
+#define PREBOOT
+#else
 #include <linux/decompress/unlzma.h>
+#include <linux/slab.h>
 #endif /* STATIC */
 
 #include <linux/decompress/mm.h>
-#include <linux/slab.h>
 
 #define	MIN(a, b) (((a) < (b)) ? (a) : (b))
 
@@ -80,6 +82,11 @@ struct rc {
 #define RC_MODEL_TOTAL_BITS 11
 
 
+static int nofill(void *buffer, unsigned int len)
+{
+	return -1;
+}
+
 /* Called twice: once at startup and once in rc_normalize() */
 static void INIT rc_read(struct rc *rc)
 {
@@ -95,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc,
 				       int (*fill)(void*, unsigned int),
 				       char *buffer, int buffer_size)
 {
-	rc->fill = fill;
+	if (fill)
+		rc->fill = fill;
+	else
+		rc->fill = nofill;
 	rc->buffer = (uint8_t *)buffer;
 	rc->buffer_size = buffer_size;
 	rc->buffer_end = rc->buffer + rc->buffer_size;
@@ -543,9 +553,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
 	int ret = -1;
 
 	set_error_fn(error_fn);
-	if (!flush)
-		in_len -= 4; /* Uncompressed size hack active in pre-boot
-				environment */
+
 	if (buf)
 		inbuf = buf;
 	else
@@ -645,4 +653,15 @@ exit_0:
 	return ret;
 }
 
-#define decompress unlzma
+#ifdef PREBOOT
+STATIC int INIT decompress(unsigned char *buf, int in_len,
+			      int(*fill)(void*, unsigned int),
+			      int(*flush)(void*, unsigned int),
+			      unsigned char *output,
+			      int *posp,
+			      void(*error_fn)(char *x)
+	)
+{
+	return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn);
+}
+#endif
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
new file mode 100644
index 000000000000..bcb3a4bd68ff
--- /dev/null
+++ b/lib/decompress_unlzo.c
@@ -0,0 +1,217 @@
+/*
+ * LZO decompressor for the Linux kernel. Code borrowed from the lzo
+ * implementation by Markus Franz Xaver Johannes Oberhumer.
+ *
+ * Linux kernel adaptation:
+ * Copyright (C) 2009
+ * Albin Tonnerre, Free Electrons <albin.tonnerre@free-electrons.com>
+ *
+ * Original code:
+ * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer
+ * All Rights Reserved.
+ *
+ * lzop and the LZO library are free software; you can redistribute them
+ * and/or modify them under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.
+ * If not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Markus F.X.J. Oberhumer
+ * <markus@oberhumer.com>
+ * http://www.oberhumer.com/opensource/lzop/
+ */
+
+#ifdef STATIC
+#include "lzo/lzo1x_decompress.c"
+#else
+#include <linux/slab.h>
+#include <linux/decompress/unlzo.h>
+#endif
+
+#include <linux/types.h>
+#include <linux/lzo.h>
+#include <linux/decompress/mm.h>
+
+#include <linux/compiler.h>
+#include <asm/unaligned.h>
+
+static const unsigned char lzop_magic[] = {
+	0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a };
+
+#define LZO_BLOCK_SIZE        (256*1024l)
+#define HEADER_HAS_FILTER      0x00000800L
+
+STATIC inline int INIT parse_header(u8 *input, u8 *skip)
+{
+	int l;
+	u8 *parse = input;
+	u8 level = 0;
+	u16 version;
+
+	/* read magic: 9 first bits */
+	for (l = 0; l < 9; l++) {
+		if (*parse++ != lzop_magic[l])
+			return 0;
+	}
+	/* get version (2bytes), skip library version (2),
+	 * 'need to be extracted' version (2) and
+	 * method (1) */
+	version = get_unaligned_be16(parse);
+	parse += 7;
+	if (version >= 0x0940)
+		level = *parse++;
+	if (get_unaligned_be32(parse) & HEADER_HAS_FILTER)
+		parse += 8; /* flags + filter info */
+	else
+		parse += 4; /* flags */
+
+	/* skip mode and mtime_low */
+	parse += 8;
+	if (version >= 0x0940)
+		parse += 4;	/* skip mtime_high */
+
+	l = *parse++;
+	/* don't care about the file name, and skip checksum */
+	parse += l + 4;
+
+	*skip = parse - input;
+	return 1;
+}
+
+STATIC inline int INIT unlzo(u8 *input, int in_len,
+				int (*fill) (void *, unsigned int),
+				int (*flush) (void *, unsigned int),
+				u8 *output, int *posp,
+				void (*error_fn) (char *x))
+{
+	u8 skip = 0, r = 0;
+	u32 src_len, dst_len;
+	size_t tmp;
+	u8 *in_buf, *in_buf_save, *out_buf;
+	int ret = -1;
+
+	set_error_fn(error_fn);
+
+	if (output) {
+		out_buf = output;
+	} else if (!flush) {
+		error("NULL output pointer and no flush function provided");
+		goto exit;
+	} else {
+		out_buf = malloc(LZO_BLOCK_SIZE);
+		if (!out_buf) {
+			error("Could not allocate output buffer");
+			goto exit;
+		}
+	}
+
+	if (input && fill) {
+		error("Both input pointer and fill function provided, don't know what to do");
+		goto exit_1;
+	} else if (input) {
+		in_buf = input;
+	} else if (!fill || !posp) {
+		error("NULL input pointer and missing position pointer or fill function");
+		goto exit_1;
+	} else {
+		in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE));
+		if (!in_buf) {
+			error("Could not allocate input buffer");
+			goto exit_1;
+		}
+	}
+	in_buf_save = in_buf;
+
+	if (posp)
+		*posp = 0;
+
+	if (fill)
+		fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+
+	if (!parse_header(input, &skip)) {
+		error("invalid header");
+		goto exit_2;
+	}
+	in_buf += skip;
+
+	if (posp)
+		*posp = skip;
+
+	for (;;) {
+		/* read uncompressed block size */
+		dst_len = get_unaligned_be32(in_buf);
+		in_buf += 4;
+
+		/* exit if last block */
+		if (dst_len == 0) {
+			if (posp)
+				*posp += 4;
+			break;
+		}
+
+		if (dst_len > LZO_BLOCK_SIZE) {
+			error("dest len longer than block size");
+			goto exit_2;
+		}
+
+		/* read compressed block size, and skip block checksum info */
+		src_len = get_unaligned_be32(in_buf);
+		in_buf += 8;
+
+		if (src_len <= 0 || src_len > dst_len) {
+			error("file corrupted");
+			goto exit_2;
+		}
+
+		/* decompress */
+		tmp = dst_len;
+
+		/* When the input data is not compressed at all,
+		 * lzo1x_decompress_safe will fail, so call memcpy()
+		 * instead */
+		if (unlikely(dst_len == src_len))
+			memcpy(out_buf, in_buf, src_len);
+		else {
+			r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
+						out_buf, &tmp);
+
+			if (r != LZO_E_OK || dst_len != tmp) {
+				error("Compressed data violation");
+				goto exit_2;
+			}
+		}
+
+		if (flush)
+			flush(out_buf, dst_len);
+		if (output)
+			out_buf += dst_len;
+		if (posp)
+			*posp += src_len + 12;
+		if (fill) {
+			in_buf = in_buf_save;
+			fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+		} else
+			in_buf += src_len;
+	}
+
+	ret = 0;
+exit_2:
+	if (!input)
+		free(in_buf);
+exit_1:
+	if (!output)
+		free(out_buf);
+exit:
+	return ret;
+}
+
+#define decompress unlzo
diff --git a/lib/devres.c b/lib/devres.c
index 72c8909006da..6efddf53b90c 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -1,5 +1,6 @@
 #include <linux/pci.h>
 #include <linux/io.h>
+#include <linux/gfp.h>
 #include <linux/module.h>
 
 void devm_ioremap_release(struct device *dev, void *res)
@@ -327,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
  * @pdev: PCI device to map IO resources for
  * @mask: Mask of BARs to unmap and release
  *
- * Unamp and release regions specified by @mask.
+ * Unmap and release regions specified by @mask.
  */
 void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
 {
diff --git a/lib/div64.c b/lib/div64.c
index a111eb8de9cf..5b4919191778 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -77,26 +77,58 @@ s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
 EXPORT_SYMBOL(div_s64_rem);
 #endif
 
-/* 64bit divisor, dividend and result. dynamic precision */
+/**
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ * @dividend:	64bit dividend
+ * @divisor:	64bit divisor
+ *
+ * This implementation is a modified version of the algorithm proposed
+ * by the book 'Hacker's Delight'.  The original source and full proof
+ * can be found here and is available for use without restriction.
+ *
+ * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c'
+ */
 #ifndef div64_u64
 u64 div64_u64(u64 dividend, u64 divisor)
 {
-	u32 high, d;
+	u32 high = divisor >> 32;
+	u64 quot;
 
-	high = divisor >> 32;
-	if (high) {
-		unsigned int shift = fls(high);
+	if (high == 0) {
+		quot = div_u64(dividend, divisor);
+	} else {
+		int n = 1 + fls(high);
+		quot = div_u64(dividend >> n, divisor >> n);
 
-		d = divisor >> shift;
-		dividend >>= shift;
-	} else
-		d = divisor;
+		if (quot != 0)
+			quot--;
+		if ((dividend - quot * divisor) >= divisor)
+			quot++;
+	}
 
-	return div_u64(dividend, d);
+	return quot;
 }
 EXPORT_SYMBOL(div64_u64);
 #endif
 
+/**
+ * div64_s64 - signed 64bit divide with 64bit divisor
+ * @dividend:	64bit dividend
+ * @divisor:	64bit divisor
+ */
+#ifndef div64_s64
+s64 div64_s64(s64 dividend, s64 divisor)
+{
+	s64 quot, t;
+
+	quot = div64_u64(abs64(dividend), abs64(divisor));
+	t = (dividend ^ divisor) >> 63;
+
+	return (quot ^ t) - t;
+}
+EXPORT_SYMBOL(div64_s64);
+#endif
+
 #endif /* BITS_PER_LONG == 32 */
 
 /*
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 3b93129a968c..4bfb0471f106 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -156,9 +156,13 @@ static bool driver_filter(struct device *dev)
 		return true;
 
 	/* driver filter on and initialized */
-	if (current_driver && dev->driver == current_driver)
+	if (current_driver && dev && dev->driver == current_driver)
 		return true;
 
+	/* driver filter on, but we can't filter on a NULL device... */
+	if (!dev)
+		return false;
+
 	if (current_driver || !current_driver_name[0])
 		return false;
 
@@ -183,17 +187,17 @@ static bool driver_filter(struct device *dev)
 	return ret;
 }
 
-#define err_printk(dev, entry, format, arg...) do {		\
-		error_count += 1;				\
-		if (driver_filter(dev) &&			\
-		    (show_all_errors || show_num_errors > 0)) {	\
-			WARN(1, "%s %s: " format,		\
-			     dev_driver_string(dev),		\
-			     dev_name(dev) , ## arg);		\
-			dump_entry_trace(entry);		\
-		}						\
-		if (!show_all_errors && show_num_errors > 0)	\
-			show_num_errors -= 1;			\
+#define err_printk(dev, entry, format, arg...) do {			\
+		error_count += 1;					\
+		if (driver_filter(dev) &&				\
+		    (show_all_errors || show_num_errors > 0)) {		\
+			WARN(1, "%s %s: " format,			\
+			     dev ? dev_driver_string(dev) : "NULL",	\
+			     dev ? dev_name(dev) : "NULL", ## arg);	\
+			dump_entry_trace(entry);			\
+		}							\
+		if (!show_all_errors && show_num_errors > 0)		\
+			show_num_errors -= 1;				\
 	} while (0);
 
 /*
@@ -255,7 +259,7 @@ static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
 		 * times. Without a hardware IOMMU this results in the
 		 * same device addresses being put into the dma-debug
 		 * hash multiple times too. This can result in false
-		 * positives being reported. Therfore we implement a
+		 * positives being reported. Therefore we implement a
 		 * best-fit algorithm here which returns the entry from
 		 * the hash which fits best to the reference value
 		 * instead of the first-fit.
@@ -566,7 +570,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
 	 * Now parse out the first token and use it as the name for the
 	 * driver to filter for.
 	 */
-	for (i = 0; i < NAME_MAX_LEN; ++i) {
+	for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
 		current_driver_name[i] = buf[i];
 		if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
 			break;
@@ -583,9 +587,10 @@ out_unlock:
 	return count;
 }
 
-const struct file_operations filter_fops = {
+static const struct file_operations filter_fops = {
 	.read  = filter_read,
 	.write = filter_write,
+	.llseek = default_llseek,
 };
 
 static int dma_debug_fs_init(void)
@@ -666,12 +671,13 @@ static int device_dma_allocations(struct device *dev)
 	return count;
 }
 
-static int dma_debug_device_change(struct notifier_block *nb,
-				    unsigned long action, void *data)
+static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
 {
 	struct device *dev = data;
 	int count;
 
+	if (global_disable)
+		return 0;
 
 	switch (action) {
 	case BUS_NOTIFY_UNBOUND_DRIVER:
@@ -693,6 +699,9 @@ void dma_debug_add_bus(struct bus_type *bus)
 {
 	struct notifier_block *nb;
 
+	if (global_disable)
+		return;
+
 	nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
 	if (nb == NULL) {
 		pr_err("dma_debug_add_bus: out of memory\n");
@@ -716,7 +725,7 @@ void dma_debug_init(u32 num_entries)
 
 	for (i = 0; i < HASH_SIZE; ++i) {
 		INIT_LIST_HEAD(&dma_entry_hash[i].list);
-		dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED;
+		spin_lock_init(&dma_entry_hash[i].lock);
 	}
 
 	if (dma_debug_fs_init() != 0) {
@@ -815,9 +824,11 @@ static void check_unmap(struct dma_debug_entry *ref)
 		err_printk(ref->dev, entry, "DMA-API: device driver frees "
 			   "DMA memory with different CPU address "
 			   "[device address=0x%016llx] [size=%llu bytes] "
-			   "[cpu alloc address=%p] [cpu free address=%p]",
+			   "[cpu alloc address=0x%016llx] "
+			   "[cpu free address=0x%016llx]",
 			   ref->dev_addr, ref->size,
-			   (void *)entry->paddr, (void *)ref->paddr);
+			   (unsigned long long)entry->paddr,
+			   (unsigned long long)ref->paddr);
 	}
 
 	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
@@ -856,22 +867,21 @@ static void check_for_stack(struct device *dev, void *addr)
 				"stack [addr=%p]\n", addr);
 }
 
-static inline bool overlap(void *addr, u64 size, void *start, void *end)
+static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
 {
-	void *addr2 = (char *)addr + size;
+	unsigned long a1 = (unsigned long)addr;
+	unsigned long b1 = a1 + len;
+	unsigned long a2 = (unsigned long)start;
+	unsigned long b2 = (unsigned long)end;
 
-	return ((addr >= start && addr < end) ||
-		(addr2 >= start && addr2 < end) ||
-		((addr < start) && (addr2 >= end)));
+	return !(b1 <= a2 || a1 >= b2);
 }
 
-static void check_for_illegal_area(struct device *dev, void *addr, u64 size)
+static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
 {
-	if (overlap(addr, size, _text, _etext) ||
-	    overlap(addr, size, __start_rodata, __end_rodata))
-		err_printk(dev, NULL, "DMA-API: device driver maps "
-				"memory from kernel text or rodata "
-				"[addr=%p] [size=%llu]\n", addr, size);
+	if (overlap(addr, len, _text, _etext) ||
+	    overlap(addr, len, __start_rodata, __end_rodata))
+		err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
 }
 
 static void check_sync(struct device *dev,
@@ -904,6 +914,9 @@ static void check_sync(struct device *dev,
 				ref->size);
 	}
 
+	if (entry->direction == DMA_BIDIRECTIONAL)
+		goto out;
+
 	if (ref->direction != entry->direction) {
 		err_printk(dev, entry, "DMA-API: device driver syncs "
 				"DMA memory with different direction "
@@ -914,9 +927,6 @@ static void check_sync(struct device *dev,
 				dir2name[ref->direction]);
 	}
 
-	if (entry->direction == DMA_BIDIRECTIONAL)
-		goto out;
-
 	if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
 		      !(ref->direction == DMA_TO_DEVICE))
 		err_printk(dev, entry, "DMA-API: device driver syncs "
@@ -939,7 +949,6 @@ static void check_sync(struct device *dev,
 
 out:
 	put_hash_bucket(bucket, &flags);
-
 }
 
 void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
@@ -969,7 +978,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 		entry->type = dma_debug_single;
 
 	if (!PageHighMem(page)) {
-		void *addr = ((char *)page_address(page)) + offset;
+		void *addr = page_address(page) + offset;
+
 		check_for_stack(dev, addr);
 		check_for_illegal_area(dev, addr, size);
 	}
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 833139ce1e22..b335acb43be2 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -21,22 +21,16 @@
 #include <linux/list.h>
 #include <linux/sysctl.h>
 #include <linux/ctype.h>
+#include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/dynamic_debug.h>
 #include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/jump_label.h>
 
 extern struct _ddebug __start___verbose[];
 extern struct _ddebug __stop___verbose[];
 
-/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which
- * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
- * use independent hash functions, to reduce the chance of false positives.
- */
-long long dynamic_debug_enabled;
-EXPORT_SYMBOL_GPL(dynamic_debug_enabled);
-long long dynamic_debug_enabled2;
-EXPORT_SYMBOL_GPL(dynamic_debug_enabled2);
-
 struct ddebug_table {
 	struct list_head link;
 	char *mod_name;
@@ -86,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
 }
 
 /*
- * must be called with ddebug_lock held
- */
-
-static int disabled_hash(char hash, bool first_table)
-{
-	struct ddebug_table *dt;
-	char table_hash_value;
-
-	list_for_each_entry(dt, &ddebug_tables, link) {
-		if (first_table)
-			table_hash_value = dt->ddebugs->primary_hash;
-		else
-			table_hash_value = dt->ddebugs->secondary_hash;
-		if (dt->num_enabled && (hash == table_hash_value))
-			return 0;
-	}
-	return 1;
-}
-
-/*
  * Search the tables for _ddebug's which match the given
  * `query' and apply the `flags' and `mask' to them.  Tells
  * the user which ddebug's were changed, or whether none
@@ -164,22 +138,13 @@ static void ddebug_change(const struct ddebug_query *query,
 
 			if (!newflags)
 				dt->num_enabled--;
-			else if (!dp-flags)
+			else if (!dp->flags)
 				dt->num_enabled++;
 			dp->flags = newflags;
-			if (newflags) {
-				dynamic_debug_enabled |=
-						(1LL << dp->primary_hash);
-				dynamic_debug_enabled2 |=
-						(1LL << dp->secondary_hash);
-			} else {
-				if (disabled_hash(dp->primary_hash, true))
-					dynamic_debug_enabled &=
-						~(1LL << dp->primary_hash);
-				if (disabled_hash(dp->secondary_hash, false))
-					dynamic_debug_enabled2 &=
-						~(1LL << dp->secondary_hash);
-			}
+			if (newflags)
+				dp->enabled = 1;
+			else
+				dp->enabled = 0;
 			if (verbose)
 				printk(KERN_INFO
 					"ddebug: changed %s:%d [%s]%s %s\n",
@@ -209,8 +174,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
 		char *end;
 
 		/* Skip leading whitespace */
-		while (*buf && isspace(*buf))
-			buf++;
+		buf = skip_spaces(buf);
 		if (!*buf)
 			break;	/* oh, it was trailing whitespace */
 
@@ -428,6 +392,40 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
 	return 0;
 }
 
+static int ddebug_exec_query(char *query_string)
+{
+	unsigned int flags = 0, mask = 0;
+	struct ddebug_query query;
+#define MAXWORDS 9
+	int nwords;
+	char *words[MAXWORDS];
+
+	nwords = ddebug_tokenize(query_string, words, MAXWORDS);
+	if (nwords <= 0)
+		return -EINVAL;
+	if (ddebug_parse_query(words, nwords-1, &query))
+		return -EINVAL;
+	if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
+		return -EINVAL;
+
+	/* actually go and implement the change */
+	ddebug_change(&query, flags, mask);
+	return 0;
+}
+
+static __initdata char ddebug_setup_string[1024];
+static __init int ddebug_setup_query(char *str)
+{
+	if (strlen(str) >= 1024) {
+		pr_warning("ddebug boot param string too large\n");
+		return 0;
+	}
+	strcpy(ddebug_setup_string, str);
+	return 1;
+}
+
+__setup("ddebug_query=", ddebug_setup_query);
+
 /*
  * File_ops->write method for <debugfs>/dynamic_debug/conrol.  Gathers the
  * command text from userspace, parses and executes it.
@@ -435,12 +433,8 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
 static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
 				  size_t len, loff_t *offp)
 {
-	unsigned int flags = 0, mask = 0;
-	struct ddebug_query query;
-#define MAXWORDS 9
-	int nwords;
-	char *words[MAXWORDS];
 	char tmpbuf[256];
+	int ret;
 
 	if (len == 0)
 		return 0;
@@ -454,16 +448,9 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
 		printk(KERN_INFO "%s: read %d bytes from userspace\n",
 			__func__, (int)len);
 
-	nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS);
-	if (nwords < 0)
-		return -EINVAL;
-	if (ddebug_parse_query(words, nwords-1, &query))
-		return -EINVAL;
-	if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
-		return -EINVAL;
-
-	/* actually go and implement the change */
-	ddebug_change(&query, flags, mask);
+	ret = ddebug_exec_query(tmpbuf);
+	if (ret)
+		return ret;
 
 	*offp += len;
 	return len;
@@ -691,7 +678,7 @@ static void ddebug_table_free(struct ddebug_table *dt)
  * Called in response to a module being unloaded.  Removes
  * any ddebug_table's which point at the module.
  */
-int ddebug_remove_module(char *mod_name)
+int ddebug_remove_module(const char *mod_name)
 {
 	struct ddebug_table *dt, *nextdt;
 	int ret = -ENOENT;
@@ -724,13 +711,14 @@ static void ddebug_remove_all_tables(void)
 	mutex_unlock(&ddebug_lock);
 }
 
-static int __init dynamic_debug_init(void)
+static __initdata int ddebug_init_success;
+
+static int __init dynamic_debug_init_debugfs(void)
 {
 	struct dentry *dir, *file;
-	struct _ddebug *iter, *iter_start;
-	const char *modname = NULL;
-	int ret = 0;
-	int n = 0;
+
+	if (!ddebug_init_success)
+		return -ENODEV;
 
 	dir = debugfs_create_dir("dynamic_debug", NULL);
 	if (!dir)
@@ -741,6 +729,16 @@ static int __init dynamic_debug_init(void)
 		debugfs_remove(dir);
 		return -ENOMEM;
 	}
+	return 0;
+}
+
+static int __init dynamic_debug_init(void)
+{
+	struct _ddebug *iter, *iter_start;
+	const char *modname = NULL;
+	int ret = 0;
+	int n = 0;
+
 	if (__start___verbose != __stop___verbose) {
 		iter = __start___verbose;
 		modname = iter->modname;
@@ -758,12 +756,26 @@ static int __init dynamic_debug_init(void)
 		}
 		ret = ddebug_add_module(iter_start, n, modname);
 	}
+
+	/* ddebug_query boot param got passed -> set it up */
+	if (ddebug_setup_string[0] != '\0') {
+		ret = ddebug_exec_query(ddebug_setup_string);
+		if (ret)
+			pr_warning("Invalid ddebug boot param %s",
+				   ddebug_setup_string);
+		else
+			pr_info("ddebug initialized with string %s",
+				ddebug_setup_string);
+	}
+
 out_free:
-	if (ret) {
+	if (ret)
 		ddebug_remove_all_tables();
-		debugfs_remove(dir);
-		debugfs_remove(file);
-	}
+	else
+		ddebug_init_success = 1;
 	return 0;
 }
-module_init(dynamic_debug_init);
+/* Allow early initialization for boot messages via boot param */
+arch_initcall(dynamic_debug_init);
+/* Debugfs setup must be done later */
+module_init(dynamic_debug_init_debugfs);
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index f97af55bdd96..7e65af70635e 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -1,6 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/random.h>
+#include <linux/sched.h>
 #include <linux/stat.h>
 #include <linux/types.h>
 #include <linux/fs.h>
diff --git a/lib/flex_array.c b/lib/flex_array.c
new file mode 100644
index 000000000000..77a6fea7481e
--- /dev/null
+++ b/lib/flex_array.c
@@ -0,0 +1,350 @@
+/*
+ * Flexible array managed in PAGE_SIZE parts
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Dave Hansen <dave@linux.vnet.ibm.com>
+ */
+
+#include <linux/flex_array.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+
+struct flex_array_part {
+	char elements[FLEX_ARRAY_PART_SIZE];
+};
+
+/*
+ * If a user requests an allocation which is small
+ * enough, we may simply use the space in the
+ * flex_array->parts[] array to store the user
+ * data.
+ */
+static inline int elements_fit_in_base(struct flex_array *fa)
+{
+	int data_size = fa->element_size * fa->total_nr_elements;
+	if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT)
+		return 1;
+	return 0;
+}
+
+/**
+ * flex_array_alloc - allocate a new flexible array
+ * @element_size:	the size of individual elements in the array
+ * @total:		total number of elements that this should hold
+ * @flags:		page allocation flags to use for base array
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * @total is used to size internal structures.  If the user ever
+ * accesses any array indexes >=@total, it will produce errors.
+ *
+ * The maximum number of elements is defined as: the number of
+ * elements that can be stored in a page times the number of
+ * page pointers that we can fit in the base structure or (using
+ * integer math):
+ *
+ * 	(PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *)
+ *
+ * Here's a table showing example capacities.  Note that the maximum
+ * index that the get/put() functions is just nr_objects-1.   This
+ * basically means that you get 4MB of storage on 32-bit and 2MB on
+ * 64-bit.
+ *
+ *
+ * Element size | Objects | Objects |
+ * PAGE_SIZE=4k |  32-bit |  64-bit |
+ * ---------------------------------|
+ *      1 bytes | 4186112 | 2093056 |
+ *      2 bytes | 2093056 | 1046528 |
+ *      3 bytes | 1395030 |  697515 |
+ *      4 bytes | 1046528 |  523264 |
+ *     32 bytes |  130816 |   65408 |
+ *     33 bytes |  126728 |   63364 |
+ *   2048 bytes |    2044 |    1022 |
+ *   2049 bytes |    1022 |     511 |
+ *       void * | 1046528 |  261632 |
+ *
+ * Since 64-bit pointers are twice the size, we lose half the
+ * capacity in the base structure.  Also note that no effort is made
+ * to efficiently pack objects across page boundaries.
+ */
+struct flex_array *flex_array_alloc(int element_size, unsigned int total,
+					gfp_t flags)
+{
+	struct flex_array *ret;
+	int max_size = FLEX_ARRAY_NR_BASE_PTRS *
+				FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
+
+	/* max_size will end up 0 if element_size > PAGE_SIZE */
+	if (total > max_size)
+		return NULL;
+	ret = kzalloc(sizeof(struct flex_array), flags);
+	if (!ret)
+		return NULL;
+	ret->element_size = element_size;
+	ret->total_nr_elements = total;
+	if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
+		memset(&ret->parts[0], FLEX_ARRAY_FREE,
+						FLEX_ARRAY_BASE_BYTES_LEFT);
+	return ret;
+}
+
+static int fa_element_to_part_nr(struct flex_array *fa,
+					unsigned int element_nr)
+{
+	return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
+}
+
+/**
+ * flex_array_free_parts - just free the second-level pages
+ * @fa:		the flex array from which to free parts
+ *
+ * This is to be used in cases where the base 'struct flex_array'
+ * has been statically allocated and should not be free.
+ */
+void flex_array_free_parts(struct flex_array *fa)
+{
+	int part_nr;
+
+	if (elements_fit_in_base(fa))
+		return;
+	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++)
+		kfree(fa->parts[part_nr]);
+}
+
+void flex_array_free(struct flex_array *fa)
+{
+	flex_array_free_parts(fa);
+	kfree(fa);
+}
+
+static unsigned int index_inside_part(struct flex_array *fa,
+					unsigned int element_nr)
+{
+	unsigned int part_offset;
+
+	part_offset = element_nr %
+				FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
+	return part_offset * fa->element_size;
+}
+
+static struct flex_array_part *
+__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
+{
+	struct flex_array_part *part = fa->parts[part_nr];
+	if (!part) {
+		part = kmalloc(sizeof(struct flex_array_part), flags);
+		if (!part)
+			return NULL;
+		if (!(flags & __GFP_ZERO))
+			memset(part, FLEX_ARRAY_FREE,
+				sizeof(struct flex_array_part));
+		fa->parts[part_nr] = part;
+	}
+	return part;
+}
+
+/**
+ * flex_array_put - copy data into the array at @element_nr
+ * @fa:		the flex array to copy data into
+ * @element_nr:	index of the position in which to insert
+ * 		the new element.
+ * @src:	address of data to copy into the array
+ * @flags:	page allocation flags to use for array expansion
+ *
+ *
+ * Note that this *copies* the contents of @src into
+ * the array.  If you are trying to store an array of
+ * pointers, make sure to pass in &ptr instead of ptr.
+ * You may instead wish to use the flex_array_put_ptr()
+ * helper function.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
+			gfp_t flags)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+	void *dst;
+
+	if (element_nr >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else {
+		part = __fa_get_part(fa, part_nr, flags);
+		if (!part)
+			return -ENOMEM;
+	}
+	dst = &part->elements[index_inside_part(fa, element_nr)];
+	memcpy(dst, src, fa->element_size);
+	return 0;
+}
+
+/**
+ * flex_array_clear - clear element in array at @element_nr
+ * @fa:		the flex array of the element.
+ * @element_nr:	index of the position to clear.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+	void *dst;
+
+	if (element_nr >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else {
+		part = fa->parts[part_nr];
+		if (!part)
+			return -EINVAL;
+	}
+	dst = &part->elements[index_inside_part(fa, element_nr)];
+	memset(dst, FLEX_ARRAY_FREE, fa->element_size);
+	return 0;
+}
+
+/**
+ * flex_array_prealloc - guarantee that array space exists
+ * @fa:		the flex array for which to preallocate parts
+ * @start:	index of first array element for which space is allocated
+ * @end:	index of last (inclusive) element for which space is allocated
+ * @flags:	page allocation flags
+ *
+ * This will guarantee that no future calls to flex_array_put()
+ * will allocate memory.  It can be used if you are expecting to
+ * be holding a lock or in some atomic context while writing
+ * data into the array.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_prealloc(struct flex_array *fa, unsigned int start,
+			unsigned int end, gfp_t flags)
+{
+	int start_part;
+	int end_part;
+	int part_nr;
+	struct flex_array_part *part;
+
+	if (start >= fa->total_nr_elements || end >= fa->total_nr_elements)
+		return -ENOSPC;
+	if (elements_fit_in_base(fa))
+		return 0;
+	start_part = fa_element_to_part_nr(fa, start);
+	end_part = fa_element_to_part_nr(fa, end);
+	for (part_nr = start_part; part_nr <= end_part; part_nr++) {
+		part = __fa_get_part(fa, part_nr, flags);
+		if (!part)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * flex_array_get - pull data back out of the array
+ * @fa:		the flex array from which to extract data
+ * @element_nr:	index of the element to fetch from the array
+ *
+ * Returns a pointer to the data at index @element_nr.  Note
+ * that this is a copy of the data that was passed in.  If you
+ * are using this to store pointers, you'll get back &ptr.  You
+ * may instead wish to use the flex_array_get_ptr helper.
+ *
+ * Locking must be provided by the caller.
+ */
+void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
+{
+	int part_nr = fa_element_to_part_nr(fa, element_nr);
+	struct flex_array_part *part;
+
+	if (element_nr >= fa->total_nr_elements)
+		return NULL;
+	if (elements_fit_in_base(fa))
+		part = (struct flex_array_part *)&fa->parts[0];
+	else {
+		part = fa->parts[part_nr];
+		if (!part)
+			return NULL;
+	}
+	return &part->elements[index_inside_part(fa, element_nr)];
+}
+
+/**
+ * flex_array_get_ptr - pull a ptr back out of the array
+ * @fa:		the flex array from which to extract data
+ * @element_nr:	index of the element to fetch from the array
+ *
+ * Returns the pointer placed in the flex array at element_nr using
+ * flex_array_put_ptr().  This function should not be called if the
+ * element in question was not set using the _put_ptr() helper.
+ */
+void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
+{
+	void **tmp;
+
+	tmp = flex_array_get(fa, element_nr);
+	if (!tmp)
+		return NULL;
+
+	return *tmp;
+}
+
+static int part_is_free(struct flex_array_part *part)
+{
+	int i;
+
+	for (i = 0; i < sizeof(struct flex_array_part); i++)
+		if (part->elements[i] != FLEX_ARRAY_FREE)
+			return 0;
+	return 1;
+}
+
+/**
+ * flex_array_shrink - free unused second-level pages
+ * @fa:		the flex array to shrink
+ *
+ * Frees all second-level pages that consist solely of unused
+ * elements.  Returns the number of pages freed.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_shrink(struct flex_array *fa)
+{
+	struct flex_array_part *part;
+	int part_nr;
+	int ret = 0;
+
+	if (elements_fit_in_base(fa))
+		return ret;
+	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) {
+		part = fa->parts[part_nr];
+		if (!part)
+			continue;
+		if (part_is_free(part)) {
+			fa->parts[part_nr] = NULL;
+			kfree(part);
+			ret++;
+		}
+	}
+	return ret;
+}
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index bea5d97df991..85d0e412a04f 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -7,8 +7,8 @@
 #define LE_TABLE_SIZE (1 << CRC_LE_BITS)
 #define BE_TABLE_SIZE (1 << CRC_BE_BITS)
 
-static uint32_t crc32table_le[LE_TABLE_SIZE];
-static uint32_t crc32table_be[BE_TABLE_SIZE];
+static uint32_t crc32table_le[4][LE_TABLE_SIZE];
+static uint32_t crc32table_be[4][BE_TABLE_SIZE];
 
 /**
  * crc32init_le() - allocate and initialize LE table data
@@ -22,12 +22,19 @@ static void crc32init_le(void)
 	unsigned i, j;
 	uint32_t crc = 1;
 
-	crc32table_le[0] = 0;
+	crc32table_le[0][0] = 0;
 
 	for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
 		crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
 		for (j = 0; j < LE_TABLE_SIZE; j += 2 * i)
-			crc32table_le[i + j] = crc ^ crc32table_le[j];
+			crc32table_le[0][i + j] = crc ^ crc32table_le[0][j];
+	}
+	for (i = 0; i < LE_TABLE_SIZE; i++) {
+		crc = crc32table_le[0][i];
+		for (j = 1; j < 4; j++) {
+			crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8);
+			crc32table_le[j][i] = crc;
+		}
 	}
 }
 
@@ -39,25 +46,35 @@ static void crc32init_be(void)
 	unsigned i, j;
 	uint32_t crc = 0x80000000;
 
-	crc32table_be[0] = 0;
+	crc32table_be[0][0] = 0;
 
 	for (i = 1; i < BE_TABLE_SIZE; i <<= 1) {
 		crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
 		for (j = 0; j < i; j++)
-			crc32table_be[i + j] = crc ^ crc32table_be[j];
+			crc32table_be[0][i + j] = crc ^ crc32table_be[0][j];
+	}
+	for (i = 0; i < BE_TABLE_SIZE; i++) {
+		crc = crc32table_be[0][i];
+		for (j = 1; j < 4; j++) {
+			crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8);
+			crc32table_be[j][i] = crc;
+		}
 	}
 }
 
-static void output_table(uint32_t table[], int len, char *trans)
+static void output_table(uint32_t table[4][256], int len, char *trans)
 {
-	int i;
+	int i, j;
 
-	for (i = 0; i < len - 1; i++) {
-		if (i % ENTRIES_PER_LINE == 0)
-			printf("\n");
-		printf("%s(0x%8.8xL), ", trans, table[i]);
+	for (j = 0 ; j < 4; j++) {
+		printf("{");
+		for (i = 0; i < len - 1; i++) {
+			if (i % ENTRIES_PER_LINE == 0)
+				printf("\n");
+			printf("%s(0x%8.8xL), ", trans, table[j][i]);
+		}
+		printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]);
 	}
-	printf("%s(0x%8.8xL)\n", trans, table[len - 1]);
 }
 
 int main(int argc, char** argv)
@@ -66,14 +83,14 @@ int main(int argc, char** argv)
 
 	if (CRC_LE_BITS > 1) {
 		crc32init_le();
-		printf("static const u32 crc32table_le[] = {");
+		printf("static const u32 crc32table_le[4][256] = {");
 		output_table(crc32table_le, LE_TABLE_SIZE, "tole");
 		printf("};\n");
 	}
 
 	if (CRC_BE_BITS > 1) {
 		crc32init_be();
-		printf("static const u32 crc32table_be[] = {");
+		printf("static const u32 crc32table_be[4][256] = {");
 		output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
 		printf("};\n");
 	}
diff --git a/lib/genalloc.c b/lib/genalloc.c
index eed2bdb865e7..1923f1490e72 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -10,7 +10,9 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/bitmap.h>
 #include <linux/genalloc.h>
 
 
@@ -114,7 +116,7 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
 	struct gen_pool_chunk *chunk;
 	unsigned long addr, flags;
 	int order = pool->min_alloc_order;
-	int nbits, bit, start_bit, end_bit;
+	int nbits, start_bit, end_bit;
 
 	if (size == 0)
 		return 0;
@@ -126,32 +128,21 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
 		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
 
 		end_bit = (chunk->end_addr - chunk->start_addr) >> order;
-		end_bit -= nbits + 1;
 
 		spin_lock_irqsave(&chunk->lock, flags);
-		bit = -1;
-		while (bit + 1 < end_bit) {
-			bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1);
-			if (bit >= end_bit)
-				break;
-
-			start_bit = bit;
-			if (nbits > 1) {
-				bit = find_next_bit(chunk->bits, bit + nbits,
-							bit + 1);
-				if (bit - start_bit < nbits)
-					continue;
-			}
-
-			addr = chunk->start_addr +
-					    ((unsigned long)start_bit << order);
-			while (nbits--)
-				__set_bit(start_bit++, chunk->bits);
+		start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0,
+						nbits, 0);
+		if (start_bit >= end_bit) {
 			spin_unlock_irqrestore(&chunk->lock, flags);
-			read_unlock(&pool->lock);
-			return addr;
+			continue;
 		}
+
+		addr = chunk->start_addr + ((unsigned long)start_bit << order);
+
+		bitmap_set(chunk->bits, start_bit, nbits);
 		spin_unlock_irqrestore(&chunk->lock, flags);
+		read_unlock(&pool->lock);
+		return addr;
 	}
 	read_unlock(&pool->lock);
 	return 0;
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 39af2560f765..b66b2bd67952 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -16,6 +16,40 @@ const char hex_asc[] = "0123456789abcdef";
 EXPORT_SYMBOL(hex_asc);
 
 /**
+ * hex_to_bin - convert a hex digit to its real value
+ * @ch: ascii character represents hex digit
+ *
+ * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
+ * input.
+ */
+int hex_to_bin(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	ch = tolower(ch);
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	return -1;
+}
+EXPORT_SYMBOL(hex_to_bin);
+
+/**
+ * hex2bin - convert an ascii hexadecimal string to its binary representation
+ * @dst: binary result
+ * @src: ascii hexadecimal string
+ * @count: result length
+ */
+void hex2bin(u8 *dst, const char *src, size_t count)
+{
+	while (count--) {
+		*dst = hex_to_bin(*src++) << 4;
+		*dst += hex_to_bin(*src++);
+		dst++;
+	}
+}
+EXPORT_SYMBOL(hex2bin);
+
+/**
  * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
  * @buf: data blob to dump
  * @len: number of bytes in the @buf
@@ -34,7 +68,7 @@ EXPORT_SYMBOL(hex_asc);
  *
  * E.g.:
  *   hex_dump_to_buffer(frame->data, frame->len, 16, 1,
- *			linebuf, sizeof(linebuf), 1);
+ *			linebuf, sizeof(linebuf), true);
  *
  * example output buffer:
  * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f  @ABCDEFGHIJKLMNO
@@ -65,8 +99,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 		for (j = 0; j < ngroups; j++)
 			lx += scnprintf(linebuf + lx, linebuflen - lx,
-				"%s%16.16llx", j ? " " : "",
-				(unsigned long long)*(ptr8 + j));
+					"%s%16.16llx", j ? " " : "",
+					(unsigned long long)*(ptr8 + j));
 		ascii_column = 17 * ngroups + 2;
 		break;
 	}
@@ -77,7 +111,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 		for (j = 0; j < ngroups; j++)
 			lx += scnprintf(linebuf + lx, linebuflen - lx,
-				"%s%8.8x", j ? " " : "", *(ptr4 + j));
+					"%s%8.8x", j ? " " : "", *(ptr4 + j));
 		ascii_column = 9 * ngroups + 2;
 		break;
 	}
@@ -88,7 +122,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 		for (j = 0; j < ngroups; j++)
 			lx += scnprintf(linebuf + lx, linebuflen - lx,
-				"%s%4.4x", j ? " " : "", *(ptr2 + j));
+					"%s%4.4x", j ? " " : "", *(ptr2 + j));
 		ascii_column = 5 * ngroups + 2;
 		break;
 	}
@@ -111,9 +145,10 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 	while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
 		linebuf[lx++] = ' ';
-	for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
-		linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
-				: '.';
+	for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) {
+		ch = ptr[j];
+		linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.';
+	}
 nil:
 	linebuf[lx++] = '\0';
 }
@@ -143,7 +178,7 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
  *
  * E.g.:
  *   print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS,
- *		16, 1, frame->data, frame->len, 1);
+ *		    16, 1, frame->data, frame->len, true);
  *
  * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode:
  * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f  @ABCDEFGHIJKLMNO
@@ -151,12 +186,12 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
  * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c  pqrstuvwxyz{|}~.
  */
 void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
-			int rowsize, int groupsize,
-			const void *buf, size_t len, bool ascii)
+		    int rowsize, int groupsize,
+		    const void *buf, size_t len, bool ascii)
 {
 	const u8 *ptr = buf;
 	int i, linelen, remaining = len;
-	unsigned char linebuf[200];
+	unsigned char linebuf[32 * 3 + 2 + 32 + 1];
 
 	if (rowsize != 16 && rowsize != 32)
 		rowsize = 16;
@@ -164,13 +199,14 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
 	for (i = 0; i < len; i += rowsize) {
 		linelen = min(remaining, rowsize);
 		remaining -= rowsize;
+
 		hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
-				linebuf, sizeof(linebuf), ascii);
+				   linebuf, sizeof(linebuf), ascii);
 
 		switch (prefix_type) {
 		case DUMP_PREFIX_ADDRESS:
-			printk("%s%s%*p: %s\n", level, prefix_str,
-				(int)(2 * sizeof(void *)), ptr + i, linebuf);
+			printk("%s%s%p: %s\n",
+			       level, prefix_str, ptr + i, linebuf);
 			break;
 		case DUMP_PREFIX_OFFSET:
 			printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
@@ -196,9 +232,9 @@ EXPORT_SYMBOL(print_hex_dump);
  * rowsize of 16, groupsize of 1, and ASCII output included.
  */
 void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-			const void *buf, size_t len)
+			  const void *buf, size_t len)
 {
 	print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
-			buf, len, 1);
+		       buf, len, true);
 }
 EXPORT_SYMBOL(print_hex_dump_bytes);
diff --git a/lib/hweight.c b/lib/hweight.c
index 389424ecb129..3c79d50814cf 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,37 +9,45 @@
  * The Hamming Weight of a number is the total number of bits set in it.
  */
 
-unsigned int hweight32(unsigned int w)
+unsigned int __sw_hweight32(unsigned int w)
 {
+#ifdef ARCH_HAS_FAST_MULTIPLIER
+	w -= (w >> 1) & 0x55555555;
+	w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
+	w =  (w + (w >> 4)) & 0x0f0f0f0f;
+	return (w * 0x01010101) >> 24;
+#else
 	unsigned int res = w - ((w >> 1) & 0x55555555);
 	res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
 	res = (res + (res >> 4)) & 0x0F0F0F0F;
 	res = res + (res >> 8);
 	return (res + (res >> 16)) & 0x000000FF;
+#endif
 }
-EXPORT_SYMBOL(hweight32);
+EXPORT_SYMBOL(__sw_hweight32);
 
-unsigned int hweight16(unsigned int w)
+unsigned int __sw_hweight16(unsigned int w)
 {
 	unsigned int res = w - ((w >> 1) & 0x5555);
 	res = (res & 0x3333) + ((res >> 2) & 0x3333);
 	res = (res + (res >> 4)) & 0x0F0F;
 	return (res + (res >> 8)) & 0x00FF;
 }
-EXPORT_SYMBOL(hweight16);
+EXPORT_SYMBOL(__sw_hweight16);
 
-unsigned int hweight8(unsigned int w)
+unsigned int __sw_hweight8(unsigned int w)
 {
 	unsigned int res = w - ((w >> 1) & 0x55);
 	res = (res & 0x33) + ((res >> 2) & 0x33);
 	return (res + (res >> 4)) & 0x0F;
 }
-EXPORT_SYMBOL(hweight8);
+EXPORT_SYMBOL(__sw_hweight8);
 
-unsigned long hweight64(__u64 w)
+unsigned long __sw_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
-	return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
+	return __sw_hweight32((unsigned int)(w >> 32)) +
+	       __sw_hweight32((unsigned int)w);
 #elif BITS_PER_LONG == 64
 #ifdef ARCH_HAS_FAST_MULTIPLIER
 	w -= (w >> 1) & 0x5555555555555555ul;
@@ -56,4 +64,4 @@ unsigned long hweight64(__u64 w)
 #endif
 #endif
 }
-EXPORT_SYMBOL(hweight64);
+EXPORT_SYMBOL(__sw_hweight64);
diff --git a/lib/idr.c b/lib/idr.c
index 80ca9aca038b..e15502e8b21e 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -106,16 +106,17 @@ static void idr_mark_full(struct idr_layer **pa, int id)
 }
 
 /**
- * idr_pre_get - reserver resources for idr allocation
+ * idr_pre_get - reserve resources for idr allocation
  * @idp:	idr handle
  * @gfp_mask:	memory allocation flags
  *
- * This function should be called prior to locking and calling the
- * idr_get_new* functions. It preallocates enough memory to satisfy
- * the worst possible allocation.
+ * This function should be called prior to calling the idr_get_new* functions.
+ * It preallocates enough memory to satisfy the worst possible allocation. The
+ * caller should pass in GFP_KERNEL if possible.  This of course requires that
+ * no spinning locks be held.
  *
- * If the system is REALLY out of memory this function returns 0,
- * otherwise 1.
+ * If the system is REALLY out of memory this function returns %0,
+ * otherwise %1.
  */
 int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
 {
@@ -156,10 +157,12 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
 
 			/* if already at the top layer, we need to grow */
-			if (!(p = pa[l])) {
+			if (id >= 1 << (idp->layers * IDR_BITS)) {
 				*starting_id = id;
 				return IDR_NEED_TO_GROW;
 			}
+			p = pa[l];
+			BUG_ON(!p);
 
 			/* If we need to go up one layer, continue the
 			 * loop; otherwise, restart from the top.
@@ -281,18 +284,20 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
 /**
  * idr_get_new_above - allocate new idr entry above or equal to a start id
  * @idp: idr handle
- * @ptr: pointer you want associated with the ide
- * @start_id: id to start search at
+ * @ptr: pointer you want associated with the id
+ * @starting_id: id to start search at
  * @id: pointer to the allocated handle
  *
  * This is the allocate id function.  It should be called with any
  * required locks.
  *
- * If memory is required, it will return -EAGAIN, you should unlock
- * and go back to the idr_pre_get() call.  If the idr is full, it will
- * return -ENOSPC.
+ * If allocation from IDR's private freelist fails, idr_get_new_above() will
+ * return %-EAGAIN.  The caller should retry the idr_pre_get() call to refill
+ * IDR's preallocation and then retry the idr_get_new_above() call.
+ *
+ * If the idr is full idr_get_new_above() will return %-ENOSPC.
  *
- * @id returns a value in the range @starting_id ... 0x7fffffff
+ * @id returns a value in the range @starting_id ... %0x7fffffff
  */
 int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 {
@@ -313,17 +318,16 @@ EXPORT_SYMBOL(idr_get_new_above);
 /**
  * idr_get_new - allocate new idr entry
  * @idp: idr handle
- * @ptr: pointer you want associated with the ide
+ * @ptr: pointer you want associated with the id
  * @id: pointer to the allocated handle
  *
- * This is the allocate id function.  It should be called with any
- * required locks.
+ * If allocation from IDR's private freelist fails, idr_get_new_above() will
+ * return %-EAGAIN.  The caller should retry the idr_pre_get() call to refill
+ * IDR's preallocation and then retry the idr_get_new_above() call.
  *
- * If memory is required, it will return -EAGAIN, you should unlock
- * and go back to the idr_pre_get() call.  If the idr is full, it will
- * return -ENOSPC.
+ * If the idr is full idr_get_new_above() will return %-ENOSPC.
  *
- * @id returns a value in the range 0 ... 0x7fffffff
+ * @id returns a value in the range %0 ... %0x7fffffff
  */
 int idr_get_new(struct idr *idp, void *ptr, int *id)
 {
@@ -386,7 +390,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
 }
 
 /**
- * idr_remove - remove the given id and free it's slot
+ * idr_remove - remove the given id and free its slot
  * @idp: idr handle
  * @id: unique key
  */
@@ -435,7 +439,7 @@ EXPORT_SYMBOL(idr_remove);
  * function will remove all id mappings and leave all idp_layers
  * unused.
  *
- * A typical clean-up sequence for objects stored in an idr tree, will
+ * A typical clean-up sequence for objects stored in an idr tree will
  * use idr_for_each() to free all objects, if necessay, then
  * idr_remove_all() to remove all ids, and idr_destroy() to free
  * up the cached idr_layers.
@@ -443,6 +447,7 @@ EXPORT_SYMBOL(idr_remove);
 void idr_remove_all(struct idr *idp)
 {
 	int n, id, max;
+	int bt_mask;
 	struct idr_layer *p;
 	struct idr_layer *pa[MAX_LEVEL];
 	struct idr_layer **paa = &pa[0];
@@ -460,8 +465,10 @@ void idr_remove_all(struct idr *idp)
 			p = p->ary[(id >> n) & IDR_MASK];
 		}
 
+		bt_mask = id;
 		id += 1 << n;
-		while (n < fls(id)) {
+		/* Get the highest bit that the above add changed from 0->1. */
+		while (n < fls(id ^ bt_mask)) {
 			if (p)
 				free_layer(p);
 			n += IDR_BITS;
@@ -474,7 +481,7 @@ EXPORT_SYMBOL(idr_remove_all);
 
 /**
  * idr_destroy - release all cached layers within an idr tree
- * idp: idr handle
+ * @idp: idr handle
  */
 void idr_destroy(struct idr *idp)
 {
@@ -502,7 +509,7 @@ void *idr_find(struct idr *idp, int id)
 	int n;
 	struct idr_layer *p;
 
-	p = rcu_dereference(idp->top);
+	p = rcu_dereference_raw(idp->top);
 	if (!p)
 		return NULL;
 	n = (p->layer+1) * IDR_BITS;
@@ -517,7 +524,7 @@ void *idr_find(struct idr *idp, int id)
 	while (n > 0 && p) {
 		n -= IDR_BITS;
 		BUG_ON(n != p->layer*IDR_BITS);
-		p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+		p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
 	}
 	return((void *)p);
 }
@@ -537,7 +544,7 @@ EXPORT_SYMBOL(idr_find);
  * not allowed.
  *
  * We check the return of @fn each time. If it returns anything other
- * than 0, we break out and return that value.
+ * than %0, we break out and return that value.
  *
  * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove().
  */
@@ -550,7 +557,7 @@ int idr_for_each(struct idr *idp,
 	struct idr_layer **paa = &pa[0];
 
 	n = idp->layers * IDR_BITS;
-	p = rcu_dereference(idp->top);
+	p = rcu_dereference_raw(idp->top);
 	max = 1 << n;
 
 	id = 0;
@@ -558,7 +565,7 @@ int idr_for_each(struct idr *idp,
 		while (n > 0 && p) {
 			n -= IDR_BITS;
 			*paa++ = p;
-			p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
 		}
 
 		if (p) {
@@ -581,10 +588,11 @@ EXPORT_SYMBOL(idr_for_each);
 /**
  * idr_get_next - lookup next object of id to given id.
  * @idp: idr handle
- * @id:  pointer to lookup key
+ * @nextidp:  pointer to lookup key
  *
  * Returns pointer to registered object with id, which is next number to
- * given id.
+ * given id. After being looked up, *@nextidp will be updated for the next
+ * iteration.
  */
 
 void *idr_get_next(struct idr *idp, int *nextidp)
@@ -597,7 +605,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 	/* find first ent */
 	n = idp->layers * IDR_BITS;
 	max = 1 << n;
-	p = rcu_dereference(idp->top);
+	p = rcu_dereference_raw(idp->top);
 	if (!p)
 		return NULL;
 
@@ -605,7 +613,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 		while (n > 0 && p) {
 			n -= IDR_BITS;
 			*paa++ = p;
-			p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
 		}
 
 		if (p) {
@@ -621,7 +629,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 	}
 	return NULL;
 }
-
+EXPORT_SYMBOL(idr_get_next);
 
 
 /**
@@ -631,8 +639,8 @@ void *idr_get_next(struct idr *idp, int *nextidp)
  * @id: lookup key
  *
  * Replace the pointer registered with an id and return the old value.
- * A -ENOENT return indicates that @id was not found.
- * A -EINVAL return indicates that @id was not within valid constraints.
+ * A %-ENOENT return indicates that @id was not found.
+ * A %-EINVAL return indicates that @id was not within valid constraints.
  *
  * The caller must serialize with writers.
  */
@@ -690,10 +698,11 @@ void idr_init(struct idr *idp)
 EXPORT_SYMBOL(idr_init);
 
 
-/*
+/**
+ * DOC: IDA description
  * IDA - IDR based ID allocator
  *
- * this is id allocator without id -> pointer translation.  Memory
+ * This is id allocator without id -> pointer translation.  Memory
  * usage is much lower than full blown idr because each id only
  * occupies a bit.  ida uses a custom leaf node which contains
  * IDA_BITMAP_BITS slots.
@@ -726,8 +735,8 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
  * following function.  It preallocates enough memory to satisfy the
  * worst possible allocation.
  *
- * If the system is REALLY out of memory this function returns 0,
- * otherwise 1.
+ * If the system is REALLY out of memory this function returns %0,
+ * otherwise %1.
  */
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
 {
@@ -753,17 +762,17 @@ EXPORT_SYMBOL(ida_pre_get);
 /**
  * ida_get_new_above - allocate new ID above or equal to a start id
  * @ida:	ida handle
- * @staring_id:	id to start search at
+ * @starting_id: id to start search at
  * @p_id:	pointer to the allocated handle
  *
  * Allocate new ID above or equal to @ida.  It should be called with
  * any required locks.
  *
- * If memory is required, it will return -EAGAIN, you should unlock
+ * If memory is required, it will return %-EAGAIN, you should unlock
  * and go back to the ida_pre_get() call.  If the ida is full, it will
- * return -ENOSPC.
+ * return %-ENOSPC.
  *
- * @p_id returns a value in the range @starting_id ... 0x7fffffff.
+ * @p_id returns a value in the range @starting_id ... %0x7fffffff.
  */
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
 {
@@ -845,11 +854,11 @@ EXPORT_SYMBOL(ida_get_new_above);
  *
  * Allocate new ID.  It should be called with any required locks.
  *
- * If memory is required, it will return -EAGAIN, you should unlock
+ * If memory is required, it will return %-EAGAIN, you should unlock
  * and go back to the idr_pre_get() call.  If the idr is full, it will
- * return -ENOSPC.
+ * return %-ENOSPC.
  *
- * @id returns a value in the range 0 ... 0x7fffffff.
+ * @id returns a value in the range %0 ... %0x7fffffff.
  */
 int ida_get_new(struct ida *ida, int *p_id)
 {
@@ -907,7 +916,7 @@ EXPORT_SYMBOL(ida_remove);
 
 /**
  * ida_destroy - release all cached layers within an ida tree
- * ida:		ida handle
+ * @ida:		ida handle
  */
 void ida_destroy(struct ida *ida)
 {
diff --git a/lib/inflate.c b/lib/inflate.c
index 1a8e8a978128..013a76193481 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -7,7 +7,7 @@
  * Adapted for booting Linux by Hannu Savolainen 1993
  * based on gzip-1.0.3 
  *
- * Nicolas Pitre <nico@cam.org>, 1999/04/14 :
+ * Nicolas Pitre <nico@fluxnic.net>, 1999/04/14 :
  *   Little mods for all variable to reside either into rodata or bss segments
  *   by marking constant variables with 'const' and initializing all the others
  *   at run-time only.  This allows for the kernel uncompressor to run
@@ -103,6 +103,9 @@
       the two sets of lengths.
  */
 #include <linux/compiler.h>
+#ifdef NO_INFLATE_MALLOC
+#include <linux/slab.h>
+#endif
 
 #ifdef RCSID
 static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #";
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 75dbda03f4fb..da053313ee5c 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -3,41 +3,7 @@
  */
 
 #include <linux/module.h>
-#include <linux/bitops.h>
-
-static unsigned long find_next_zero_area(unsigned long *map,
-					 unsigned long size,
-					 unsigned long start,
-					 unsigned int nr,
-					 unsigned long align_mask)
-{
-	unsigned long index, end, i;
-again:
-	index = find_next_zero_bit(map, size, start);
-
-	/* Align allocation */
-	index = (index + align_mask) & ~align_mask;
-
-	end = index + nr;
-	if (end >= size)
-		return -1;
-	for (i = index; i < end; i++) {
-		if (test_bit(i, map)) {
-			start = i+1;
-			goto again;
-		}
-	}
-	return index;
-}
-
-void iommu_area_reserve(unsigned long *map, unsigned long i, int len)
-{
-	unsigned long end = i + len;
-	while (i < end) {
-		__set_bit(i, map);
-		i++;
-	}
-}
+#include <linux/bitmap.h>
 
 int iommu_is_span_boundary(unsigned int index, unsigned int nr,
 			   unsigned long shift,
@@ -55,36 +21,20 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 			       unsigned long align_mask)
 {
 	unsigned long index;
+
+	/* We don't want the last of the limit */
+	size -= 1;
 again:
-	index = find_next_zero_area(map, size, start, nr, align_mask);
-	if (index != -1) {
+	index = bitmap_find_next_zero_area(map, size, start, nr, align_mask);
+	if (index < size) {
 		if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
 			/* we could do more effectively */
 			start = index + 1;
 			goto again;
 		}
-		iommu_area_reserve(map, index, nr);
+		bitmap_set(map, index, nr);
+		return index;
 	}
-	return index;
+	return -1;
 }
 EXPORT_SYMBOL(iommu_area_alloc);
-
-void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
-{
-	unsigned long end = start + nr;
-
-	while (start < end) {
-		__clear_bit(start, map);
-		start++;
-	}
-}
-EXPORT_SYMBOL(iommu_area_free);
-
-unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
-			      unsigned long io_page_size)
-{
-	unsigned long size = (addr & (io_page_size - 1)) + len;
-
-	return DIV_ROUND_UP(size, io_page_size);
-}
-EXPORT_SYMBOL(iommu_num_pages);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 14c6078f17a2..5730ecd3eb66 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -13,10 +13,10 @@
 #include <asm/pgtable.h>
 
 static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pte_t *pte;
-	unsigned long pfn;
+	u64 pfn;
 
 	pfn = phys_addr >> PAGE_SHIFT;
 	pte = pte_alloc_kernel(pmd, addr);
@@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
 }
 
 static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 }
 
 static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
-		unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 }
 
 int ioremap_page_range(unsigned long addr,
-		       unsigned long end, unsigned long phys_addr, pgprot_t prot)
+		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
 	pgd_t *pgd;
 	unsigned long start;
diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c
index f1ed2fe76c65..bd2bea963364 100644
--- a/lib/is_single_threaded.c
+++ b/lib/is_single_threaded.c
@@ -12,34 +12,47 @@
 
 #include <linux/sched.h>
 
-/**
- * is_single_threaded - Determine if a thread group is single-threaded or not
- * @p: A task in the thread group in question
- *
- * This returns true if the thread group to which a task belongs is single
- * threaded, false if it is not.
+/*
+ * Returns true if the task does not share ->mm with another thread/process.
  */
-bool is_single_threaded(struct task_struct *p)
+bool current_is_single_threaded(void)
 {
-	struct task_struct *g, *t;
-	struct mm_struct *mm = p->mm;
+	struct task_struct *task = current;
+	struct mm_struct *mm = task->mm;
+	struct task_struct *p, *t;
+	bool ret;
 
-	if (atomic_read(&p->signal->count) != 1)
-		goto no;
+	if (atomic_read(&task->signal->live) != 1)
+		return false;
 
-	if (atomic_read(&p->mm->mm_users) != 1) {
-		read_lock(&tasklist_lock);
-		do_each_thread(g, t) {
-			if (t->mm == mm && t != p)
-				goto no_unlock;
-		} while_each_thread(g, t);
-		read_unlock(&tasklist_lock);
-	}
+	if (atomic_read(&mm->mm_users) == 1)
+		return true;
 
-	return true;
+	ret = false;
+	rcu_read_lock();
+	for_each_process(p) {
+		if (unlikely(p->flags & PF_KTHREAD))
+			continue;
+		if (unlikely(p == task->group_leader))
+			continue;
+
+		t = p;
+		do {
+			if (unlikely(t->mm == mm))
+				goto found;
+			if (likely(t->mm))
+				break;
+			/*
+			 * t->mm == NULL. Make sure next_thread/next_task
+			 * will see other CLONE_VM tasks which might be
+			 * forked before exiting.
+			 */
+			smp_rmb();
+		} while_each_thread(p, t);
+	}
+	ret = true;
+found:
+	rcu_read_unlock();
 
-no_unlock:
-	read_unlock(&tasklist_lock);
-no:
-	return false;
+	return ret;
 }
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index c5ff1fd10030..9c4233b23783 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -6,6 +6,7 @@
 
 #include <stdarg.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/string.h>
 
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 39f1029e3525..b135d04aa48a 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -5,10 +5,13 @@
  * relegated to obsolescence, but used by various less
  * important (or lazy) subsystems.
  */
-#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/semaphore.h>
+#include <linux/smp_lock.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/bkl.h>
 
 /*
  * The 'big kernel lock'
@@ -20,7 +23,7 @@
  *
  * Don't use in new code.
  */
-static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
+static  __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag);
 
 
 /*
@@ -33,12 +36,12 @@ static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
  * If it successfully gets the lock, it should increment
  * the preemption count like any spinlock does.
  *
- * (This works on UP too - _raw_spin_trylock will never
+ * (This works on UP too - do_raw_spin_trylock will never
  * return false in that case)
  */
 int __lockfunc __reacquire_kernel_lock(void)
 {
-	while (!_raw_spin_trylock(&kernel_flag)) {
+	while (!do_raw_spin_trylock(&kernel_flag)) {
 		if (need_resched())
 			return -EAGAIN;
 		cpu_relax();
@@ -49,27 +52,27 @@ int __lockfunc __reacquire_kernel_lock(void)
 
 void __lockfunc __release_kernel_lock(void)
 {
-	_raw_spin_unlock(&kernel_flag);
+	do_raw_spin_unlock(&kernel_flag);
 	preempt_enable_no_resched();
 }
 
 /*
  * These are the BKL spinlocks - we try to be polite about preemption.
  * If SMP is not on (ie UP preemption), this all goes away because the
- * _raw_spin_trylock() will always succeed.
+ * do_raw_spin_trylock() will always succeed.
  */
 #ifdef CONFIG_PREEMPT
 static inline void __lock_kernel(void)
 {
 	preempt_disable();
-	if (unlikely(!_raw_spin_trylock(&kernel_flag))) {
+	if (unlikely(!do_raw_spin_trylock(&kernel_flag))) {
 		/*
 		 * If preemption was disabled even before this
 		 * was called, there's nothing we can be polite
 		 * about - just spin.
 		 */
 		if (preempt_count() > 1) {
-			_raw_spin_lock(&kernel_flag);
+			do_raw_spin_lock(&kernel_flag);
 			return;
 		}
 
@@ -79,10 +82,10 @@ static inline void __lock_kernel(void)
 		 */
 		do {
 			preempt_enable();
-			while (spin_is_locked(&kernel_flag))
+			while (raw_spin_is_locked(&kernel_flag))
 				cpu_relax();
 			preempt_disable();
-		} while (!_raw_spin_trylock(&kernel_flag));
+		} while (!do_raw_spin_trylock(&kernel_flag));
 	}
 }
 
@@ -93,7 +96,7 @@ static inline void __lock_kernel(void)
  */
 static inline void __lock_kernel(void)
 {
-	_raw_spin_lock(&kernel_flag);
+	do_raw_spin_lock(&kernel_flag);
 }
 #endif
 
@@ -103,7 +106,7 @@ static inline void __unlock_kernel(void)
 	 * the BKL is not covered by lockdep, so we open-code the
 	 * unlocking sequence (and thus avoid the dep-chain ops):
 	 */
-	_raw_spin_unlock(&kernel_flag);
+	do_raw_spin_unlock(&kernel_flag);
 	preempt_enable();
 }
 
@@ -113,21 +116,28 @@ static inline void __unlock_kernel(void)
  * This cannot happen asynchronously, so we only need to
  * worry about other CPU's.
  */
-void __lockfunc lock_kernel(void)
+void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 {
-	int depth = current->lock_depth+1;
-	if (likely(!depth))
+	int depth = current->lock_depth + 1;
+
+	trace_lock_kernel(func, file, line);
+
+	if (likely(!depth)) {
+		might_sleep();
 		__lock_kernel();
+	}
 	current->lock_depth = depth;
 }
 
-void __lockfunc unlock_kernel(void)
+void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
 {
 	BUG_ON(current->lock_depth < 0);
 	if (likely(--current->lock_depth < 0))
 		__unlock_kernel();
+
+	trace_unlock_kernel(func, file, line);
 }
 
-EXPORT_SYMBOL(lock_kernel);
-EXPORT_SYMBOL(unlock_kernel);
+EXPORT_SYMBOL(_lock_kernel);
+EXPORT_SYMBOL(_unlock_kernel);
 
diff --git a/lib/kobject.c b/lib/kobject.c
index b512b746d2af..82dc34c095c2 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -700,7 +700,7 @@ static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-struct sysfs_ops kobj_sysfs_ops = {
+const struct sysfs_ops kobj_sysfs_ops = {
 	.show	= kobj_attr_show,
 	.store	= kobj_attr_store,
 };
@@ -746,17 +746,56 @@ void kset_unregister(struct kset *k)
  */
 struct kobject *kset_find_obj(struct kset *kset, const char *name)
 {
+	return kset_find_obj_hinted(kset, name, NULL);
+}
+
+/**
+ * kset_find_obj_hinted - search for object in kset given a predecessor hint.
+ * @kset: kset we're looking in.
+ * @name: object's name.
+ * @hint: hint to possible object's predecessor.
+ *
+ * Check the hint's next object and if it is a match return it directly,
+ * otherwise, fall back to the behavior of kset_find_obj().  Either way
+ * a reference for the returned object is held and the reference on the
+ * hinted object is released.
+ */
+struct kobject *kset_find_obj_hinted(struct kset *kset, const char *name,
+				     struct kobject *hint)
+{
 	struct kobject *k;
 	struct kobject *ret = NULL;
 
 	spin_lock(&kset->list_lock);
+
+	if (!hint)
+		goto slow_search;
+
+	/* end of list detection */
+	if (hint->entry.next == kset->list.next)
+		goto slow_search;
+
+	k = container_of(hint->entry.next, struct kobject, entry);
+	if (!kobject_name(k) || strcmp(kobject_name(k), name))
+		goto slow_search;
+
+	ret = kobject_get(k);
+	goto unlock_exit;
+
+slow_search:
 	list_for_each_entry(k, &kset->list, entry) {
 		if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
 			ret = kobject_get(k);
 			break;
 		}
 	}
+
+unlock_exit:
 	spin_unlock(&kset->list_lock);
+
+	if (hint)
+		kobject_put(hint);
+
 	return ret;
 }
 
@@ -789,7 +828,7 @@ static struct kobj_type kset_ktype = {
  * If the kset was not able to be created, NULL will be returned.
  */
 static struct kset *kset_create(const char *name,
-				struct kset_uevent_ops *uevent_ops,
+				const struct kset_uevent_ops *uevent_ops,
 				struct kobject *parent_kobj)
 {
 	struct kset *kset;
@@ -832,7 +871,7 @@ static struct kset *kset_create(const char *name,
  * If the kset was not able to be created, NULL will be returned.
  */
 struct kset *kset_create_and_add(const char *name,
-				 struct kset_uevent_ops *uevent_ops,
+				 const struct kset_uevent_ops *uevent_ops,
 				 struct kobject *parent_kobj)
 {
 	struct kset *kset;
@@ -850,6 +889,121 @@ struct kset *kset_create_and_add(const char *name,
 }
 EXPORT_SYMBOL_GPL(kset_create_and_add);
 
+
+static DEFINE_SPINLOCK(kobj_ns_type_lock);
+static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES];
+
+int kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
+{
+	enum kobj_ns_type type = ops->type;
+	int error;
+
+	spin_lock(&kobj_ns_type_lock);
+
+	error = -EINVAL;
+	if (type >= KOBJ_NS_TYPES)
+		goto out;
+
+	error = -EINVAL;
+	if (type <= KOBJ_NS_TYPE_NONE)
+		goto out;
+
+	error = -EBUSY;
+	if (kobj_ns_ops_tbl[type])
+		goto out;
+
+	error = 0;
+	kobj_ns_ops_tbl[type] = ops;
+
+out:
+	spin_unlock(&kobj_ns_type_lock);
+	return error;
+}
+
+int kobj_ns_type_registered(enum kobj_ns_type type)
+{
+	int registered = 0;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES))
+		registered = kobj_ns_ops_tbl[type] != NULL;
+	spin_unlock(&kobj_ns_type_lock);
+
+	return registered;
+}
+
+const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
+{
+	const struct kobj_ns_type_operations *ops = NULL;
+
+	if (parent && parent->ktype->child_ns_type)
+		ops = parent->ktype->child_ns_type(parent);
+
+	return ops;
+}
+
+const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
+{
+	return kobj_child_ns_ops(kobj->parent);
+}
+
+
+const void *kobj_ns_current(enum kobj_ns_type type)
+{
+	const void *ns = NULL;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		ns = kobj_ns_ops_tbl[type]->current_ns();
+	spin_unlock(&kobj_ns_type_lock);
+
+	return ns;
+}
+
+const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
+{
+	const void *ns = NULL;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		ns = kobj_ns_ops_tbl[type]->netlink_ns(sk);
+	spin_unlock(&kobj_ns_type_lock);
+
+	return ns;
+}
+
+const void *kobj_ns_initial(enum kobj_ns_type type)
+{
+	const void *ns = NULL;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		ns = kobj_ns_ops_tbl[type]->initial_ns();
+	spin_unlock(&kobj_ns_type_lock);
+
+	return ns;
+}
+
+/*
+ * kobj_ns_exit - invalidate a namespace tag
+ *
+ * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET)
+ * @ns: the actual namespace being invalidated
+ *
+ * This is called when a tag is no longer valid.  For instance,
+ * when a network namespace exits, it uses this helper to
+ * make sure no sb's sysfs_info points to the now-invalidated
+ * netns.
+ */
+void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
+{
+	sysfs_exit_ns(type, ns);
+}
+
+
 EXPORT_SYMBOL(kobject_get);
 EXPORT_SYMBOL(kobject_put);
 EXPORT_SYMBOL(kobject_del);
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 920a3ca6e259..70af0a7f97c0 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -18,18 +18,25 @@
 #include <linux/string.h>
 #include <linux/kobject.h>
 #include <linux/module.h>
-
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <net/sock.h>
+#include <net/net_namespace.h>
 
 
 u64 uevent_seqnum;
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
 static DEFINE_SPINLOCK(sequence_lock);
-#if defined(CONFIG_NET)
-static struct sock *uevent_sock;
+#ifdef CONFIG_NET
+struct uevent_sock {
+	struct list_head list;
+	struct sock *sk;
+};
+static LIST_HEAD(uevent_sock_list);
+static DEFINE_MUTEX(uevent_sock_mutex);
 #endif
 
 /* the strings here must match the enum in include/linux/kobject.h */
@@ -76,6 +83,39 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_NET
+static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
+{
+	struct kobject *kobj = data;
+	const struct kobj_ns_type_operations *ops;
+
+	ops = kobj_ns_ops(kobj);
+	if (ops) {
+		const void *sock_ns, *ns;
+		ns = kobj->ktype->namespace(kobj);
+		sock_ns = ops->netlink_ns(dsk);
+		return sock_ns != ns;
+	}
+
+	return 0;
+}
+#endif
+
+static int kobj_usermode_filter(struct kobject *kobj)
+{
+	const struct kobj_ns_type_operations *ops;
+
+	ops = kobj_ns_ops(kobj);
+	if (ops) {
+		const void *init_ns, *ns;
+		ns = kobj->ktype->namespace(kobj);
+		init_ns = ops->initial_ns();
+		return ns != init_ns;
+	}
+
+	return 0;
+}
+
 /**
  * kobject_uevent_env - send an uevent with environmental data
  *
@@ -83,7 +123,7 @@ out:
  * @kobj: struct kobject that the action is happening to
  * @envp_ext: pointer to environmental data
  *
- * Returns 0 if kobject_uevent() is completed with success or the
+ * Returns 0 if kobject_uevent_env() is completed with success or the
  * corresponding error when it fails.
  */
 int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
@@ -95,10 +135,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 	const char *subsystem;
 	struct kobject *top_kobj;
 	struct kset *kset;
-	struct kset_uevent_ops *uevent_ops;
+	const struct kset_uevent_ops *uevent_ops;
 	u64 seq;
 	int i = 0;
 	int retval = 0;
+#ifdef CONFIG_NET
+	struct uevent_sock *ue_sk;
+#endif
 
 	pr_debug("kobject: '%s' (%p): %s\n",
 		 kobject_name(kobj), kobj, __func__);
@@ -210,7 +253,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 
 #if defined(CONFIG_NET)
 	/* send netlink message */
-	if (uevent_sock) {
+	mutex_lock(&uevent_sock_mutex);
+	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+		struct sock *uevent_sock = ue_sk->sk;
 		struct sk_buff *skb;
 		size_t len;
 
@@ -232,18 +277,21 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 			}
 
 			NETLINK_CB(skb).dst_group = 1;
-			retval = netlink_broadcast(uevent_sock, skb, 0, 1,
-						   GFP_KERNEL);
+			retval = netlink_broadcast_filtered(uevent_sock, skb,
+							    0, 1, GFP_KERNEL,
+							    kobj_bcast_filter,
+							    kobj);
 			/* ENOBUFS should be handled in userspace */
 			if (retval == -ENOBUFS)
 				retval = 0;
 		} else
 			retval = -ENOMEM;
 	}
+	mutex_unlock(&uevent_sock_mutex);
 #endif
 
 	/* call uevent_helper, usually only enabled during early boot */
-	if (uevent_helper[0]) {
+	if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
 		char *argv [3];
 
 		argv [0] = uevent_helper;
@@ -269,7 +317,7 @@ exit:
 EXPORT_SYMBOL_GPL(kobject_uevent_env);
 
 /**
- * kobject_uevent - notify userspace by ending an uevent
+ * kobject_uevent - notify userspace by sending an uevent
  *
  * @action: action that is happening
  * @kobj: struct kobject that the action is happening to
@@ -319,18 +367,59 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 EXPORT_SYMBOL_GPL(add_uevent_var);
 
 #if defined(CONFIG_NET)
-static int __init kobject_uevent_init(void)
+static int uevent_net_init(struct net *net)
 {
-	uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT,
-					    1, NULL, NULL, THIS_MODULE);
-	if (!uevent_sock) {
+	struct uevent_sock *ue_sk;
+
+	ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
+	if (!ue_sk)
+		return -ENOMEM;
+
+	ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT,
+					  1, NULL, NULL, THIS_MODULE);
+	if (!ue_sk->sk) {
 		printk(KERN_ERR
 		       "kobject_uevent: unable to create netlink socket!\n");
+		kfree(ue_sk);
 		return -ENODEV;
 	}
-	netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
+	mutex_lock(&uevent_sock_mutex);
+	list_add_tail(&ue_sk->list, &uevent_sock_list);
+	mutex_unlock(&uevent_sock_mutex);
 	return 0;
 }
 
+static void uevent_net_exit(struct net *net)
+{
+	struct uevent_sock *ue_sk;
+
+	mutex_lock(&uevent_sock_mutex);
+	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+		if (sock_net(ue_sk->sk) == net)
+			goto found;
+	}
+	mutex_unlock(&uevent_sock_mutex);
+	return;
+
+found:
+	list_del(&ue_sk->list);
+	mutex_unlock(&uevent_sock_mutex);
+
+	netlink_kernel_release(ue_sk->sk);
+	kfree(ue_sk);
+}
+
+static struct pernet_operations uevent_net_ops = {
+	.init	= uevent_net_init,
+	.exit	= uevent_net_exit,
+};
+
+static int __init kobject_uevent_init(void)
+{
+	netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
+	return register_pernet_subsys(&uevent_net_ops);
+}
+
+
 postcore_initcall(kobject_uevent_init);
 #endif
diff --git a/lib/kref.c b/lib/kref.c
index 9ecd6e865610..3efb882b11db 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -13,17 +13,7 @@
 
 #include <linux/kref.h>
 #include <linux/module.h>
-
-/**
- * kref_set - initialize object and set refcount to requested number.
- * @kref: object in question.
- * @num: initial reference counter
- */
-void kref_set(struct kref *kref, int num)
-{
-	atomic_set(&kref->refcount, num);
-	smp_mb();
-}
+#include <linux/slab.h>
 
 /**
  * kref_init - initialize object.
@@ -31,7 +21,8 @@ void kref_set(struct kref *kref, int num)
  */
 void kref_init(struct kref *kref)
 {
-	kref_set(kref, 1);
+	atomic_set(&kref->refcount, 1);
+	smp_mb();
 }
 
 /**
@@ -71,7 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
 	return 0;
 }
 
-EXPORT_SYMBOL(kref_set);
+
+/**
+ * kref_sub - subtract a number of refcounts for object.
+ * @kref: object.
+ * @count: Number of recounts to subtract.
+ * @release: pointer to the function that will clean up the object when the
+ *	     last reference to the object is released.
+ *	     This pointer is required, and it is not acceptable to pass kfree
+ *	     in as this function.
+ *
+ * Subtract @count from the refcount, and if 0, call release().
+ * Return 1 if the object was removed, otherwise return 0.  Beware, if this
+ * function returns 0, you still can not count on the kref from remaining in
+ * memory.  Only use the return value if you want to see if the kref is now
+ * gone, not present.
+ */
+int kref_sub(struct kref *kref, unsigned int count,
+	     void (*release)(struct kref *kref))
+{
+	WARN_ON(release == NULL);
+	WARN_ON(release == (void (*)(struct kref *))kfree);
+
+	if (atomic_sub_and_test((int) count, &kref->refcount)) {
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
+
 EXPORT_SYMBOL(kref_init);
 EXPORT_SYMBOL(kref_get);
 EXPORT_SYMBOL(kref_put);
+EXPORT_SYMBOL(kref_sub);
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644
index 000000000000..157cd88a6ffc
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
+#include <linux/kernel.h>
+#include <linux/gcd.h>
+#include <linux/module.h>
+
+/* Lowest common multiple */
+unsigned long lcm(unsigned long a, unsigned long b)
+{
+	if (a && b)
+		return (a * b) / gcd(a, b);
+	else if (b)
+		return b;
+
+	return a;
+}
+EXPORT_SYMBOL_GPL(lcm);
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 1a39f4e3ae1f..344c710d16ca 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add);
  */
 void list_del(struct list_head *entry)
 {
+	WARN(entry->next == LIST_POISON1,
+		"list_del corruption, next is LIST_POISON1 (%p)\n",
+		LIST_POISON1);
+	WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
+		"list_del corruption, prev is LIST_POISON2 (%p)\n",
+		LIST_POISON2);
 	WARN(entry->prev->next != entry,
 		"list_del corruption. prev->next should be %p, "
 		"but was %p\n", entry, entry->prev->next);
diff --git a/lib/list_sort.c b/lib/list_sort.c
new file mode 100644
index 000000000000..d7325c6b103f
--- /dev/null
+++ b/lib/list_sort.c
@@ -0,0 +1,291 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#define MAX_LIST_LENGTH_BITS 20
+
+/*
+ * Returns a list organized in an intermediate format suited
+ * to chaining of merge() calls: null-terminated, no reserved or
+ * sentinel head node, "prev" links not maintained.
+ */
+static struct list_head *merge(void *priv,
+				int (*cmp)(void *priv, struct list_head *a,
+					struct list_head *b),
+				struct list_head *a, struct list_head *b)
+{
+	struct list_head head, *tail = &head;
+
+	while (a && b) {
+		/* if equal, take 'a' -- important for sort stability */
+		if ((*cmp)(priv, a, b) <= 0) {
+			tail->next = a;
+			a = a->next;
+		} else {
+			tail->next = b;
+			b = b->next;
+		}
+		tail = tail->next;
+	}
+	tail->next = a?:b;
+	return head.next;
+}
+
+/*
+ * Combine final list merge with restoration of standard doubly-linked
+ * list structure.  This approach duplicates code from merge(), but
+ * runs faster than the tidier alternatives of either a separate final
+ * prev-link restoration pass, or maintaining the prev links
+ * throughout.
+ */
+static void merge_and_restore_back_links(void *priv,
+				int (*cmp)(void *priv, struct list_head *a,
+					struct list_head *b),
+				struct list_head *head,
+				struct list_head *a, struct list_head *b)
+{
+	struct list_head *tail = head;
+
+	while (a && b) {
+		/* if equal, take 'a' -- important for sort stability */
+		if ((*cmp)(priv, a, b) <= 0) {
+			tail->next = a;
+			a->prev = tail;
+			a = a->next;
+		} else {
+			tail->next = b;
+			b->prev = tail;
+			b = b->next;
+		}
+		tail = tail->next;
+	}
+	tail->next = a ? : b;
+
+	do {
+		/*
+		 * In worst cases this loop may run many iterations.
+		 * Continue callbacks to the client even though no
+		 * element comparison is needed, so the client's cmp()
+		 * routine can invoke cond_resched() periodically.
+		 */
+		(*cmp)(priv, tail->next, tail->next);
+
+		tail->next->prev = tail;
+		tail = tail->next;
+	} while (tail->next);
+
+	tail->next = head;
+	head->prev = tail;
+}
+
+/**
+ * list_sort - sort a list
+ * @priv: private data, opaque to list_sort(), passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * This function implements "merge sort", which has O(nlog(n))
+ * complexity.
+ *
+ * The comparison function @cmp must return a negative value if @a
+ * should sort before @b, and a positive value if @a should sort after
+ * @b. If @a and @b are equivalent, and their original relative
+ * ordering is to be preserved, @cmp must return 0.
+ */
+void list_sort(void *priv, struct list_head *head,
+		int (*cmp)(void *priv, struct list_head *a,
+			struct list_head *b))
+{
+	struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
+						-- last slot is a sentinel */
+	int lev;  /* index into part[] */
+	int max_lev = 0;
+	struct list_head *list;
+
+	if (list_empty(head))
+		return;
+
+	memset(part, 0, sizeof(part));
+
+	head->prev->next = NULL;
+	list = head->next;
+
+	while (list) {
+		struct list_head *cur = list;
+		list = list->next;
+		cur->next = NULL;
+
+		for (lev = 0; part[lev]; lev++) {
+			cur = merge(priv, cmp, part[lev], cur);
+			part[lev] = NULL;
+		}
+		if (lev > max_lev) {
+			if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
+				printk_once(KERN_DEBUG "list passed to"
+					" list_sort() too long for"
+					" efficiency\n");
+				lev--;
+			}
+			max_lev = lev;
+		}
+		part[lev] = cur;
+	}
+
+	for (lev = 0; lev < max_lev; lev++)
+		if (part[lev])
+			list = merge(priv, cmp, part[lev], list);
+
+	merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
+}
+EXPORT_SYMBOL(list_sort);
+
+#ifdef CONFIG_TEST_LIST_SORT
+
+#include <linux/random.h>
+
+/*
+ * The pattern of set bits in the list length determines which cases
+ * are hit in list_sort().
+ */
+#define TEST_LIST_LEN (512+128+2) /* not including head */
+
+#define TEST_POISON1 0xDEADBEEF
+#define TEST_POISON2 0xA324354C
+
+struct debug_el {
+	unsigned int poison1;
+	struct list_head list;
+	unsigned int poison2;
+	int value;
+	unsigned serial;
+};
+
+/* Array, containing pointers to all elements in the test list */
+static struct debug_el **elts __initdata;
+
+static int __init check(struct debug_el *ela, struct debug_el *elb)
+{
+	if (ela->serial >= TEST_LIST_LEN) {
+		printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
+				ela->serial);
+		return -EINVAL;
+	}
+	if (elb->serial >= TEST_LIST_LEN) {
+		printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
+				elb->serial);
+		return -EINVAL;
+	}
+	if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
+		printk(KERN_ERR "list_sort_test: error: phantom element\n");
+		return -EINVAL;
+	}
+	if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
+		printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
+				ela->poison1, ela->poison2);
+		return -EINVAL;
+	}
+	if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
+		printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
+				elb->poison1, elb->poison2);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct debug_el *ela, *elb;
+
+	ela = container_of(a, struct debug_el, list);
+	elb = container_of(b, struct debug_el, list);
+
+	check(ela, elb);
+	return ela->value - elb->value;
+}
+
+static int __init list_sort_test(void)
+{
+	int i, count = 1, err = -EINVAL;
+	struct debug_el *el;
+	struct list_head *cur, *tmp;
+	LIST_HEAD(head);
+
+	printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
+
+	elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
+	if (!elts) {
+		printk(KERN_ERR "list_sort_test: error: cannot allocate "
+				"memory\n");
+		goto exit;
+	}
+
+	for (i = 0; i < TEST_LIST_LEN; i++) {
+		el = kmalloc(sizeof(*el), GFP_KERNEL);
+		if (!el) {
+			printk(KERN_ERR "list_sort_test: error: cannot "
+					"allocate memory\n");
+			goto exit;
+		}
+		 /* force some equivalencies */
+		el->value = random32() % (TEST_LIST_LEN/3);
+		el->serial = i;
+		el->poison1 = TEST_POISON1;
+		el->poison2 = TEST_POISON2;
+		elts[i] = el;
+		list_add_tail(&el->list, &head);
+	}
+
+	list_sort(NULL, &head, cmp);
+
+	for (cur = head.next; cur->next != &head; cur = cur->next) {
+		struct debug_el *el1;
+		int cmp_result;
+
+		if (cur->next->prev != cur) {
+			printk(KERN_ERR "list_sort_test: error: list is "
+					"corrupted\n");
+			goto exit;
+		}
+
+		cmp_result = cmp(NULL, cur, cur->next);
+		if (cmp_result > 0) {
+			printk(KERN_ERR "list_sort_test: error: list is not "
+					"sorted\n");
+			goto exit;
+		}
+
+		el = container_of(cur, struct debug_el, list);
+		el1 = container_of(cur->next, struct debug_el, list);
+		if (cmp_result == 0 && el->serial >= el1->serial) {
+			printk(KERN_ERR "list_sort_test: error: order of "
+					"equivalent elements not preserved\n");
+			goto exit;
+		}
+
+		if (check(el, el1)) {
+			printk(KERN_ERR "list_sort_test: error: element check "
+					"failed\n");
+			goto exit;
+		}
+		count++;
+	}
+
+	if (count != TEST_LIST_LEN) {
+		printk(KERN_ERR "list_sort_test: error: bad list length %d",
+				count);
+		goto exit;
+	}
+
+	err = 0;
+exit:
+	kfree(elts);
+	list_for_each_safe(cur, tmp, &head) {
+		list_del(cur);
+		kfree(container_of(cur, struct debug_el, list));
+	}
+	return err;
+}
+module_init(list_sort_test);
+#endif /* CONFIG_TEST_LIST_SORT */
diff --git a/lib/lmb.c b/lib/lmb.c
deleted file mode 100644
index e4a6482d8b26..000000000000
--- a/lib/lmb.c
+++ /dev/null
@@ -1,527 +0,0 @@
-/*
- * Procedures for maintaining information about logical memory blocks.
- *
- * Peter Bergner, IBM Corp.	June 2001.
- * Copyright (C) 2001 Peter Bergner.
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/lmb.h>
-
-#define LMB_ALLOC_ANYWHERE	0
-
-struct lmb lmb;
-
-static int lmb_debug;
-
-static int __init early_lmb(char *p)
-{
-	if (p && strstr(p, "debug"))
-		lmb_debug = 1;
-	return 0;
-}
-early_param("lmb", early_lmb);
-
-static void lmb_dump(struct lmb_region *region, char *name)
-{
-	unsigned long long base, size;
-	int i;
-
-	pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);
-
-	for (i = 0; i < region->cnt; i++) {
-		base = region->region[i].base;
-		size = region->region[i].size;
-
-		pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
-		    name, i, base, base + size - 1, size);
-	}
-}
-
-void lmb_dump_all(void)
-{
-	if (!lmb_debug)
-		return;
-
-	pr_info("LMB configuration:\n");
-	pr_info(" rmo_size    = 0x%llx\n", (unsigned long long)lmb.rmo_size);
-	pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size);
-
-	lmb_dump(&lmb.memory, "memory");
-	lmb_dump(&lmb.reserved, "reserved");
-}
-
-static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
-					u64 size2)
-{
-	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
-}
-
-static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
-{
-	if (base2 == base1 + size1)
-		return 1;
-	else if (base1 == base2 + size2)
-		return -1;
-
-	return 0;
-}
-
-static long lmb_regions_adjacent(struct lmb_region *rgn,
-		unsigned long r1, unsigned long r2)
-{
-	u64 base1 = rgn->region[r1].base;
-	u64 size1 = rgn->region[r1].size;
-	u64 base2 = rgn->region[r2].base;
-	u64 size2 = rgn->region[r2].size;
-
-	return lmb_addrs_adjacent(base1, size1, base2, size2);
-}
-
-static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
-{
-	unsigned long i;
-
-	for (i = r; i < rgn->cnt - 1; i++) {
-		rgn->region[i].base = rgn->region[i + 1].base;
-		rgn->region[i].size = rgn->region[i + 1].size;
-	}
-	rgn->cnt--;
-}
-
-/* Assumption: base addr of region 1 < base addr of region 2 */
-static void lmb_coalesce_regions(struct lmb_region *rgn,
-		unsigned long r1, unsigned long r2)
-{
-	rgn->region[r1].size += rgn->region[r2].size;
-	lmb_remove_region(rgn, r2);
-}
-
-void __init lmb_init(void)
-{
-	/* Create a dummy zero size LMB which will get coalesced away later.
-	 * This simplifies the lmb_add() code below...
-	 */
-	lmb.memory.region[0].base = 0;
-	lmb.memory.region[0].size = 0;
-	lmb.memory.cnt = 1;
-
-	/* Ditto. */
-	lmb.reserved.region[0].base = 0;
-	lmb.reserved.region[0].size = 0;
-	lmb.reserved.cnt = 1;
-}
-
-void __init lmb_analyze(void)
-{
-	int i;
-
-	lmb.memory.size = 0;
-
-	for (i = 0; i < lmb.memory.cnt; i++)
-		lmb.memory.size += lmb.memory.region[i].size;
-}
-
-static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
-{
-	unsigned long coalesced = 0;
-	long adjacent, i;
-
-	if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
-		rgn->region[0].base = base;
-		rgn->region[0].size = size;
-		return 0;
-	}
-
-	/* First try and coalesce this LMB with another. */
-	for (i = 0; i < rgn->cnt; i++) {
-		u64 rgnbase = rgn->region[i].base;
-		u64 rgnsize = rgn->region[i].size;
-
-		if ((rgnbase == base) && (rgnsize == size))
-			/* Already have this region, so we're done */
-			return 0;
-
-		adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize);
-		if (adjacent > 0) {
-			rgn->region[i].base -= size;
-			rgn->region[i].size += size;
-			coalesced++;
-			break;
-		} else if (adjacent < 0) {
-			rgn->region[i].size += size;
-			coalesced++;
-			break;
-		}
-	}
-
-	if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) {
-		lmb_coalesce_regions(rgn, i, i+1);
-		coalesced++;
-	}
-
-	if (coalesced)
-		return coalesced;
-	if (rgn->cnt >= MAX_LMB_REGIONS)
-		return -1;
-
-	/* Couldn't coalesce the LMB, so add it to the sorted table. */
-	for (i = rgn->cnt - 1; i >= 0; i--) {
-		if (base < rgn->region[i].base) {
-			rgn->region[i+1].base = rgn->region[i].base;
-			rgn->region[i+1].size = rgn->region[i].size;
-		} else {
-			rgn->region[i+1].base = base;
-			rgn->region[i+1].size = size;
-			break;
-		}
-	}
-
-	if (base < rgn->region[0].base) {
-		rgn->region[0].base = base;
-		rgn->region[0].size = size;
-	}
-	rgn->cnt++;
-
-	return 0;
-}
-
-long lmb_add(u64 base, u64 size)
-{
-	struct lmb_region *_rgn = &lmb.memory;
-
-	/* On pSeries LPAR systems, the first LMB is our RMO region. */
-	if (base == 0)
-		lmb.rmo_size = size;
-
-	return lmb_add_region(_rgn, base, size);
-
-}
-
-long lmb_remove(u64 base, u64 size)
-{
-	struct lmb_region *rgn = &(lmb.memory);
-	u64 rgnbegin, rgnend;
-	u64 end = base + size;
-	int i;
-
-	rgnbegin = rgnend = 0; /* supress gcc warnings */
-
-	/* Find the region where (base, size) belongs to */
-	for (i=0; i < rgn->cnt; i++) {
-		rgnbegin = rgn->region[i].base;
-		rgnend = rgnbegin + rgn->region[i].size;
-
-		if ((rgnbegin <= base) && (end <= rgnend))
-			break;
-	}
-
-	/* Didn't find the region */
-	if (i == rgn->cnt)
-		return -1;
-
-	/* Check to see if we are removing entire region */
-	if ((rgnbegin == base) && (rgnend == end)) {
-		lmb_remove_region(rgn, i);
-		return 0;
-	}
-
-	/* Check to see if region is matching at the front */
-	if (rgnbegin == base) {
-		rgn->region[i].base = end;
-		rgn->region[i].size -= size;
-		return 0;
-	}
-
-	/* Check to see if the region is matching at the end */
-	if (rgnend == end) {
-		rgn->region[i].size -= size;
-		return 0;
-	}
-
-	/*
-	 * We need to split the entry -  adjust the current one to the
-	 * beginging of the hole and add the region after hole.
-	 */
-	rgn->region[i].size = base - rgn->region[i].base;
-	return lmb_add_region(rgn, end, rgnend - end);
-}
-
-long __init lmb_reserve(u64 base, u64 size)
-{
-	struct lmb_region *_rgn = &lmb.reserved;
-
-	BUG_ON(0 == size);
-
-	return lmb_add_region(_rgn, base, size);
-}
-
-long __init lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
-{
-	unsigned long i;
-
-	for (i = 0; i < rgn->cnt; i++) {
-		u64 rgnbase = rgn->region[i].base;
-		u64 rgnsize = rgn->region[i].size;
-		if (lmb_addrs_overlap(base, size, rgnbase, rgnsize))
-			break;
-	}
-
-	return (i < rgn->cnt) ? i : -1;
-}
-
-static u64 lmb_align_down(u64 addr, u64 size)
-{
-	return addr & ~(size - 1);
-}
-
-static u64 lmb_align_up(u64 addr, u64 size)
-{
-	return (addr + (size - 1)) & ~(size - 1);
-}
-
-static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end,
-					   u64 size, u64 align)
-{
-	u64 base, res_base;
-	long j;
-
-	base = lmb_align_down((end - size), align);
-	while (start <= base) {
-		j = lmb_overlaps_region(&lmb.reserved, base, size);
-		if (j < 0) {
-			/* this area isn't reserved, take it */
-			if (lmb_add_region(&lmb.reserved, base, size) < 0)
-				base = ~(u64)0;
-			return base;
-		}
-		res_base = lmb.reserved.region[j].base;
-		if (res_base < size)
-			break;
-		base = lmb_align_down(res_base - size, align);
-	}
-
-	return ~(u64)0;
-}
-
-static u64 __init lmb_alloc_nid_region(struct lmb_property *mp,
-				       u64 (*nid_range)(u64, u64, int *),
-				       u64 size, u64 align, int nid)
-{
-	u64 start, end;
-
-	start = mp->base;
-	end = start + mp->size;
-
-	start = lmb_align_up(start, align);
-	while (start < end) {
-		u64 this_end;
-		int this_nid;
-
-		this_end = nid_range(start, end, &this_nid);
-		if (this_nid == nid) {
-			u64 ret = lmb_alloc_nid_unreserved(start, this_end,
-							   size, align);
-			if (ret != ~(u64)0)
-				return ret;
-		}
-		start = this_end;
-	}
-
-	return ~(u64)0;
-}
-
-u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
-			 u64 (*nid_range)(u64 start, u64 end, int *nid))
-{
-	struct lmb_region *mem = &lmb.memory;
-	int i;
-
-	BUG_ON(0 == size);
-
-	size = lmb_align_up(size, align);
-
-	for (i = 0; i < mem->cnt; i++) {
-		u64 ret = lmb_alloc_nid_region(&mem->region[i],
-					       nid_range,
-					       size, align, nid);
-		if (ret != ~(u64)0)
-			return ret;
-	}
-
-	return lmb_alloc(size, align);
-}
-
-u64 __init lmb_alloc(u64 size, u64 align)
-{
-	return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
-}
-
-u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
-	u64 alloc;
-
-	alloc = __lmb_alloc_base(size, align, max_addr);
-
-	if (alloc == 0)
-		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
-		      (unsigned long long) size, (unsigned long long) max_addr);
-
-	return alloc;
-}
-
-u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
-	long i, j;
-	u64 base = 0;
-	u64 res_base;
-
-	BUG_ON(0 == size);
-
-	size = lmb_align_up(size, align);
-
-	/* On some platforms, make sure we allocate lowmem */
-	/* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */
-	if (max_addr == LMB_ALLOC_ANYWHERE)
-		max_addr = LMB_REAL_LIMIT;
-
-	for (i = lmb.memory.cnt - 1; i >= 0; i--) {
-		u64 lmbbase = lmb.memory.region[i].base;
-		u64 lmbsize = lmb.memory.region[i].size;
-
-		if (lmbsize < size)
-			continue;
-		if (max_addr == LMB_ALLOC_ANYWHERE)
-			base = lmb_align_down(lmbbase + lmbsize - size, align);
-		else if (lmbbase < max_addr) {
-			base = min(lmbbase + lmbsize, max_addr);
-			base = lmb_align_down(base - size, align);
-		} else
-			continue;
-
-		while (base && lmbbase <= base) {
-			j = lmb_overlaps_region(&lmb.reserved, base, size);
-			if (j < 0) {
-				/* this area isn't reserved, take it */
-				if (lmb_add_region(&lmb.reserved, base, size) < 0)
-					return 0;
-				return base;
-			}
-			res_base = lmb.reserved.region[j].base;
-			if (res_base < size)
-				break;
-			base = lmb_align_down(res_base - size, align);
-		}
-	}
-	return 0;
-}
-
-/* You must call lmb_analyze() before this. */
-u64 __init lmb_phys_mem_size(void)
-{
-	return lmb.memory.size;
-}
-
-u64 __init lmb_end_of_DRAM(void)
-{
-	int idx = lmb.memory.cnt - 1;
-
-	return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
-}
-
-/* You must call lmb_analyze() after this. */
-void __init lmb_enforce_memory_limit(u64 memory_limit)
-{
-	unsigned long i;
-	u64 limit;
-	struct lmb_property *p;
-
-	if (!memory_limit)
-		return;
-
-	/* Truncate the lmb regions to satisfy the memory limit. */
-	limit = memory_limit;
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		if (limit > lmb.memory.region[i].size) {
-			limit -= lmb.memory.region[i].size;
-			continue;
-		}
-
-		lmb.memory.region[i].size = limit;
-		lmb.memory.cnt = i + 1;
-		break;
-	}
-
-	if (lmb.memory.region[0].size < lmb.rmo_size)
-		lmb.rmo_size = lmb.memory.region[0].size;
-
-	memory_limit = lmb_end_of_DRAM();
-
-	/* And truncate any reserves above the limit also. */
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		p = &lmb.reserved.region[i];
-
-		if (p->base > memory_limit)
-			p->size = 0;
-		else if ((p->base + p->size) > memory_limit)
-			p->size = memory_limit - p->base;
-
-		if (p->size == 0) {
-			lmb_remove_region(&lmb.reserved, i);
-			i--;
-		}
-	}
-}
-
-int __init lmb_is_reserved(u64 addr)
-{
-	int i;
-
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		u64 upper = lmb.reserved.region[i].base +
-			lmb.reserved.region[i].size - 1;
-		if ((addr >= lmb.reserved.region[i].base) && (addr <= upper))
-			return 1;
-	}
-	return 0;
-}
-
-/*
- * Given a <base, len>, find which memory regions belong to this range.
- * Adjust the request and return a contiguous chunk.
- */
-int lmb_find(struct lmb_property *res)
-{
-	int i;
-	u64 rstart, rend;
-
-	rstart = res->base;
-	rend = rstart + res->size - 1;
-
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		u64 start = lmb.memory.region[i].base;
-		u64 end = start + lmb.memory.region[i].size - 1;
-
-		if (start > rend)
-			return -1;
-
-		if ((end >= rstart) && (start < rend)) {
-			/* adjust the request */
-			if (rstart < start)
-				rstart = start;
-			if (rend > end)
-				rend = end;
-			res->base = rstart;
-			res->size = rend - rstart + 1;
-			return 0;
-		}
-	}
-	return -1;
-}
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
new file mode 100644
index 000000000000..270de9d31b8c
--- /dev/null
+++ b/lib/lru_cache.c
@@ -0,0 +1,560 @@
+/*
+   lru_cache.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/string.h> /* for memset */
+#include <linux/seq_file.h> /* for seq_printf */
+#include <linux/lru_cache.h>
+
+MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
+	      "Lars Ellenberg <lars@linbit.com>");
+MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
+MODULE_LICENSE("GPL");
+
+/* this is developers aid only.
+ * it catches concurrent access (lack of locking on the users part) */
+#define PARANOIA_ENTRY() do {		\
+	BUG_ON(!lc);			\
+	BUG_ON(!lc->nr_elements);	\
+	BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \
+} while (0)
+
+#define RETURN(x...)     do { \
+	clear_bit(__LC_PARANOIA, &lc->flags); \
+	smp_mb__after_clear_bit(); return x ; } while (0)
+
+/* BUG() if e is not one of the elements tracked by lc */
+#define PARANOIA_LC_ELEMENT(lc, e) do {	\
+	struct lru_cache *lc_ = (lc);	\
+	struct lc_element *e_ = (e);	\
+	unsigned i = e_->lc_index;	\
+	BUG_ON(i >= lc_->nr_elements);	\
+	BUG_ON(lc_->lc_element[i] != e_); } while (0)
+
+/**
+ * lc_create - prepares to track objects in an active set
+ * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @e_count: number of elements allowed to be active simultaneously
+ * @e_size: size of the tracked objects
+ * @e_off: offset to the &struct lc_element member in a tracked object
+ *
+ * Returns a pointer to a newly initialized struct lru_cache on success,
+ * or NULL on (allocation) failure.
+ */
+struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned e_count, size_t e_size, size_t e_off)
+{
+	struct hlist_head *slot = NULL;
+	struct lc_element **element = NULL;
+	struct lru_cache *lc;
+	struct lc_element *e;
+	unsigned cache_obj_size = kmem_cache_size(cache);
+	unsigned i;
+
+	WARN_ON(cache_obj_size < e_size);
+	if (cache_obj_size < e_size)
+		return NULL;
+
+	/* e_count too big; would probably fail the allocation below anyways.
+	 * for typical use cases, e_count should be few thousand at most. */
+	if (e_count > LC_MAX_ACTIVE)
+		return NULL;
+
+	slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL);
+	if (!slot)
+		goto out_fail;
+	element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
+	if (!element)
+		goto out_fail;
+
+	lc = kzalloc(sizeof(*lc), GFP_KERNEL);
+	if (!lc)
+		goto out_fail;
+
+	INIT_LIST_HEAD(&lc->in_use);
+	INIT_LIST_HEAD(&lc->lru);
+	INIT_LIST_HEAD(&lc->free);
+
+	lc->name = name;
+	lc->element_size = e_size;
+	lc->element_off = e_off;
+	lc->nr_elements = e_count;
+	lc->new_number = LC_FREE;
+	lc->lc_cache = cache;
+	lc->lc_element = element;
+	lc->lc_slot = slot;
+
+	/* preallocate all objects */
+	for (i = 0; i < e_count; i++) {
+		void *p = kmem_cache_alloc(cache, GFP_KERNEL);
+		if (!p)
+			break;
+		memset(p, 0, lc->element_size);
+		e = p + e_off;
+		e->lc_index = i;
+		e->lc_number = LC_FREE;
+		list_add(&e->list, &lc->free);
+		element[i] = e;
+	}
+	if (i == e_count)
+		return lc;
+
+	/* else: could not allocate all elements, give up */
+	for (i--; i; i--) {
+		void *p = element[i];
+		kmem_cache_free(cache, p - e_off);
+	}
+	kfree(lc);
+out_fail:
+	kfree(element);
+	kfree(slot);
+	return NULL;
+}
+
+void lc_free_by_index(struct lru_cache *lc, unsigned i)
+{
+	void *p = lc->lc_element[i];
+	WARN_ON(!p);
+	if (p) {
+		p -= lc->element_off;
+		kmem_cache_free(lc->lc_cache, p);
+	}
+}
+
+/**
+ * lc_destroy - frees memory allocated by lc_create()
+ * @lc: the lru cache to destroy
+ */
+void lc_destroy(struct lru_cache *lc)
+{
+	unsigned i;
+	if (!lc)
+		return;
+	for (i = 0; i < lc->nr_elements; i++)
+		lc_free_by_index(lc, i);
+	kfree(lc->lc_element);
+	kfree(lc->lc_slot);
+	kfree(lc);
+}
+
+/**
+ * lc_reset - does a full reset for @lc and the hash table slots.
+ * @lc: the lru cache to operate on
+ *
+ * It is roughly the equivalent of re-allocating a fresh lru_cache object,
+ * basically a short cut to lc_destroy(lc); lc = lc_create(...);
+ */
+void lc_reset(struct lru_cache *lc)
+{
+	unsigned i;
+
+	INIT_LIST_HEAD(&lc->in_use);
+	INIT_LIST_HEAD(&lc->lru);
+	INIT_LIST_HEAD(&lc->free);
+	lc->used = 0;
+	lc->hits = 0;
+	lc->misses = 0;
+	lc->starving = 0;
+	lc->dirty = 0;
+	lc->changed = 0;
+	lc->flags = 0;
+	lc->changing_element = NULL;
+	lc->new_number = LC_FREE;
+	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
+
+	for (i = 0; i < lc->nr_elements; i++) {
+		struct lc_element *e = lc->lc_element[i];
+		void *p = e;
+		p -= lc->element_off;
+		memset(p, 0, lc->element_size);
+		/* re-init it */
+		e->lc_index = i;
+		e->lc_number = LC_FREE;
+		list_add(&e->list, &lc->free);
+	}
+}
+
+/**
+ * lc_seq_printf_stats - print stats about @lc into @seq
+ * @seq: the seq_file to print into
+ * @lc: the lru cache to print statistics of
+ */
+size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
+{
+	/* NOTE:
+	 * total calls to lc_get are
+	 * (starving + hits + misses)
+	 * misses include "dirty" count (update from an other thread in
+	 * progress) and "changed", when this in fact lead to an successful
+	 * update of the cache.
+	 */
+	return seq_printf(seq, "\t%s: used:%u/%u "
+		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+		lc->name, lc->used, lc->nr_elements,
+		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+}
+
+static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
+{
+	return  lc->lc_slot + (enr % lc->nr_elements);
+}
+
+
+/**
+ * lc_find - find element by label, if present in the hash table
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns the pointer to an element, if the element with the requested
+ * "label" or element number is present in the hash table,
+ * or NULL if not found. Does not change the refcnt.
+ */
+struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
+{
+	struct hlist_node *n;
+	struct lc_element *e;
+
+	BUG_ON(!lc);
+	BUG_ON(!lc->nr_elements);
+	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
+		if (e->lc_number == enr)
+			return e;
+	}
+	return NULL;
+}
+
+/* returned element will be "recycled" immediately */
+static struct lc_element *lc_evict(struct lru_cache *lc)
+{
+	struct list_head  *n;
+	struct lc_element *e;
+
+	if (list_empty(&lc->lru))
+		return NULL;
+
+	n = lc->lru.prev;
+	e = list_entry(n, struct lc_element, list);
+
+	PARANOIA_LC_ELEMENT(lc, e);
+
+	list_del(&e->list);
+	hlist_del(&e->colision);
+	return e;
+}
+
+/**
+ * lc_del - removes an element from the cache
+ * @lc: The lru_cache object
+ * @e: The element to remove
+ *
+ * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
+ * sets @e->enr to %LC_FREE.
+ */
+void lc_del(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_ENTRY();
+	PARANOIA_LC_ELEMENT(lc, e);
+	BUG_ON(e->refcnt);
+
+	e->lc_number = LC_FREE;
+	hlist_del_init(&e->colision);
+	list_move(&e->list, &lc->free);
+	RETURN();
+}
+
+static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+{
+	struct list_head *n;
+
+	if (list_empty(&lc->free))
+		return lc_evict(lc);
+
+	n = lc->free.next;
+	list_del(n);
+	return list_entry(n, struct lc_element, list);
+}
+
+static int lc_unused_element_available(struct lru_cache *lc)
+{
+	if (!list_empty(&lc->free))
+		return 1; /* something on the free list */
+	if (!list_empty(&lc->lru))
+		return 1;  /* something to evict */
+
+	return 0;
+}
+
+
+/**
+ * lc_get - get element by label, maybe change the active set
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
+ * "touches" and returns it.
+ *
+ * In case the requested number is not present, it needs to be added to the
+ * cache. Therefore it is possible that an other element becomes evicted from
+ * the cache. In either case, the user is notified so he is able to e.g. keep
+ * a persistent log of the cache changes, and therefore the objects in use.
+ *
+ * Return values:
+ *  NULL
+ *     The cache was marked %LC_STARVING,
+ *     or the requested label was not in the active set
+ *     and a changing transaction is still pending (@lc was marked %LC_DIRTY).
+ *     Or no unused or free element could be recycled (@lc will be marked as
+ *     %LC_STARVING, blocking further lc_get() operations).
+ *
+ *  pointer to the element with the REQUESTED element number.
+ *     In this case, it can be used right away
+ *
+ *  pointer to an UNUSED element with some different element number,
+ *          where that different number may also be %LC_FREE.
+ *
+ *          In this case, the cache is marked %LC_DIRTY (blocking further changes),
+ *          and the returned element pointer is removed from the lru list and
+ *          hash collision chains.  The user now should do whatever housekeeping
+ *          is necessary.
+ *          Then he must call lc_changed(lc,element_pointer), to finish
+ *          the change.
+ *
+ * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
+ *       any cache set change.
+ */
+struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e;
+
+	PARANOIA_ENTRY();
+	if (lc->flags & LC_STARVING) {
+		++lc->starving;
+		RETURN(NULL);
+	}
+
+	e = lc_find(lc, enr);
+	if (e) {
+		++lc->hits;
+		if (e->refcnt++ == 0)
+			lc->used++;
+		list_move(&e->list, &lc->in_use); /* Not evictable... */
+		RETURN(e);
+	}
+
+	++lc->misses;
+
+	/* In case there is nothing available and we can not kick out
+	 * the LRU element, we have to wait ...
+	 */
+	if (!lc_unused_element_available(lc)) {
+		__set_bit(__LC_STARVING, &lc->flags);
+		RETURN(NULL);
+	}
+
+	/* it was not present in the active set.
+	 * we are going to recycle an unused (or even "free") element.
+	 * user may need to commit a transaction to record that change.
+	 * we serialize on flags & TF_DIRTY */
+	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
+		++lc->dirty;
+		RETURN(NULL);
+	}
+
+	e = lc_get_unused_element(lc);
+	BUG_ON(!e);
+
+	clear_bit(__LC_STARVING, &lc->flags);
+	BUG_ON(++e->refcnt != 1);
+	lc->used++;
+
+	lc->changing_element = e;
+	lc->new_number = enr;
+
+	RETURN(e);
+}
+
+/* similar to lc_get,
+ * but only gets a new reference on an existing element.
+ * you either get the requested element, or NULL.
+ * will be consolidated into one function.
+ */
+struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e;
+
+	PARANOIA_ENTRY();
+	if (lc->flags & LC_STARVING) {
+		++lc->starving;
+		RETURN(NULL);
+	}
+
+	e = lc_find(lc, enr);
+	if (e) {
+		++lc->hits;
+		if (e->refcnt++ == 0)
+			lc->used++;
+		list_move(&e->list, &lc->in_use); /* Not evictable... */
+	}
+	RETURN(e);
+}
+
+/**
+ * lc_changed - tell @lc that the change has been recorded
+ * @lc: the lru cache to operate on
+ * @e: the element pending label change
+ */
+void lc_changed(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_ENTRY();
+	BUG_ON(e != lc->changing_element);
+	PARANOIA_LC_ELEMENT(lc, e);
+	++lc->changed;
+	e->lc_number = lc->new_number;
+	list_add(&e->list, &lc->in_use);
+	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
+	lc->changing_element = NULL;
+	lc->new_number = LC_FREE;
+	clear_bit(__LC_DIRTY, &lc->flags);
+	smp_mb__after_clear_bit();
+	RETURN();
+}
+
+
+/**
+ * lc_put - give up refcnt of @e
+ * @lc: the lru cache to operate on
+ * @e: the element to put
+ *
+ * If refcnt reaches zero, the element is moved to the lru list,
+ * and a %LC_STARVING (if set) is cleared.
+ * Returns the new (post-decrement) refcnt.
+ */
+unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_ENTRY();
+	PARANOIA_LC_ELEMENT(lc, e);
+	BUG_ON(e->refcnt == 0);
+	BUG_ON(e == lc->changing_element);
+	if (--e->refcnt == 0) {
+		/* move it to the front of LRU. */
+		list_move(&e->list, &lc->lru);
+		lc->used--;
+		clear_bit(__LC_STARVING, &lc->flags);
+		smp_mb__after_clear_bit();
+	}
+	RETURN(e->refcnt);
+}
+
+/**
+ * lc_element_by_index
+ * @lc: the lru cache to operate on
+ * @i: the index of the element to return
+ */
+struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
+{
+	BUG_ON(i >= lc->nr_elements);
+	BUG_ON(lc->lc_element[i] == NULL);
+	BUG_ON(lc->lc_element[i]->lc_index != i);
+	return lc->lc_element[i];
+}
+
+/**
+ * lc_index_of
+ * @lc: the lru cache to operate on
+ * @e: the element to query for its index position in lc->element
+ */
+unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
+{
+	PARANOIA_LC_ELEMENT(lc, e);
+	return e->lc_index;
+}
+
+/**
+ * lc_set - associate index with label
+ * @lc: the lru cache to operate on
+ * @enr: the label to set
+ * @index: the element index to associate label with.
+ *
+ * Used to initialize the active set to some previously recorded state.
+ */
+void lc_set(struct lru_cache *lc, unsigned int enr, int index)
+{
+	struct lc_element *e;
+
+	if (index < 0 || index >= lc->nr_elements)
+		return;
+
+	e = lc_element_by_index(lc, index);
+	e->lc_number = enr;
+
+	hlist_del_init(&e->colision);
+	hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
+	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+}
+
+/**
+ * lc_dump - Dump a complete LRU cache to seq in textual form.
+ * @lc: the lru cache to operate on
+ * @seq: the &struct seq_file pointer to seq_printf into
+ * @utext: user supplied "heading" or other info
+ * @detail: function pointer the user may provide to dump further details
+ * of the object the lc_element is embedded in.
+ */
+void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
+	     void (*detail) (struct seq_file *, struct lc_element *))
+{
+	unsigned int nr_elements = lc->nr_elements;
+	struct lc_element *e;
+	int i;
+
+	seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
+	for (i = 0; i < nr_elements; i++) {
+		e = lc_element_by_index(lc, i);
+		if (e->lc_number == LC_FREE) {
+			seq_printf(seq, "\t%2d: FREE\n", i);
+		} else {
+			seq_printf(seq, "\t%2d: %4u %4u    ", i,
+				   e->lc_number, e->refcnt);
+			detail(seq, e);
+		}
+	}
+}
+
+EXPORT_SYMBOL(lc_create);
+EXPORT_SYMBOL(lc_reset);
+EXPORT_SYMBOL(lc_destroy);
+EXPORT_SYMBOL(lc_set);
+EXPORT_SYMBOL(lc_del);
+EXPORT_SYMBOL(lc_try_get);
+EXPORT_SYMBOL(lc_find);
+EXPORT_SYMBOL(lc_get);
+EXPORT_SYMBOL(lc_put);
+EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_element_by_index);
+EXPORT_SYMBOL(lc_index_of);
+EXPORT_SYMBOL(lc_seq_printf_stats);
+EXPORT_SYMBOL(lc_seq_dump_details);
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
index 5dc6b29c1575..f2fd09850223 100644
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -11,11 +11,13 @@
  *  Richard Purdie <rpurdie@openedhand.com>
  */
 
+#ifndef STATIC
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lzo.h>
-#include <asm/byteorder.h>
+#endif
+
 #include <asm/unaligned.h>
+#include <linux/lzo.h>
 #include "lzodefs.h"
 
 #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
@@ -244,9 +246,10 @@ lookbehind_overrun:
 	*out_len = op - out;
 	return LZO_E_LOOKBEHIND_OVERRUN;
 }
-
+#ifndef STATIC
 EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("LZO1X Decompressor");
 
+#endif
diff --git a/lib/nlattr.c b/lib/nlattr.c
index c4706eb98d3d..00e8a02681a6 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -15,7 +15,7 @@
 #include <linux/types.h>
 #include <net/netlink.h>
 
-static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
+static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = {
 	[NLA_U8]	= sizeof(u8),
 	[NLA_U16]	= sizeof(u16),
 	[NLA_U32]	= sizeof(u32),
@@ -23,7 +23,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
 	[NLA_NESTED]	= NLA_HDRLEN,
 };
 
-static int validate_nla(struct nlattr *nla, int maxtype,
+static int validate_nla(const struct nlattr *nla, int maxtype,
 			const struct nla_policy *policy)
 {
 	const struct nla_policy *pt;
@@ -115,10 +115,10 @@ static int validate_nla(struct nlattr *nla, int maxtype,
  *
  * Returns 0 on success or a negative error code.
  */
-int nla_validate(struct nlattr *head, int len, int maxtype,
+int nla_validate(const struct nlattr *head, int len, int maxtype,
 		 const struct nla_policy *policy)
 {
-	struct nlattr *nla;
+	const struct nlattr *nla;
 	int rem, err;
 
 	nla_for_each_attr(nla, head, len, rem) {
@@ -173,10 +173,10 @@ nla_policy_len(const struct nla_policy *p, int n)
  *
  * Returns 0 on success or a negative error code.
  */
-int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
-	      const struct nla_policy *policy)
+int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
+	      int len, const struct nla_policy *policy)
 {
-	struct nlattr *nla;
+	const struct nlattr *nla;
 	int rem, err;
 
 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
@@ -191,7 +191,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
 					goto errout;
 			}
 
-			tb[type] = nla;
+			tb[type] = (struct nlattr *)nla;
 		}
 	}
 
@@ -212,14 +212,14 @@ errout:
  *
  * Returns the first attribute in the stream matching the specified type.
  */
-struct nlattr *nla_find(struct nlattr *head, int len, int attrtype)
+struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype)
 {
-	struct nlattr *nla;
+	const struct nlattr *nla;
 	int rem;
 
 	nla_for_each_attr(nla, head, len, rem)
 		if (nla_type(nla) == attrtype)
-			return nla;
+			return (struct nlattr *)nla;
 
 	return NULL;
 }
diff --git a/lib/parser.c b/lib/parser.c
index b00d02059a5f..6e89eca5cca0 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -56,13 +56,16 @@ static int match_one(char *s, const char *p, substring_t args[])
 
 		args[argc].from = s;
 		switch (*p++) {
-		case 's':
-			if (strlen(s) == 0)
+		case 's': {
+			size_t str_len = strlen(s);
+
+			if (str_len == 0)
 				return 0;
-			else if (len == -1 || len > strlen(s))
-				len = strlen(s);
+			if (len == -1 || len > str_len)
+				len = str_len;
 			args[argc].to = s + len;
 			break;
+		}
 		case 'd':
 			simple_strtol(s, &args[argc].to, 0);
 			goto num;
@@ -125,12 +128,13 @@ static int match_number(substring_t *s, int *result, int base)
 	char *endp;
 	char *buf;
 	int ret;
+	size_t len = s->to - s->from;
 
-	buf = kmalloc(s->to - s->from + 1, GFP_KERNEL);
+	buf = kmalloc(len + 1, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
-	memcpy(buf, s->from, s->to - s->from);
-	buf[s->to - s->from] = '\0';
+	memcpy(buf, s->from, len);
+	buf[len] = '\0';
 	*result = simple_strtol(buf, &endp, base);
 	ret = 0;
 	if (endp == buf)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index aeaa6d734447..28f2c33c6b53 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -8,10 +8,53 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
+#include <linux/debugobjects.h>
 
 static LIST_HEAD(percpu_counters);
 static DEFINE_MUTEX(percpu_counters_lock);
 
+#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
+
+static struct debug_obj_descr percpu_counter_debug_descr;
+
+static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
+{
+	struct percpu_counter *fbc = addr;
+
+	switch (state) {
+	case ODEBUG_STATE_ACTIVE:
+		percpu_counter_destroy(fbc);
+		debug_object_free(fbc, &percpu_counter_debug_descr);
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static struct debug_obj_descr percpu_counter_debug_descr = {
+	.name		= "percpu_counter",
+	.fixup_free	= percpu_counter_fixup_free,
+};
+
+static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
+{
+	debug_object_init(fbc, &percpu_counter_debug_descr);
+	debug_object_activate(fbc, &percpu_counter_debug_descr);
+}
+
+static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
+{
+	debug_object_deactivate(fbc, &percpu_counter_debug_descr);
+	debug_object_free(fbc, &percpu_counter_debug_descr);
+}
+
+#else	/* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
+static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
+{ }
+static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
+{ }
+#endif	/* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
+
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 {
 	int cpu;
@@ -29,20 +72,18 @@ EXPORT_SYMBOL(percpu_counter_set);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	s64 count;
-	s32 *pcount;
-	int cpu = get_cpu();
 
-	pcount = per_cpu_ptr(fbc->counters, cpu);
-	count = *pcount + amount;
+	preempt_disable();
+	count = __this_cpu_read(*fbc->counters) + amount;
 	if (count >= batch || count <= -batch) {
 		spin_lock(&fbc->lock);
 		fbc->count += count;
-		*pcount = 0;
+		__this_cpu_write(*fbc->counters, 0);
 		spin_unlock(&fbc->lock);
 	} else {
-		*pcount = count;
+		__this_cpu_write(*fbc->counters, count);
 	}
-	put_cpu();
+	preempt_enable();
 }
 EXPORT_SYMBOL(__percpu_counter_add);
 
@@ -75,7 +116,11 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 	fbc->counters = alloc_percpu(s32);
 	if (!fbc->counters)
 		return -ENOMEM;
+
+	debug_percpu_counter_activate(fbc);
+
 #ifdef CONFIG_HOTPLUG_CPU
+	INIT_LIST_HEAD(&fbc->list);
 	mutex_lock(&percpu_counters_lock);
 	list_add(&fbc->list, &percpu_counters);
 	mutex_unlock(&percpu_counters_lock);
@@ -89,6 +134,8 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
 	if (!fbc->counters)
 		return;
 
+	debug_percpu_counter_deactivate(fbc);
+
 #ifdef CONFIG_HOTPLUG_CPU
 	mutex_lock(&percpu_counters_lock);
 	list_del(&fbc->list);
@@ -137,6 +184,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+/*
+ * Compare counter against given value.
+ * Return 1 if greater, 0 if equal and -1 if less
+ */
+int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+	s64	count;
+
+	count = percpu_counter_read(fbc);
+	/* Check to see if rough count will be sufficient for comparison */
+	if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+		if (count > rhs)
+			return 1;
+		else
+			return -1;
+	}
+	/* Need to use precise count */
+	count = percpu_counter_sum(fbc);
+	if (count > rhs)
+		return 1;
+	else if (count < rhs)
+		return -1;
+	else
+		return 0;
+}
+EXPORT_SYMBOL(percpu_counter_compare);
+
 static int __init percpu_counter_startup(void)
 {
 	compute_batch_value();
diff --git a/lib/plist.c b/lib/plist.c
index d6c64a824e1d..1471988d9190 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -54,9 +54,11 @@ static void plist_check_list(struct list_head *top)
 
 static void plist_check_head(struct plist_head *head)
 {
-	WARN_ON(!head->lock);
-	if (head->lock)
-		WARN_ON_SMP(!spin_is_locked(head->lock));
+	WARN_ON(!head->rawlock && !head->spinlock);
+	if (head->rawlock)
+		WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
+	if (head->spinlock)
+		WARN_ON_SMP(!spin_is_locked(head->spinlock));
 	plist_check_list(&head->prio_list);
 	plist_check_list(&head->node_list);
 }
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 23abbd93cae1..5086bb962b4d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -28,7 +28,6 @@
 #include <linux/slab.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
-#include <linux/gfp.h>
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/rcupdate.h>
@@ -50,7 +49,7 @@ struct radix_tree_node {
 	unsigned int	height;		/* Height from the bottom */
 	unsigned int	count;
 	struct rcu_head	rcu_head;
-	void		*slots[RADIX_TREE_MAP_SIZE];
+	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
@@ -83,6 +82,16 @@ struct radix_tree_preload {
 };
 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
 
+static inline void *ptr_to_indirect(void *ptr)
+{
+	return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
+}
+
+static inline void *indirect_to_ptr(void *ptr)
+{
+	return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
+}
+
 static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
 {
 	return root->gfp_mask & __GFP_BITS_MASK;
@@ -175,14 +184,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
 {
 	struct radix_tree_node *node =
 			container_of(head, struct radix_tree_node, rcu_head);
+	int i;
 
 	/*
 	 * must only free zeroed nodes into the slab. radix_tree_shrink
 	 * can leave us with a non-NULL entry in the first slot, so clear
 	 * that here to make sure.
 	 */
-	tag_clear(node, 0, 0);
-	tag_clear(node, 1, 0);
+	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
+		tag_clear(node, i, 0);
+
 	node->slots[0] = NULL;
 	node->count = 0;
 
@@ -200,6 +211,9 @@ radix_tree_node_free(struct radix_tree_node *node)
  * ensure that the addition of a single element in the tree cannot fail.  On
  * success, return zero, with preemption disabled.  On error, return -ENOMEM
  * with preemption not disabled.
+ *
+ * To make use of this facility, the radix tree must be initialised without
+ * __GFP_WAIT being passed to INIT_RADIX_TREE().
  */
 int radix_tree_preload(gfp_t gfp_mask)
 {
@@ -261,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 			return -ENOMEM;
 
 		/* Increase the height.  */
-		node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);
+		node->slots[0] = indirect_to_ptr(root->rnode);
 
 		/* Propagate the aggregated tag info into the new root */
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
@@ -272,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 		newheight = root->height+1;
 		node->height = newheight;
 		node->count = 1;
-		node = radix_tree_ptr_to_indirect(node);
+		node = ptr_to_indirect(node);
 		rcu_assign_pointer(root->rnode, node);
 		root->height = newheight;
 	} while (height > root->height);
@@ -305,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root,
 			return error;
 	}
 
-	slot = radix_tree_indirect_to_ptr(root->rnode);
+	slot = indirect_to_ptr(root->rnode);
 
 	height = root->height;
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
@@ -321,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
 				rcu_assign_pointer(node->slots[offset], slot);
 				node->count++;
 			} else
-				rcu_assign_pointer(root->rnode,
-					radix_tree_ptr_to_indirect(slot));
+				rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
 		}
 
 		/* Go a level down */
@@ -361,7 +374,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 	unsigned int height, shift;
 	struct radix_tree_node *node, **slot;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (node == NULL)
 		return NULL;
 
@@ -370,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 			return NULL;
 		return is_slot ? (void *)&root->rnode : node;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	height = node->height;
 	if (index > radix_tree_maxindex(height))
@@ -381,7 +394,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 	do {
 		slot = (struct radix_tree_node **)
 			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
-		node = rcu_dereference(*slot);
+		node = rcu_dereference_raw(*slot);
 		if (node == NULL)
 			return NULL;
 
@@ -389,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 		height--;
 	} while (height > 0);
 
-	return is_slot ? (void *)slot:node;
+	return is_slot ? (void *)slot : indirect_to_ptr(node);
 }
 
 /**
@@ -451,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
 	height = root->height;
 	BUG_ON(index > radix_tree_maxindex(height));
 
-	slot = radix_tree_indirect_to_ptr(root->rnode);
+	slot = indirect_to_ptr(root->rnode);
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 
 	while (height > 0) {
@@ -505,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
-	slot = radix_tree_indirect_to_ptr(root->rnode);
+	slot = indirect_to_ptr(root->rnode);
 
 	while (height > 0) {
 		int offset;
@@ -543,7 +556,6 @@ out:
 }
 EXPORT_SYMBOL(radix_tree_tag_clear);
 
-#ifndef __KERNEL__	/* Only the test harness uses this at present */
 /**
  * radix_tree_tag_get - get a tag on a radix tree node
  * @root:		radix tree root
@@ -554,6 +566,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
  *
  *  0: tag not present or not set
  *  1: tag set
+ *
+ * Note that the return value of this function may not be relied on, even if
+ * the RCU lock is held, unless tag modification and node deletion are excluded
+ * from concurrency.
  */
 int radix_tree_tag_get(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag)
@@ -566,13 +582,13 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 	if (!root_tag_get(root, tag))
 		return 0;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (node == NULL)
 		return 0;
 
 	if (!radix_tree_is_indirect_ptr(node))
 		return (index == 0);
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	height = node->height;
 	if (index > radix_tree_maxindex(height))
@@ -594,19 +610,142 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 		 */
 		if (!tag_get(node, tag, offset))
 			saw_unset_tag = 1;
-		if (height == 1) {
-			int ret = tag_get(node, tag, offset);
-
-			BUG_ON(ret && saw_unset_tag);
-			return !!ret;
-		}
-		node = rcu_dereference(node->slots[offset]);
+		if (height == 1)
+			return !!tag_get(node, tag, offset);
+		node = rcu_dereference_raw(node->slots[offset]);
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
 }
 EXPORT_SYMBOL(radix_tree_tag_get);
-#endif
+
+/**
+ * radix_tree_range_tag_if_tagged - for each item in given range set given
+ *				   tag if item has another tag set
+ * @root:		radix tree root
+ * @first_indexp:	pointer to a starting index of a range to scan
+ * @last_index:		last index of a range to scan
+ * @nr_to_tag:		maximum number items to tag
+ * @iftag:		tag index to test
+ * @settag:		tag index to set if tested tag is set
+ *
+ * This function scans range of radix tree from first_index to last_index
+ * (inclusive).  For each item in the range if iftag is set, the function sets
+ * also settag. The function stops either after tagging nr_to_tag items or
+ * after reaching last_index.
+ *
+ * The tags must be set from the leaf level only and propagated back up the
+ * path to the root. We must do this so that we resolve the full path before
+ * setting any tags on intermediate nodes. If we set tags as we descend, then
+ * we can get to the leaf node and find that the index that has the iftag
+ * set is outside the range we are scanning. This reults in dangling tags and
+ * can lead to problems with later tag operations (e.g. livelocks on lookups).
+ *
+ * The function returns number of leaves where the tag was set and sets
+ * *first_indexp to the first unscanned index.
+ * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
+ * be prepared to handle that.
+ */
+unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
+		unsigned long *first_indexp, unsigned long last_index,
+		unsigned long nr_to_tag,
+		unsigned int iftag, unsigned int settag)
+{
+	unsigned int height = root->height;
+	struct radix_tree_path path[height];
+	struct radix_tree_path *pathp = path;
+	struct radix_tree_node *slot;
+	unsigned int shift;
+	unsigned long tagged = 0;
+	unsigned long index = *first_indexp;
+
+	last_index = min(last_index, radix_tree_maxindex(height));
+	if (index > last_index)
+		return 0;
+	if (!nr_to_tag)
+		return 0;
+	if (!root_tag_get(root, iftag)) {
+		*first_indexp = last_index + 1;
+		return 0;
+	}
+	if (height == 0) {
+		*first_indexp = last_index + 1;
+		root_tag_set(root, settag);
+		return 1;
+	}
+
+	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+	slot = indirect_to_ptr(root->rnode);
+
+	/*
+	 * we fill the path from (root->height - 2) to 0, leaving the index at
+	 * (root->height - 1) as a terminator. Zero the node in the terminator
+	 * so that we can use this to end walk loops back up the path.
+	 */
+	path[height - 1].node = NULL;
+
+	for (;;) {
+		int offset;
+
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		if (!slot->slots[offset])
+			goto next;
+		if (!tag_get(slot, iftag, offset))
+			goto next;
+		if (height > 1) {
+			/* Go down one level */
+			height--;
+			shift -= RADIX_TREE_MAP_SHIFT;
+			path[height - 1].node = slot;
+			path[height - 1].offset = offset;
+			slot = slot->slots[offset];
+			continue;
+		}
+
+		/* tag the leaf */
+		tagged++;
+		tag_set(slot, settag, offset);
+
+		/* walk back up the path tagging interior nodes */
+		pathp = &path[0];
+		while (pathp->node) {
+			/* stop if we find a node with the tag already set */
+			if (tag_get(pathp->node, settag, pathp->offset))
+				break;
+			tag_set(pathp->node, settag, pathp->offset);
+			pathp++;
+		}
+
+next:
+		/* Go to next item at level determined by 'shift' */
+		index = ((index >> shift) + 1) << shift;
+		/* Overflow can happen when last_index is ~0UL... */
+		if (index > last_index || !index)
+			break;
+		if (tagged >= nr_to_tag)
+			break;
+		while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
+			/*
+			 * We've fully scanned this node. Go up. Because
+			 * last_index is guaranteed to be in the tree, what
+			 * we do below cannot wander astray.
+			 */
+			slot = path[height - 1].node;
+			height++;
+			shift += RADIX_TREE_MAP_SHIFT;
+		}
+	}
+	/*
+	 * The iftag must have been set somewhere because otherwise
+	 * we would return immediated at the beginning of the function
+	 */
+	root_tag_set(root, settag);
+	*first_indexp = index;
+
+	return tagged;
+}
+EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
+
 
 /**
  *	radix_tree_next_hole    -    find the next hole (not-present entry)
@@ -656,7 +795,7 @@ EXPORT_SYMBOL(radix_tree_next_hole);
  *
  *	Returns: the index of the hole if found, otherwise returns an index
  *	outside of the set specified (in which case 'index - return >= max_scan'
- *	will be true). In rare cases of wrap-around, LONG_MAX will be returned.
+ *	will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
  *
  *	radix_tree_next_hole may be called under rcu_read_lock. However, like
  *	radix_tree_gang_lookup, this will not atomically search a snapshot of
@@ -674,7 +813,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
 		if (!radix_tree_lookup(root, index))
 			break;
 		index--;
-		if (index == LONG_MAX)
+		if (index == ULONG_MAX)
 			break;
 	}
 
@@ -710,7 +849,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
 		}
 
 		shift -= RADIX_TREE_MAP_SHIFT;
-		slot = rcu_dereference(slot->slots[i]);
+		slot = rcu_dereference_raw(slot->slots[i]);
 		if (slot == NULL)
 			goto out;
 	}
@@ -757,7 +896,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 	unsigned long cur_index = first_index;
 	unsigned int ret;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (!node)
 		return 0;
 
@@ -767,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 		results[0] = node;
 		return 1;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -786,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			slot = *(((void ***)results)[ret + i]);
 			if (!slot)
 				continue;
-			results[ret + nr_found] = rcu_dereference(slot);
+			results[ret + nr_found] =
+				indirect_to_ptr(rcu_dereference_raw(slot));
 			nr_found++;
 		}
 		ret += nr_found;
@@ -825,7 +965,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
 	unsigned long cur_index = first_index;
 	unsigned int ret;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (!node)
 		return 0;
 
@@ -835,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
 		results[0] = (void **)&root->rnode;
 		return 1;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -914,7 +1054,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
 			}
 		}
 		shift -= RADIX_TREE_MAP_SHIFT;
-		slot = rcu_dereference(slot->slots[i]);
+		slot = rcu_dereference_raw(slot->slots[i]);
 		if (slot == NULL)
 			break;
 	}
@@ -950,7 +1090,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 	if (!root_tag_get(root, tag))
 		return 0;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (!node)
 		return 0;
 
@@ -960,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 		results[0] = node;
 		return 1;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -979,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 			slot = *(((void ***)results)[ret + i]);
 			if (!slot)
 				continue;
-			results[ret + nr_found] = rcu_dereference(slot);
+			results[ret + nr_found] =
+				indirect_to_ptr(rcu_dereference_raw(slot));
 			nr_found++;
 		}
 		ret += nr_found;
@@ -1019,7 +1160,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 	if (!root_tag_get(root, tag))
 		return 0;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (!node)
 		return 0;
 
@@ -1029,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 		results[0] = (void **)&root->rnode;
 		return 1;
 	}
-	node = radix_tree_indirect_to_ptr(node);
+	node = indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -1065,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
 		void *newptr;
 
 		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
-		to_free = radix_tree_indirect_to_ptr(to_free);
+		to_free = indirect_to_ptr(to_free);
 
 		/*
 		 * The candidate node has more than one child, or its child
@@ -1078,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
 
 		/*
 		 * We don't need rcu_assign_pointer(), since we are simply
-		 * moving the node from one part of the tree to another. If
-		 * it was safe to dereference the old pointer to it
+		 * moving the node from one part of the tree to another: if it
+		 * was safe to dereference the old pointer to it
 		 * (to_free->slots[0]), it will be safe to dereference the new
-		 * one (root->rnode).
+		 * one (root->rnode) as far as dependent read barriers go.
 		 */
 		newptr = to_free->slots[0];
 		if (root->height > 1)
-			newptr = radix_tree_ptr_to_indirect(newptr);
+			newptr = ptr_to_indirect(newptr);
 		root->rnode = newptr;
 		root->height--;
+
+		/*
+		 * We have a dilemma here. The node's slot[0] must not be
+		 * NULLed in case there are concurrent lookups expecting to
+		 * find the item. However if this was a bottom-level node,
+		 * then it may be subject to the slot pointer being visible
+		 * to callers dereferencing it. If item corresponding to
+		 * slot[0] is subsequently deleted, these callers would expect
+		 * their slot to become empty sooner or later.
+		 *
+		 * For example, lockless pagecache will look up a slot, deref
+		 * the page pointer, and if the page is 0 refcount it means it
+		 * was concurrently deleted from pagecache so try the deref
+		 * again. Fortunately there is already a requirement for logic
+		 * to retry the entire slot lookup -- the indirect pointer
+		 * problem (replacing direct root node with an indirect pointer
+		 * also results in a stale slot). So tag the slot as indirect
+		 * to force callers to retry.
+		 */
+		if (root->height == 0)
+			*((unsigned long *)&to_free->slots[0]) |=
+						RADIX_TREE_INDIRECT_PTR;
+
 		radix_tree_node_free(to_free);
 	}
 }
@@ -1124,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 		root->rnode = NULL;
 		goto out;
 	}
-	slot = radix_tree_indirect_to_ptr(slot);
+	slot = indirect_to_ptr(slot);
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
@@ -1166,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 			radix_tree_node_free(to_free);
 
 		if (pathp->node->count) {
-			if (pathp->node ==
-					radix_tree_indirect_to_ptr(root->rnode))
+			if (pathp->node == indirect_to_ptr(root->rnode))
 				radix_tree_shrink(root);
 			goto out;
 		}
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
new file mode 100644
index 000000000000..162becacf97c
--- /dev/null
+++ b/lib/raid6/.gitignore
@@ -0,0 +1,4 @@
+mktables
+altivec*.c
+int*.c
+tables.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
new file mode 100644
index 000000000000..8a38102770f3
--- /dev/null
+++ b/lib/raid6/Makefile
@@ -0,0 +1,75 @@
+obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o
+
+raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
+		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
+		   altivec8.o mmx.o sse1.o sse2.o
+hostprogs-y	+= mktables
+
+quiet_cmd_unroll = UNROLL  $@
+      cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \
+                   < $< > $@ || ( rm -f $@ && exit 1 )
+
+ifeq ($(CONFIG_ALTIVEC),y)
+altivec_flags := -maltivec -mabi=altivec
+endif
+
+targets += int1.c
+$(obj)/int1.c:   UNROLL := 1
+$(obj)/int1.c:   $(src)/int.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+targets += int2.c
+$(obj)/int2.c:   UNROLL := 2
+$(obj)/int2.c:   $(src)/int.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+targets += int4.c
+$(obj)/int4.c:   UNROLL := 4
+$(obj)/int4.c:   $(src)/int.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+targets += int8.c
+$(obj)/int8.c:   UNROLL := 8
+$(obj)/int8.c:   $(src)/int.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+targets += int16.c
+$(obj)/int16.c:  UNROLL := 16
+$(obj)/int16.c:  $(src)/int.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+targets += int32.c
+$(obj)/int32.c:  UNROLL := 32
+$(obj)/int32.c:  $(src)/int.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_altivec1.o += $(altivec_flags)
+targets += altivec1.c
+$(obj)/altivec1.c:   UNROLL := 1
+$(obj)/altivec1.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_altivec2.o += $(altivec_flags)
+targets += altivec2.c
+$(obj)/altivec2.c:   UNROLL := 2
+$(obj)/altivec2.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_altivec4.o += $(altivec_flags)
+targets += altivec4.c
+$(obj)/altivec4.c:   UNROLL := 4
+$(obj)/altivec4.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_altivec8.o += $(altivec_flags)
+targets += altivec8.c
+$(obj)/altivec8.c:   UNROLL := 8
+$(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+quiet_cmd_mktable = TABLE   $@
+      cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
+
+targets += tables.c
+$(obj)/tables.c: $(obj)/mktables FORCE
+	$(call if_changed,mktable)
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
new file mode 100644
index 000000000000..b595f560bee7
--- /dev/null
+++ b/lib/raid6/algos.c
@@ -0,0 +1,154 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/algos.c
+ *
+ * Algorithm list and algorithm selection for RAID-6
+ */
+
+#include <linux/raid/pq.h>
+#ifndef __KERNEL__
+#include <sys/mman.h>
+#include <stdio.h>
+#else
+#include <linux/gfp.h>
+#if !RAID6_USE_EMPTY_ZERO_PAGE
+/* In .bss so it's zeroed */
+const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
+EXPORT_SYMBOL(raid6_empty_zero_page);
+#endif
+#endif
+
+struct raid6_calls raid6_call;
+EXPORT_SYMBOL_GPL(raid6_call);
+
+const struct raid6_calls * const raid6_algos[] = {
+	&raid6_intx1,
+	&raid6_intx2,
+	&raid6_intx4,
+	&raid6_intx8,
+#if defined(__ia64__)
+	&raid6_intx16,
+	&raid6_intx32,
+#endif
+#if defined(__i386__) && !defined(__arch_um__)
+	&raid6_mmxx1,
+	&raid6_mmxx2,
+	&raid6_sse1x1,
+	&raid6_sse1x2,
+	&raid6_sse2x1,
+	&raid6_sse2x2,
+#endif
+#if defined(__x86_64__) && !defined(__arch_um__)
+	&raid6_sse2x1,
+	&raid6_sse2x2,
+	&raid6_sse2x4,
+#endif
+#ifdef CONFIG_ALTIVEC
+	&raid6_altivec1,
+	&raid6_altivec2,
+	&raid6_altivec4,
+	&raid6_altivec8,
+#endif
+	NULL
+};
+
+#ifdef __KERNEL__
+#define RAID6_TIME_JIFFIES_LG2	4
+#else
+/* Need more time to be stable in userspace */
+#define RAID6_TIME_JIFFIES_LG2	9
+#define time_before(x, y) ((x) < (y))
+#endif
+
+/* Try to pick the best algorithm */
+/* This code uses the gfmul table as convenient data set to abuse */
+
+int __init raid6_select_algo(void)
+{
+	const struct raid6_calls * const * algo;
+	const struct raid6_calls * best;
+	char *syndromes;
+	void *dptrs[(65536/PAGE_SIZE)+2];
+	int i, disks;
+	unsigned long perf, bestperf;
+	int bestprefer;
+	unsigned long j0, j1;
+
+	disks = (65536/PAGE_SIZE)+2;
+	for ( i = 0 ; i < disks-2 ; i++ ) {
+		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
+	}
+
+	/* Normal code - use a 2-page allocation to avoid D$ conflict */
+	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
+
+	if ( !syndromes ) {
+		printk("raid6: Yikes!  No memory available.\n");
+		return -ENOMEM;
+	}
+
+	dptrs[disks-2] = syndromes;
+	dptrs[disks-1] = syndromes + PAGE_SIZE;
+
+	bestperf = 0;  bestprefer = 0;  best = NULL;
+
+	for ( algo = raid6_algos ; *algo ; algo++ ) {
+		if ( !(*algo)->valid || (*algo)->valid() ) {
+			perf = 0;
+
+			preempt_disable();
+			j0 = jiffies;
+			while ( (j1 = jiffies) == j0 )
+				cpu_relax();
+			while (time_before(jiffies,
+					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+				(*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
+				perf++;
+			}
+			preempt_enable();
+
+			if ( (*algo)->prefer > bestprefer ||
+			     ((*algo)->prefer == bestprefer &&
+			      perf > bestperf) ) {
+				best = *algo;
+				bestprefer = best->prefer;
+				bestperf = perf;
+			}
+			printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
+			       (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+		}
+	}
+
+	if (best) {
+		printk("raid6: using algorithm %s (%ld MB/s)\n",
+		       best->name,
+		       (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+		raid6_call = *best;
+	} else
+		printk("raid6: Yikes!  No algorithm found!\n");
+
+	free_pages((unsigned long)syndromes, 1);
+
+	return best ? 0 : -EINVAL;
+}
+
+static void raid6_exit(void)
+{
+	do { } while (0);
+}
+
+subsys_initcall(raid6_select_algo);
+module_exit(raid6_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAID6 Q-syndrome calculations");
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
new file mode 100644
index 000000000000..2654d5c854be
--- /dev/null
+++ b/lib/raid6/altivec.uc
@@ -0,0 +1,130 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6altivec$#.c
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ *
+ * This file is postprocessed using unroll.awk
+ *
+ * <benh> hpa: in process,
+ * you can just "steal" the vec unit with enable_kernel_altivec() (but
+ * bracked this with preempt_disable/enable or in a lock)
+ */
+
+#include <linux/raid/pq.h>
+
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+# include <asm/system.h>
+# include <asm/cputable.h>
+#endif
+
+/*
+ * This is the C data type to use.  We use a vector of
+ * signed char so vec_cmpgt() will generate the right
+ * instruction.
+ */
+
+typedef vector signed char unative_t;
+
+#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
+#define NSIZE	sizeof(unative_t)
+
+/*
+ * The SHLBYTE() operation shifts each byte left by 1, *not*
+ * rolling over into the next byte
+ */
+static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
+{
+	return vec_add(v,v);
+}
+
+/*
+ * The MASK() operation returns 0xFF in any byte for which the high
+ * bit is 1, 0x00 for any byte for which the high bit is 0.
+ */
+static inline __attribute_const__ unative_t MASK(unative_t v)
+{
+	unative_t zv = NBYTES(0);
+
+	/* vec_cmpgt returns a vector bool char; thus the need for the cast */
+	return (unative_t)vec_cmpgt(zv, v);
+}
+
+
+/* This is noinline to make damned sure that gcc doesn't move any of the
+   Altivec code around the enable/disable code */
+static void noinline
+raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+	unative_t x1d = NBYTES(0x1d);
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+			wp$$ = vec_xor(wp$$, wd$$);
+			w2$$ = MASK(wq$$);
+			w1$$ = SHLBYTE(wq$$);
+			w2$$ = vec_and(w2$$, x1d);
+			w1$$ = vec_xor(w1$$, w2$$);
+			wq$$ = vec_xor(w1$$, wd$$);
+		}
+		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
+		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
+	}
+}
+
+static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	preempt_disable();
+	enable_kernel_altivec();
+
+	raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);
+
+	preempt_enable();
+}
+
+int raid6_have_altivec(void);
+#if $# == 1
+int raid6_have_altivec(void)
+{
+	/* This assumes either all CPUs have Altivec or none does */
+# ifdef __KERNEL__
+	return cpu_has_feature(CPU_FTR_ALTIVEC);
+# else
+	return 1;
+# endif
+}
+#endif
+
+const struct raid6_calls raid6_altivec$# = {
+	raid6_altivec$#_gen_syndrome,
+	raid6_have_altivec,
+	"altivecx$#",
+	0
+};
+
+#endif /* CONFIG_ALTIVEC */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
new file mode 100644
index 000000000000..d1e276a14fab
--- /dev/null
+++ b/lib/raid6/int.uc
@@ -0,0 +1,117 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6int$#.c
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ *
+ * This file is postprocessed using unroll.awk
+ */
+
+#include <linux/raid/pq.h>
+
+/*
+ * This is the C data type to use
+ */
+
+/* Change this from BITS_PER_LONG if there is something better... */
+#if BITS_PER_LONG == 64
+# define NBYTES(x) ((x) * 0x0101010101010101UL)
+# define NSIZE  8
+# define NSHIFT 3
+# define NSTRING "64"
+typedef u64 unative_t;
+#else
+# define NBYTES(x) ((x) * 0x01010101U)
+# define NSIZE  4
+# define NSHIFT 2
+# define NSTRING "32"
+typedef u32 unative_t;
+#endif
+
+
+
+/*
+ * IA-64 wants insane amounts of unrolling.  On other architectures that
+ * is just a waste of space.
+ */
+#if ($# <= 8) || defined(__ia64__)
+
+
+/*
+ * These sub-operations are separate inlines since they can sometimes be
+ * specially optimized using architecture-specific hacks.
+ */
+
+/*
+ * The SHLBYTE() operation shifts each byte left by 1, *not*
+ * rolling over into the next byte
+ */
+static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
+{
+	unative_t vv;
+
+	vv = (v << 1) & NBYTES(0xfe);
+	return vv;
+}
+
+/*
+ * The MASK() operation returns 0xFF in any byte for which the high
+ * bit is 1, 0x00 for any byte for which the high bit is 0.
+ */
+static inline __attribute_const__ unative_t MASK(unative_t v)
+{
+	unative_t vv;
+
+	vv = v & NBYTES(0x80);
+	vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
+	return vv;
+}
+
+
+static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+			wp$$ ^= wd$$;
+			w2$$ = MASK(wq$$);
+			w1$$ = SHLBYTE(wq$$);
+			w2$$ &= NBYTES(0x1d);
+			w1$$ ^= w2$$;
+			wq$$ = w1$$ ^ wd$$;
+		}
+		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
+		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
+	}
+}
+
+const struct raid6_calls raid6_intx$# = {
+	raid6_int$#_gen_syndrome,
+	NULL,		/* always valid */
+	"int" NSTRING "x$#",
+	0
+};
+
+#endif
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
new file mode 100644
index 000000000000..3b1500843bba
--- /dev/null
+++ b/lib/raid6/mktables.c
@@ -0,0 +1,132 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * mktables.c
+ *
+ * Make RAID-6 tables.  This is a host user space program to be run at
+ * compile time.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <time.h>
+
+static uint8_t gfmul(uint8_t a, uint8_t b)
+{
+	uint8_t v = 0;
+
+	while (b) {
+		if (b & 1)
+			v ^= a;
+		a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
+		b >>= 1;
+	}
+
+	return v;
+}
+
+static uint8_t gfpow(uint8_t a, int b)
+{
+	uint8_t v = 1;
+
+	b %= 255;
+	if (b < 0)
+		b += 255;
+
+	while (b) {
+		if (b & 1)
+			v = gfmul(v, a);
+		a = gfmul(a, a);
+		b >>= 1;
+	}
+
+	return v;
+}
+
+int main(int argc, char *argv[])
+{
+	int i, j, k;
+	uint8_t v;
+	uint8_t exptbl[256], invtbl[256];
+
+	printf("#include <linux/raid/pq.h>\n");
+
+	/* Compute multiplication table */
+	printf("\nconst u8  __attribute__((aligned(256)))\n"
+		"raid6_gfmul[256][256] =\n"
+		"{\n");
+	for (i = 0; i < 256; i++) {
+		printf("\t{\n");
+		for (j = 0; j < 256; j += 8) {
+			printf("\t\t");
+			for (k = 0; k < 8; k++)
+				printf("0x%02x,%c", gfmul(i, j + k),
+				       (k == 7) ? '\n' : ' ');
+		}
+		printf("\t},\n");
+	}
+	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_gfmul);\n");
+	printf("#endif\n");
+
+	/* Compute power-of-2 table (exponent) */
+	v = 1;
+	printf("\nconst u8 __attribute__((aligned(256)))\n"
+	       "raid6_gfexp[256] =\n" "{\n");
+	for (i = 0; i < 256; i += 8) {
+		printf("\t");
+		for (j = 0; j < 8; j++) {
+			exptbl[i + j] = v;
+			printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
+			v = gfmul(v, 2);
+			if (v == 1)
+				v = 0;	/* For entry 255, not a real entry */
+		}
+	}
+	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_gfexp);\n");
+	printf("#endif\n");
+
+	/* Compute inverse table x^-1 == x^254 */
+	printf("\nconst u8 __attribute__((aligned(256)))\n"
+	       "raid6_gfinv[256] =\n" "{\n");
+	for (i = 0; i < 256; i += 8) {
+		printf("\t");
+		for (j = 0; j < 8; j++) {
+			invtbl[i + j] = v = gfpow(i + j, 254);
+			printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
+		}
+	}
+	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_gfinv);\n");
+	printf("#endif\n");
+
+	/* Compute inv(2^x + 1) (exponent-xor-inverse) table */
+	printf("\nconst u8 __attribute__((aligned(256)))\n"
+	       "raid6_gfexi[256] =\n" "{\n");
+	for (i = 0; i < 256; i += 8) {
+		printf("\t");
+		for (j = 0; j < 8; j++)
+			printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1],
+			       (j == 7) ? '\n' : ' ');
+	}
+	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_gfexi);\n");
+	printf("#endif\n");
+
+	return 0;
+}
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
new file mode 100644
index 000000000000..279347f23094
--- /dev/null
+++ b/lib/raid6/mmx.c
@@ -0,0 +1,142 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/mmx.c
+ *
+ * MMX implementation of RAID-6 syndrome functions
+ */
+
+#if defined(__i386__) && !defined(__arch_um__)
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+/* Shared with raid6/sse1.c */
+const struct raid6_mmx_constants {
+	u64 x1d;
+} raid6_mmx_constants = {
+	0x1d1d1d1d1d1d1d1dULL,
+};
+
+static int raid6_have_mmx(void)
+{
+	/* Not really "boot_cpu" but "all_cpus" */
+	return boot_cpu_has(X86_FEATURE_MMX);
+}
+
+/*
+ * Plain MMX implementation
+ */
+static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
+
+	for ( d = 0 ; d < bytes ; d += 8 ) {
+		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+		asm volatile("movq %mm2,%mm4");	/* Q[0] */
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
+			asm volatile("pcmpgtb %mm4,%mm5");
+			asm volatile("paddb %mm4,%mm4");
+			asm volatile("pand %mm0,%mm5");
+			asm volatile("pxor %mm5,%mm4");
+			asm volatile("pxor %mm5,%mm5");
+			asm volatile("pxor %mm6,%mm2");
+			asm volatile("pxor %mm6,%mm4");
+		}
+		asm volatile("movq %%mm2,%0" : "=m" (p[d]));
+		asm volatile("pxor %mm2,%mm2");
+		asm volatile("movq %%mm4,%0" : "=m" (q[d]));
+		asm volatile("pxor %mm4,%mm4");
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_mmxx1 = {
+	raid6_mmx1_gen_syndrome,
+	raid6_have_mmx,
+	"mmxx1",
+	0
+};
+
+/*
+ * Unrolled-by-2 MMX implementation
+ */
+static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
+	asm volatile("pxor %mm7,%mm7"); /* Zero temp */
+
+	for ( d = 0 ; d < bytes ; d += 16 ) {
+		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
+		asm volatile("movq %mm2,%mm4"); /* Q[0] */
+		asm volatile("movq %mm3,%mm6"); /* Q[1] */
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			asm volatile("pcmpgtb %mm4,%mm5");
+			asm volatile("pcmpgtb %mm6,%mm7");
+			asm volatile("paddb %mm4,%mm4");
+			asm volatile("paddb %mm6,%mm6");
+			asm volatile("pand %mm0,%mm5");
+			asm volatile("pand %mm0,%mm7");
+			asm volatile("pxor %mm5,%mm4");
+			asm volatile("pxor %mm7,%mm6");
+			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
+			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
+			asm volatile("pxor %mm5,%mm2");
+			asm volatile("pxor %mm7,%mm3");
+			asm volatile("pxor %mm5,%mm4");
+			asm volatile("pxor %mm7,%mm6");
+			asm volatile("pxor %mm5,%mm5");
+			asm volatile("pxor %mm7,%mm7");
+		}
+		asm volatile("movq %%mm2,%0" : "=m" (p[d]));
+		asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
+		asm volatile("movq %%mm4,%0" : "=m" (q[d]));
+		asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_mmxx2 = {
+	raid6_mmx2_gen_syndrome,
+	raid6_have_mmx,
+	"mmxx2",
+	0
+};
+
+#endif
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
new file mode 100644
index 000000000000..8590d19cf522
--- /dev/null
+++ b/lib/raid6/recov.c
@@ -0,0 +1,132 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/recov.c
+ *
+ * RAID-6 data recovery in dual failure mode.  In single failure mode,
+ * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct
+ * the syndrome.)
+ */
+
+#include <linux/raid/pq.h>
+
+/* Recover two failed data blocks. */
+void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+		       void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	u8 px, qx, db;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	   Use the dead data pages as temporary storage for
+	   delta p and delta q */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dp;
+	ptrs[failb]   = dq;
+	ptrs[disks-2] = p;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+	/* Now do it... */
+	while ( bytes-- ) {
+		px    = *p ^ *dp;
+		qx    = qmul[*q ^ *dq];
+		*dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
+		*dp++ = db ^ px; /* Reconstructed A */
+		p++; q++;
+	}
+}
+EXPORT_SYMBOL_GPL(raid6_2data_recov);
+
+/* Recover failure of one data block plus the P block */
+void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	   Use the dead data page as temporary storage for delta q */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dq;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	/* Now do it... */
+	while ( bytes-- ) {
+		*p++ ^= *dq = qmul[*q ^ *dq];
+		q++; dq++;
+	}
+}
+EXPORT_SYMBOL_GPL(raid6_datap_recov);
+
+#ifndef __KERNEL__
+/* Testing only */
+
+/* Recover two failed blocks. */
+void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
+{
+	if ( faila > failb ) {
+		int tmp = faila;
+		faila = failb;
+		failb = tmp;
+	}
+
+	if ( failb == disks-1 ) {
+		if ( faila == disks-2 ) {
+			/* P+Q failure.  Just rebuild the syndrome. */
+			raid6_call.gen_syndrome(disks, bytes, ptrs);
+		} else {
+			/* data+Q failure.  Reconstruct data from P,
+			   then rebuild syndrome. */
+			/* NOT IMPLEMENTED - equivalent to RAID-5 */
+		}
+	} else {
+		if ( failb == disks-2 ) {
+			/* data+P failure. */
+			raid6_datap_recov(disks, bytes, faila, ptrs);
+		} else {
+			/* data+data failure. */
+			raid6_2data_recov(disks, bytes, faila, failb, ptrs);
+		}
+	}
+}
+
+#endif
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
new file mode 100644
index 000000000000..10dd91948c07
--- /dev/null
+++ b/lib/raid6/sse1.c
@@ -0,0 +1,162 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/sse1.c
+ *
+ * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
+ *
+ * This is really an MMX implementation, but it requires SSE-1 or
+ * AMD MMXEXT for prefetch support and a few other features.  The
+ * support for nontemporal memory accesses is enough to make this
+ * worthwhile as a separate implementation.
+ */
+
+#if defined(__i386__) && !defined(__arch_um__)
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+/* Defined in raid6/mmx.c */
+extern const struct raid6_mmx_constants {
+	u64 x1d;
+} raid6_mmx_constants;
+
+static int raid6_have_sse1_or_mmxext(void)
+{
+	/* Not really boot_cpu but "all_cpus" */
+	return boot_cpu_has(X86_FEATURE_MMX) &&
+		(boot_cpu_has(X86_FEATURE_XMM) ||
+		 boot_cpu_has(X86_FEATURE_MMXEXT));
+}
+
+/*
+ * Plain SSE1 implementation
+ */
+static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
+
+	for ( d = 0 ; d < bytes ; d += 8 ) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+		asm volatile("movq %mm2,%mm4");	/* Q[0] */
+		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
+		for ( z = z0-2 ; z >= 0 ; z-- ) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("pcmpgtb %mm4,%mm5");
+			asm volatile("paddb %mm4,%mm4");
+			asm volatile("pand %mm0,%mm5");
+			asm volatile("pxor %mm5,%mm4");
+			asm volatile("pxor %mm5,%mm5");
+			asm volatile("pxor %mm6,%mm2");
+			asm volatile("pxor %mm6,%mm4");
+			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
+		}
+		asm volatile("pcmpgtb %mm4,%mm5");
+		asm volatile("paddb %mm4,%mm4");
+		asm volatile("pand %mm0,%mm5");
+		asm volatile("pxor %mm5,%mm4");
+		asm volatile("pxor %mm5,%mm5");
+		asm volatile("pxor %mm6,%mm2");
+		asm volatile("pxor %mm6,%mm4");
+
+		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
+		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse1x1 = {
+	raid6_sse11_gen_syndrome,
+	raid6_have_sse1_or_mmxext,
+	"sse1x1",
+	1			/* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 SSE1 implementation
+ */
+static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
+	asm volatile("pxor %mm7,%mm7"); /* Zero temp */
+
+	/* We uniformly assume a single prefetch covers at least 16 bytes */
+	for ( d = 0 ; d < bytes ; d += 16 ) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
+		asm volatile("movq %mm2,%mm4");	/* Q[0] */
+		asm volatile("movq %mm3,%mm6"); /* Q[1] */
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("pcmpgtb %mm4,%mm5");
+			asm volatile("pcmpgtb %mm6,%mm7");
+			asm volatile("paddb %mm4,%mm4");
+			asm volatile("paddb %mm6,%mm6");
+			asm volatile("pand %mm0,%mm5");
+			asm volatile("pand %mm0,%mm7");
+			asm volatile("pxor %mm5,%mm4");
+			asm volatile("pxor %mm7,%mm6");
+			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
+			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
+			asm volatile("pxor %mm5,%mm2");
+			asm volatile("pxor %mm7,%mm3");
+			asm volatile("pxor %mm5,%mm4");
+			asm volatile("pxor %mm7,%mm6");
+			asm volatile("pxor %mm5,%mm5");
+			asm volatile("pxor %mm7,%mm7");
+		}
+		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
+		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
+		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
+		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
+	}
+
+	asm volatile("sfence" : :: "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse1x2 = {
+	raid6_sse12_gen_syndrome,
+	raid6_have_sse1_or_mmxext,
+	"sse1x2",
+	1			/* Has cache hints */
+};
+
+#endif
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
new file mode 100644
index 000000000000..bc2d57daa589
--- /dev/null
+++ b/lib/raid6/sse2.c
@@ -0,0 +1,262 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/sse2.c
+ *
+ * SSE-2 implementation of RAID-6 syndrome functions
+ *
+ */
+
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static const struct raid6_sse_constants {
+	u64 x1d[2];
+} raid6_sse_constants  __attribute__((aligned(16))) = {
+	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
+};
+
+static int raid6_have_sse2(void)
+{
+	/* Not really boot_cpu but "all_cpus" */
+	return boot_cpu_has(X86_FEATURE_MMX) &&
+		boot_cpu_has(X86_FEATURE_FXSR) &&
+		boot_cpu_has(X86_FEATURE_XMM) &&
+		boot_cpu_has(X86_FEATURE_XMM2);
+}
+
+/*
+ * Plain SSE2 implementation
+ */
+static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
+
+	for ( d = 0 ; d < bytes ; d += 16 ) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
+		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
+		for ( z = z0-2 ; z >= 0 ; z-- ) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm6,%xmm2");
+			asm volatile("pxor %xmm6,%xmm4");
+			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
+		}
+		asm volatile("pcmpgtb %xmm4,%xmm5");
+		asm volatile("paddb %xmm4,%xmm4");
+		asm volatile("pand %xmm0,%xmm5");
+		asm volatile("pxor %xmm5,%xmm4");
+		asm volatile("pxor %xmm5,%xmm5");
+		asm volatile("pxor %xmm6,%xmm2");
+		asm volatile("pxor %xmm6,%xmm4");
+
+		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+		asm volatile("pxor %xmm2,%xmm2");
+		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+		asm volatile("pxor %xmm4,%xmm4");
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse2x1 = {
+	raid6_sse21_gen_syndrome,
+	raid6_have_sse2,
+	"sse2x1",
+	1			/* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 SSE2 implementation
+ */
+static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
+	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
+
+	/* We uniformly assume a single prefetch covers at least 32 bytes */
+	for ( d = 0 ; d < bytes ; d += 32 ) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
+		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
+		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
+		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
+			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
+			asm volatile("pxor %xmm5,%xmm2");
+			asm volatile("pxor %xmm7,%xmm3");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+		}
+		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse2x2 = {
+	raid6_sse22_gen_syndrome,
+	raid6_have_sse2,
+	"sse2x2",
+	1			/* Has cache hints */
+};
+
+#endif
+
+#if defined(__x86_64__) && !defined(__arch_um__)
+
+/*
+ * Unrolled-by-4 SSE2 implementation
+ */
+static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
+	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
+	asm volatile("pxor %xmm4,%xmm4"); 	/* Q[0] */
+	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
+	asm volatile("pxor %xmm6,%xmm6"); 	/* Q[1] */
+	asm volatile("pxor %xmm7,%xmm7"); 	/* Zero temp */
+	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
+	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
+	asm volatile("pxor %xmm12,%xmm12"); 	/* Q[2] */
+	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
+	asm volatile("pxor %xmm14,%xmm14"); 	/* Q[3] */
+	asm volatile("pxor %xmm15,%xmm15"); 	/* Zero temp */
+
+	for ( d = 0 ; d < bytes ; d += 64 ) {
+		for ( z = z0 ; z >= 0 ; z-- ) {
+			/* The second prefetch seems to improve performance... */
+			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("pcmpgtb %xmm12,%xmm13");
+			asm volatile("pcmpgtb %xmm14,%xmm15");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("paddb %xmm12,%xmm12");
+			asm volatile("paddb %xmm14,%xmm14");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pand %xmm0,%xmm13");
+			asm volatile("pand %xmm0,%xmm15");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("pxor %xmm13,%xmm12");
+			asm volatile("pxor %xmm15,%xmm14");
+			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+			asm volatile("pxor %xmm5,%xmm2");
+			asm volatile("pxor %xmm7,%xmm3");
+			asm volatile("pxor %xmm13,%xmm10");
+			asm volatile("pxor %xmm15,%xmm11");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("pxor %xmm13,%xmm12");
+			asm volatile("pxor %xmm15,%xmm14");
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+			asm volatile("pxor %xmm13,%xmm13");
+			asm volatile("pxor %xmm15,%xmm15");
+		}
+		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+		asm volatile("pxor %xmm2,%xmm2");
+		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+		asm volatile("pxor %xmm3,%xmm3");
+		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+		asm volatile("pxor %xmm10,%xmm10");
+		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+		asm volatile("pxor %xmm11,%xmm11");
+		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+		asm volatile("pxor %xmm4,%xmm4");
+		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+		asm volatile("pxor %xmm6,%xmm6");
+		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+		asm volatile("pxor %xmm12,%xmm12");
+		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+		asm volatile("pxor %xmm14,%xmm14");
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse2x4 = {
+	raid6_sse24_gen_syndrome,
+	raid6_have_sse2,
+	"sse2x4",
+	1			/* Has cache hints */
+};
+
+#endif
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
new file mode 100644
index 000000000000..aa651697b6dc
--- /dev/null
+++ b/lib/raid6/test/Makefile
@@ -0,0 +1,72 @@
+#
+# This is a simple Makefile to test some of the RAID-6 code
+# from userspace.
+#
+
+CC	 = gcc
+OPTFLAGS = -O2			# Adjust as desired
+CFLAGS	 = -I.. -I ../../../include -g $(OPTFLAGS)
+LD	 = ld
+AWK	 = awk -f
+AR	 = ar
+RANLIB	 = ranlib
+
+.c.o:
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+%.c: ../%.c
+	cp -f $< $@
+
+%.uc: ../%.uc
+	cp -f $< $@
+
+all:	raid6.a raid6test
+
+raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
+	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \
+	 tables.o
+	 rm -f $@
+	 $(AR) cq $@ $^
+	 $(RANLIB) $@
+
+raid6test: test.c raid6.a
+	$(CC) $(CFLAGS) -o raid6test $^
+
+altivec1.c: altivec.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=1 < altivec.uc > $@
+
+altivec2.c: altivec.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=2 < altivec.uc > $@
+
+altivec4.c: altivec.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=4 < altivec.uc > $@
+
+altivec8.c: altivec.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
+
+int1.c: int.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=1 < int.uc > $@
+
+int2.c: int.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=2 < int.uc > $@
+
+int4.c: int.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=4 < int.uc > $@
+
+int8.c: int.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < int.uc > $@
+
+int16.c: int.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=16 < int.uc > $@
+
+int32.c: int.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=32 < int.uc > $@
+
+tables.c: mktables
+	./mktables > tables.c
+
+clean:
+	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test
+
+spotless: clean
+	rm -f *~
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
new file mode 100644
index 000000000000..7a930318b17d
--- /dev/null
+++ b/lib/raid6/test/test.c
@@ -0,0 +1,124 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6test.c
+ *
+ * Test RAID-6 recovery with various algorithms
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/raid/pq.h>
+
+#define NDISKS		16	/* Including P and Q */
+
+const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
+struct raid6_calls raid6_call;
+
+char *dataptrs[NDISKS];
+char data[NDISKS][PAGE_SIZE];
+char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
+
+static void makedata(void)
+{
+	int i, j;
+
+	for (i = 0; i < NDISKS; i++) {
+		for (j = 0; j < PAGE_SIZE; j++)
+			data[i][j] = rand();
+
+		dataptrs[i] = data[i];
+	}
+}
+
+static char disk_type(int d)
+{
+	switch (d) {
+	case NDISKS-2:
+		return 'P';
+	case NDISKS-1:
+		return 'Q';
+	default:
+		return 'D';
+	}
+}
+
+static int test_disks(int i, int j)
+{
+	int erra, errb;
+
+	memset(recovi, 0xf0, PAGE_SIZE);
+	memset(recovj, 0xba, PAGE_SIZE);
+
+	dataptrs[i] = recovi;
+	dataptrs[j] = recovj;
+
+	raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs);
+
+	erra = memcmp(data[i], recovi, PAGE_SIZE);
+	errb = memcmp(data[j], recovj, PAGE_SIZE);
+
+	if (i < NDISKS-2 && j == NDISKS-1) {
+		/* We don't implement the DQ failure scenario, since it's
+		   equivalent to a RAID-5 failure (XOR, then recompute Q) */
+		erra = errb = 0;
+	} else {
+		printf("algo=%-8s  faila=%3d(%c)  failb=%3d(%c)  %s\n",
+		       raid6_call.name,
+		       i, disk_type(i),
+		       j, disk_type(j),
+		       (!erra && !errb) ? "OK" :
+		       !erra ? "ERRB" :
+		       !errb ? "ERRA" : "ERRAB");
+	}
+
+	dataptrs[i] = data[i];
+	dataptrs[j] = data[j];
+
+	return erra || errb;
+}
+
+int main(int argc, char *argv[])
+{
+	const struct raid6_calls *const *algo;
+	int i, j;
+	int err = 0;
+
+	makedata();
+
+	for (algo = raid6_algos; *algo; algo++) {
+		if (!(*algo)->valid || (*algo)->valid()) {
+			raid6_call = **algo;
+
+			/* Nuke syndromes */
+			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+			/* Generate assumed good syndrome */
+			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+						(void **)&dataptrs);
+
+			for (i = 0; i < NDISKS-1; i++)
+				for (j = i+1; j < NDISKS; j++)
+					err += test_disks(i, j);
+		}
+		printf("\n");
+	}
+
+	printf("\n");
+	/* Pick the best algorithm test */
+	raid6_select_algo();
+
+	if (err)
+		printf("\n*** ERRORS FOUND ***\n");
+
+	return err;
+}
diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk
new file mode 100644
index 000000000000..c6aa03631df8
--- /dev/null
+++ b/lib/raid6/unroll.awk
@@ -0,0 +1,20 @@
+
+# This filter requires one command line option of form -vN=n
+# where n must be a decimal number.
+#
+# Repeat each input line containing $$ n times, replacing $$ with 0...n-1.
+# Replace each $# with n, and each $* with a single $.
+
+BEGIN {
+	n = N + 0
+}
+{
+	if (/\$\$/) { rep = n } else { rep = 1 }
+	for (i = 0; i < rep; ++i) {
+		tmp = $0
+		gsub(/\$\$/, i, tmp)
+		gsub(/\$\#/, n, tmp)
+		gsub(/\$\*/, "$", tmp)
+		print tmp
+	}
+}
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
new file mode 100644
index 000000000000..cb2a8c91c886
--- /dev/null
+++ b/lib/raid6/x86.h
@@ -0,0 +1,61 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/x86.h
+ *
+ * Definitions common to x86 and x86-64 RAID-6 code only
+ */
+
+#ifndef LINUX_RAID_RAID6X86_H
+#define LINUX_RAID_RAID6X86_H
+
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+
+#ifdef __KERNEL__ /* Real code */
+
+#include <asm/i387.h>
+
+#else /* Dummy code for user space testing */
+
+static inline void kernel_fpu_begin(void)
+{
+}
+
+static inline void kernel_fpu_end(void)
+{
+}
+
+#define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions
+					   * (fast save and restore) */
+#define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
+
+/* Should work well enough on modern CPUs for testing */
+static inline int boot_cpu_has(int flag)
+{
+	u32 eax = (flag >> 5) ? 0x80000001 : 1;
+	u32 edx;
+
+	asm volatile("cpuid"
+		     : "+a" (eax), "=d" (edx)
+		     : : "ecx", "ebx");
+
+	return (edx >> (flag & 31)) & 1;
+}
+
+#endif /* ndef __KERNEL__ */
+
+#endif
+#endif
diff --git a/lib/random32.c b/lib/random32.c
index 217d5c4b666d..fc3545a32771 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -39,13 +39,16 @@
 #include <linux/jiffies.h>
 #include <linux/random.h>
 
-struct rnd_state {
-	u32 s1, s2, s3;
-};
-
 static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
 
-static u32 __random32(struct rnd_state *state)
+/**
+ *	prandom32 - seeded pseudo-random number generator.
+ *	@state: pointer to state structure holding seeded state.
+ *
+ *	This is used for pseudo-randomness with no outside seeding.
+ *	For more random results, use random32().
+ */
+u32 prandom32(struct rnd_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
 
@@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state)
 
 	return (state->s1 ^ state->s2 ^ state->s3);
 }
-
-/*
- * Handle minimum values for seeds
- */
-static inline u32 __seed(u32 x, u32 m)
-{
-	return (x < m) ? x + m : x;
-}
+EXPORT_SYMBOL(prandom32);
 
 /**
  *	random32 - pseudo random number generator
@@ -75,7 +71,7 @@ u32 random32(void)
 {
 	unsigned long r;
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
-	r = __random32(state);
+	r = prandom32(state);
 	put_cpu_var(state);
 	return r;
 }
@@ -118,12 +114,12 @@ static int __init random32_init(void)
 		state->s3 = __seed(LCG(state->s2), 15);
 
 		/* "warm it up" */
-		__random32(state);
-		__random32(state);
-		__random32(state);
-		__random32(state);
-		__random32(state);
-		__random32(state);
+		prandom32(state);
+		prandom32(state);
+		prandom32(state);
+		prandom32(state);
+		prandom32(state);
+		prandom32(state);
 	}
 	return 0;
 }
@@ -131,7 +127,7 @@ core_initcall(random32_init);
 
 /*
  *	Generate better values after random number generator
- *	is fully initalized.
+ *	is fully initialized.
  */
 static int __init random32_reseed(void)
 {
@@ -147,7 +143,7 @@ static int __init random32_reseed(void)
 		state->s3 = __seed(seeds[2], 15);
 
 		/* mix it in */
-		__random32(state);
+		prandom32(state);
 	}
 	return 0;
 }
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 26187edcc7ea..027a03f4c56d 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -7,51 +7,61 @@
  * parameter. Now every user can use their own standalone ratelimit_state.
  *
  * This file is released under the GPLv2.
- *
  */
 
-#include <linux/kernel.h>
+#include <linux/ratelimit.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
 
-static DEFINE_SPINLOCK(ratelimit_lock);
-
 /*
  * __ratelimit - rate limiting
  * @rs: ratelimit_state data
+ * @func: name of calling function
+ *
+ * This enforces a rate limit: not more than @rs->burst callbacks
+ * in every @rs->interval
  *
- * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
- * in every @rs->ratelimit_jiffies
+ * RETURNS:
+ * 0 means callbacks will be suppressed.
+ * 1 means go ahead and do it.
  */
-int __ratelimit(struct ratelimit_state *rs)
+int ___ratelimit(struct ratelimit_state *rs, const char *func)
 {
 	unsigned long flags;
+	int ret;
 
 	if (!rs->interval)
 		return 1;
 
-	spin_lock_irqsave(&ratelimit_lock, flags);
+	/*
+	 * If we contend on this state's lock then almost
+	 * by definition we are too busy to print a message,
+	 * in addition to the one that will be printed by
+	 * the entity that is holding the lock already:
+	 */
+	if (!spin_trylock_irqsave(&rs->lock, flags))
+		return 0;
+
 	if (!rs->begin)
 		rs->begin = jiffies;
 
 	if (time_is_before_jiffies(rs->begin + rs->interval)) {
 		if (rs->missed)
 			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
-				__func__, rs->missed);
-		rs->begin = 0;
+				func, rs->missed);
+		rs->begin   = 0;
 		rs->printed = 0;
-		rs->missed = 0;
+		rs->missed  = 0;
 	}
-	if (rs->burst && rs->burst > rs->printed)
-		goto print;
-
-	rs->missed++;
-	spin_unlock_irqrestore(&ratelimit_lock, flags);
-	return 0;
+	if (rs->burst && rs->burst > rs->printed) {
+		rs->printed++;
+		ret = 1;
+	} else {
+		rs->missed++;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&rs->lock, flags);
 
-print:
-	rs->printed++;
-	spin_unlock_irqrestore(&ratelimit_lock, flags);
-	return 1;
+	return ret;
 }
-EXPORT_SYMBOL(__ratelimit);
+EXPORT_SYMBOL(___ratelimit);
diff --git a/lib/rational.c b/lib/rational.c
index b3c099b5478e..3ed247b80662 100644
--- a/lib/rational.c
+++ b/lib/rational.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/rational.h>
+#include <linux/module.h>
 
 /*
  * calculate best rational approximation for a given fraction
diff --git a/lib/rbtree.c b/lib/rbtree.c
index e2aa3be29858..4693f79195d3 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -283,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 }
 EXPORT_SYMBOL(rb_erase);
 
+static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
+{
+	struct rb_node *parent;
+
+up:
+	func(node, data);
+	parent = rb_parent(node);
+	if (!parent)
+		return;
+
+	if (node == parent->rb_left && parent->rb_right)
+		func(parent->rb_right, data);
+	else if (parent->rb_left)
+		func(parent->rb_left, data);
+
+	node = parent;
+	goto up;
+}
+
+/*
+ * after inserting @node into the tree, update the tree to account for
+ * both the new entry and any damage done by rebalance
+ */
+void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
+{
+	if (node->rb_left)
+		node = node->rb_left;
+	else if (node->rb_right)
+		node = node->rb_right;
+
+	rb_augment_path(node, func, data);
+}
+
+/*
+ * before removing the node, find the deepest node on the rebalance path
+ * that will still be there after @node gets removed
+ */
+struct rb_node *rb_augment_erase_begin(struct rb_node *node)
+{
+	struct rb_node *deepest;
+
+	if (!node->rb_right && !node->rb_left)
+		deepest = rb_parent(node);
+	else if (!node->rb_right)
+		deepest = node->rb_left;
+	else if (!node->rb_left)
+		deepest = node->rb_right;
+	else {
+		deepest = rb_next(node);
+		if (deepest->rb_right)
+			deepest = deepest->rb_right;
+		else if (rb_parent(deepest) != node)
+			deepest = rb_parent(deepest);
+	}
+
+	return deepest;
+}
+
+/*
+ * after removal, update the tree to account for the removed entry
+ * and any rebalance damage.
+ */
+void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
+{
+	if (node)
+		rb_augment_path(node, func, data);
+}
+
 /*
  * This function returns the first node (in sort order) of the tree.
  */
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 9df3ca56db11..ffc9fc7f3b05 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -17,6 +17,19 @@ struct rwsem_waiter {
 #define RWSEM_WAITING_FOR_WRITE	0x00000002
 };
 
+int rwsem_is_locked(struct rw_semaphore *sem)
+{
+	int ret = 1;
+	unsigned long flags;
+
+	if (spin_trylock_irqsave(&sem->wait_lock, flags)) {
+		ret = (sem->activity != 0);
+		spin_unlock_irqrestore(&sem->wait_lock, flags);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(rwsem_is_locked);
+
 /*
  * initialise the semaphore
  */
@@ -34,6 +47,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
 }
+EXPORT_SYMBOL(__init_rwsem);
 
 /*
  * handle the lock release when processes blocked on it that can now run
@@ -129,13 +143,14 @@ void __sched __down_read(struct rw_semaphore *sem)
 {
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
+	unsigned long flags;
 
-	spin_lock_irq(&sem->wait_lock);
+	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
 		/* granted */
 		sem->activity++;
-		spin_unlock_irq(&sem->wait_lock);
+		spin_unlock_irqrestore(&sem->wait_lock, flags);
 		goto out;
 	}
 
@@ -150,7 +165,7 @@ void __sched __down_read(struct rw_semaphore *sem)
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we don't need to touch the semaphore struct anymore */
-	spin_unlock_irq(&sem->wait_lock);
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
 	/* wait to be given the lock */
 	for (;;) {
@@ -195,13 +210,14 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
+	unsigned long flags;
 
-	spin_lock_irq(&sem->wait_lock);
+	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	if (sem->activity == 0 && list_empty(&sem->wait_list)) {
 		/* granted */
 		sem->activity = -1;
-		spin_unlock_irq(&sem->wait_lock);
+		spin_unlock_irqrestore(&sem->wait_lock, flags);
 		goto out;
 	}
 
@@ -216,7 +232,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we don't need to touch the semaphore struct anymore */
-	spin_unlock_irq(&sem->wait_lock);
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
 	/* wait to be given the lock */
 	for (;;) {
@@ -305,12 +321,3 @@ void __downgrade_write(struct rw_semaphore *sem)
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
 }
 
-EXPORT_SYMBOL(__init_rwsem);
-EXPORT_SYMBOL(__down_read);
-EXPORT_SYMBOL(__down_read_trylock);
-EXPORT_SYMBOL(__down_write_nested);
-EXPORT_SYMBOL(__down_write);
-EXPORT_SYMBOL(__down_write_trylock);
-EXPORT_SYMBOL(__up_read);
-EXPORT_SYMBOL(__up_write);
-EXPORT_SYMBOL(__downgrade_write);
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 3e3365e5665e..f236d7cd5cf3 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -36,45 +36,56 @@ struct rwsem_waiter {
 #define RWSEM_WAITING_FOR_WRITE	0x00000002
 };
 
+/* Wake types for __rwsem_do_wake().  Note that RWSEM_WAKE_NO_ACTIVE and
+ * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
+ * since the rwsem value was observed.
+ */
+#define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */
+#define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */
+#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
+
 /*
  * handle the lock release when processes blocked on it that can now run
  * - if we come here from up_xxxx(), then:
  *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
  *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
- *   - there must be someone on the queue
+ * - there must be someone on the queue
  * - the spinlock must be held by the caller
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only woken if downgrading is false
  */
-static inline struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
+static struct rw_semaphore *
+__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
 {
 	struct rwsem_waiter *waiter;
 	struct task_struct *tsk;
 	struct list_head *next;
-	signed long oldcount, woken, loop;
-
-	if (downgrading)
-		goto dont_wake_writers;
-
-	/* if we came through an up_xxxx() call, we only only wake someone up
-	 * if we can transition the active part of the count from 0 -> 1
-	 */
- try_again:
-	oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem)
-						- RWSEM_ACTIVE_BIAS;
-	if (oldcount & RWSEM_ACTIVE_MASK)
-		goto undo;
+	signed long oldcount, woken, loop, adjustment;
 
 	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-
-	/* try to grant a single write lock if there's a writer at the front
-	 * of the queue - note we leave the 'active part' of the count
-	 * incremented by 1 and the waiting part incremented by 0x00010000
-	 */
 	if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
 		goto readers_only;
 
+	if (wake_type == RWSEM_WAKE_READ_OWNED)
+		/* Another active reader was observed, so wakeup is not
+		 * likely to succeed. Save the atomic op.
+		 */
+		goto out;
+
+	/* There's a writer at the front of the queue - try to grant it the
+	 * write lock.  However, we only wake this writer if we can transition
+	 * the active part of the count from 0 -> 1
+	 */
+	adjustment = RWSEM_ACTIVE_WRITE_BIAS;
+	if (waiter->list.next == &sem->wait_list)
+		adjustment -= RWSEM_WAITING_BIAS;
+
+ try_again_write:
+	oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+	if (oldcount & RWSEM_ACTIVE_MASK)
+		/* Someone grabbed the sem already */
+		goto undo_write;
+
 	/* We must be careful not to touch 'waiter' after we set ->task = NULL.
 	 * It is an allocated on the waiter's stack and may become invalid at
 	 * any time after that point (due to a wakeup from another source).
@@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
 	put_task_struct(tsk);
 	goto out;
 
-	/* don't want to wake any writers */
- dont_wake_writers:
-	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-	if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
+ readers_only:
+	/* If we come here from up_xxxx(), another thread might have reached
+	 * rwsem_down_failed_common() before we acquired the spinlock and
+	 * woken up a waiter, making it now active.  We prefer to check for
+	 * this first in order to not spend too much time with the spinlock
+	 * held if we're not going to be able to wake up readers in the end.
+	 *
+	 * Note that we do not need to update the rwsem count: any writer
+	 * trying to acquire rwsem will run rwsem_down_write_failed() due
+	 * to the waiting threads and block trying to acquire the spinlock.
+	 *
+	 * We use a dummy atomic update in order to acquire the cache line
+	 * exclusively since we expect to succeed and run the final rwsem
+	 * count adjustment pretty soon.
+	 */
+	if (wake_type == RWSEM_WAKE_ANY &&
+	    rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
+		/* Someone grabbed the sem for write already */
 		goto out;
 
-	/* grant an infinite number of read locks to the readers at the front
-	 * of the queue
-	 * - note we increment the 'active part' of the count by the number of
-	 *   readers before waking any processes up
+	/* Grant an infinite number of read locks to the readers at the front
+	 * of the queue.  Note we increment the 'active part' of the count by
+	 * the number of readers before waking any processes up.
 	 */
- readers_only:
 	woken = 0;
 	do {
 		woken++;
@@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
 
 	} while (waiter->flags & RWSEM_WAITING_FOR_READ);
 
-	loop = woken;
-	woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS;
-	if (!downgrading)
-		/* we'd already done one increment earlier */
-		woken -= RWSEM_ACTIVE_BIAS;
+	adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
+	if (waiter->flags & RWSEM_WAITING_FOR_READ)
+		/* hit end of list above */
+		adjustment -= RWSEM_WAITING_BIAS;
 
-	rwsem_atomic_add(woken, sem);
+	rwsem_atomic_add(adjustment, sem);
 
 	next = sem->wait_list.next;
-	for (; loop > 0; loop--) {
+	for (loop = woken; loop > 0; loop--) {
 		waiter = list_entry(next, struct rwsem_waiter, list);
 		next = waiter->list.next;
 		tsk = waiter->task;
@@ -136,11 +158,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
  out:
 	return sem;
 
-	/* undo the change to count, but check for a transition 1->0 */
- undo:
-	if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0)
+	/* undo the change to the active count, but check for a transition
+	 * 1->0 */
+ undo_write:
+	if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
 		goto out;
-	goto try_again;
+	goto try_again_write;
 }
 
 /*
@@ -148,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
  */
 static struct rw_semaphore __sched *
 rwsem_down_failed_common(struct rw_semaphore *sem,
-			struct rwsem_waiter *waiter, signed long adjustment)
+			 unsigned int flags, signed long adjustment)
 {
+	struct rwsem_waiter waiter;
 	struct task_struct *tsk = current;
 	signed long count;
 
@@ -157,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
 
 	/* set up my own style of waitqueue */
 	spin_lock_irq(&sem->wait_lock);
-	waiter->task = tsk;
+	waiter.task = tsk;
+	waiter.flags = flags;
 	get_task_struct(tsk);
 
-	list_add_tail(&waiter->list, &sem->wait_list);
+	if (list_empty(&sem->wait_list))
+		adjustment += RWSEM_WAITING_BIAS;
+	list_add_tail(&waiter.list, &sem->wait_list);
 
-	/* we're now waiting on the lock, but no longer actively read-locking */
+	/* we're now waiting on the lock, but no longer actively locking */
 	count = rwsem_atomic_update(adjustment, sem);
 
-	/* if there are no active locks, wake the front queued process(es) up */
-	if (!(count & RWSEM_ACTIVE_MASK))
-		sem = __rwsem_do_wake(sem, 0);
+	/* If there are no active locks, wake the front queued process(es) up.
+	 *
+	 * Alternatively, if we're called from a failed down_write(), there
+	 * were already threads queued before us and there are no active
+	 * writers, the lock must be read owned; so we try to wake any read
+	 * locks that were queued ahead of us. */
+	if (count == RWSEM_WAITING_BIAS)
+		sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
+	else if (count > RWSEM_WAITING_BIAS &&
+		 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
+		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
 
 	spin_unlock_irq(&sem->wait_lock);
 
 	/* wait to be given the lock */
 	for (;;) {
-		if (!waiter->task)
+		if (!waiter.task)
 			break;
 		schedule();
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
@@ -190,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
 asmregparm struct rw_semaphore __sched *
 rwsem_down_read_failed(struct rw_semaphore *sem)
 {
-	struct rwsem_waiter waiter;
-
-	waiter.flags = RWSEM_WAITING_FOR_READ;
-	rwsem_down_failed_common(sem, &waiter,
-				RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
-	return sem;
+	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
+					-RWSEM_ACTIVE_READ_BIAS);
 }
 
 /*
@@ -204,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
 asmregparm struct rw_semaphore __sched *
 rwsem_down_write_failed(struct rw_semaphore *sem)
 {
-	struct rwsem_waiter waiter;
-
-	waiter.flags = RWSEM_WAITING_FOR_WRITE;
-	rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
-
-	return sem;
+	return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
+					-RWSEM_ACTIVE_WRITE_BIAS);
 }
 
 /*
@@ -224,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
 
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_do_wake(sem, 0);
+		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
 
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
@@ -244,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
 
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_do_wake(sem, 1);
+		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
 
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index a295e404e908..4ceb05d772ae 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -7,8 +7,10 @@
  * Version 2. See the file COPYING for more details.
  */
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/scatterlist.h>
 #include <linux/highmem.h>
+#include <linux/kmemleak.h>
 
 /**
  * sg_next - return the next scatterlist entry in a list
@@ -114,17 +116,29 @@ EXPORT_SYMBOL(sg_init_one);
  */
 static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
 {
-	if (nents == SG_MAX_SINGLE_ALLOC)
-		return (struct scatterlist *) __get_free_page(gfp_mask);
-	else
+	if (nents == SG_MAX_SINGLE_ALLOC) {
+		/*
+		 * Kmemleak doesn't track page allocations as they are not
+		 * commonly used (in a raw form) for kernel data structures.
+		 * As we chain together a list of pages and then a normal
+		 * kmalloc (tracked by kmemleak), in order to for that last
+		 * allocation not to become decoupled (and thus a
+		 * false-positive) we need to inform kmemleak of all the
+		 * intermediate allocations.
+		 */
+		void *ptr = (void *) __get_free_page(gfp_mask);
+		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
+		return ptr;
+	} else
 		return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
 }
 
 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
 {
-	if (nents == SG_MAX_SINGLE_ALLOC)
+	if (nents == SG_MAX_SINGLE_ALLOC) {
+		kmemleak_free(sg);
 		free_page((unsigned long) sg);
-	else
+	} else
 		kfree(sg);
 }
 
@@ -234,8 +248,18 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
 		left -= sg_size;
 
 		sg = alloc_fn(alloc_size, gfp_mask);
-		if (unlikely(!sg))
-			return -ENOMEM;
+		if (unlikely(!sg)) {
+			/*
+			 * Adjust entry count to reflect that the last
+			 * entry of the previous table won't be used for
+			 * linkage.  Without this, sg_kfree() may get
+			 * confused.
+			 */
+			if (prv)
+				table->nents = ++table->orig_nents;
+
+ 			return -ENOMEM;
+		}
 
 		sg_init_table(sg, alloc_size);
 		table->nents = table->orig_nents += sg_size;
@@ -314,6 +338,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
 	miter->__sg = sgl;
 	miter->__nents = nents;
 	miter->__offset = 0;
+	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
 	miter->__flags = flags;
 }
 EXPORT_SYMBOL(sg_miter_start);
@@ -394,6 +419,9 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
 	if (miter->addr) {
 		miter->__offset += miter->consumed;
 
+		if (miter->__flags & SG_MITER_TO_SG)
+			flush_kernel_dcache_page(miter->page);
+
 		if (miter->__flags & SG_MITER_ATOMIC) {
 			WARN_ON(!irqs_disabled());
 			kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ);
@@ -426,8 +454,14 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 	unsigned int offset = 0;
 	struct sg_mapping_iter miter;
 	unsigned long flags;
+	unsigned int sg_flags = SG_MITER_ATOMIC;
+
+	if (to_buffer)
+		sg_flags |= SG_MITER_FROM_SG;
+	else
+		sg_flags |= SG_MITER_TO_SG;
 
-	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC);
+	sg_miter_start(&miter, sgl, nents, sg_flags);
 
 	local_irq_save(flags);
 
@@ -438,10 +472,8 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 
 		if (to_buffer)
 			memcpy(buf + offset, miter.addr, len);
-		else {
+		else
 			memcpy(miter.addr, buf + offset, len);
-			flush_kernel_dcache_page(miter.page);
-		}
 
 		offset += len;
 	}
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 238e72a18ce1..fdc77c82f922 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -15,7 +15,7 @@ void show_mem(void)
 	unsigned long total = 0, reserved = 0, shared = 0,
 		nonshared = 0, highmem = 0;
 
-	printk(KERN_INFO "Mem-Info:\n");
+	printk("Mem-Info:\n");
 	show_free_areas();
 
 	for_each_online_pgdat(pgdat) {
@@ -49,15 +49,15 @@ void show_mem(void)
 		pgdat_resize_unlock(pgdat, &flags);
 	}
 
-	printk(KERN_INFO "%lu pages RAM\n", total);
+	printk("%lu pages RAM\n", total);
 #ifdef CONFIG_HIGHMEM
-	printk(KERN_INFO "%lu pages HighMem\n", highmem);
+	printk("%lu pages HighMem\n", highmem);
 #endif
-	printk(KERN_INFO "%lu pages reserved\n", reserved);
-	printk(KERN_INFO "%lu pages shared\n", shared);
-	printk(KERN_INFO "%lu pages non-shared\n", nonshared);
+	printk("%lu pages reserved\n", reserved);
+	printk("%lu pages shared\n", shared);
+	printk("%lu pages non-shared\n", nonshared);
 #ifdef CONFIG_QUICKLIST
-	printk(KERN_INFO "%lu pages in pagetable cache\n",
+	printk("%lu pages in pagetable cache\n",
 		quicklist_total_size());
 #endif
 }
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 9c4b0256490b..4755b98b6dfb 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -13,8 +13,8 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 
-void __spin_lock_init(spinlock_t *lock, const char *name,
-		      struct lock_class_key *key)
+void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
+			  struct lock_class_key *key)
 {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	/*
@@ -23,13 +23,13 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lockdep_init_map(&lock->dep_map, name, key, 0);
 #endif
-	lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 	lock->magic = SPINLOCK_MAGIC;
 	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
 }
 
-EXPORT_SYMBOL(__spin_lock_init);
+EXPORT_SYMBOL(__raw_spin_lock_init);
 
 void __rwlock_init(rwlock_t *lock, const char *name,
 		   struct lock_class_key *key)
@@ -41,7 +41,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lockdep_init_map(&lock->dep_map, name, key, 0);
 #endif
-	lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED;
+	lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
 	lock->magic = RWLOCK_MAGIC;
 	lock->owner = SPINLOCK_OWNER_INIT;
 	lock->owner_cpu = -1;
@@ -49,7 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
 
 EXPORT_SYMBOL(__rwlock_init);
 
-static void spin_bug(spinlock_t *lock, const char *msg)
+static void spin_bug(raw_spinlock_t *lock, const char *msg)
 {
 	struct task_struct *owner = NULL;
 
@@ -73,7 +73,7 @@ static void spin_bug(spinlock_t *lock, const char *msg)
 #define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
 
 static inline void
-debug_spin_lock_before(spinlock_t *lock)
+debug_spin_lock_before(raw_spinlock_t *lock)
 {
 	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
 	SPIN_BUG_ON(lock->owner == current, lock, "recursion");
@@ -81,16 +81,16 @@ debug_spin_lock_before(spinlock_t *lock)
 							lock, "cpu recursion");
 }
 
-static inline void debug_spin_lock_after(spinlock_t *lock)
+static inline void debug_spin_lock_after(raw_spinlock_t *lock)
 {
 	lock->owner_cpu = raw_smp_processor_id();
 	lock->owner = current;
 }
 
-static inline void debug_spin_unlock(spinlock_t *lock)
+static inline void debug_spin_unlock(raw_spinlock_t *lock)
 {
 	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
-	SPIN_BUG_ON(!spin_is_locked(lock), lock, "already unlocked");
+	SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
 	SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
 	SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
 							lock, "wrong CPU");
@@ -98,7 +98,7 @@ static inline void debug_spin_unlock(spinlock_t *lock)
 	lock->owner_cpu = -1;
 }
 
-static void __spin_lock_debug(spinlock_t *lock)
+static void __spin_lock_debug(raw_spinlock_t *lock)
 {
 	u64 i;
 	u64 loops = loops_per_jiffy * HZ;
@@ -106,7 +106,7 @@ static void __spin_lock_debug(spinlock_t *lock)
 
 	for (;;) {
 		for (i = 0; i < loops; i++) {
-			if (__raw_spin_trylock(&lock->raw_lock))
+			if (arch_spin_trylock(&lock->raw_lock))
 				return;
 			__delay(1);
 		}
@@ -125,17 +125,17 @@ static void __spin_lock_debug(spinlock_t *lock)
 	}
 }
 
-void _raw_spin_lock(spinlock_t *lock)
+void do_raw_spin_lock(raw_spinlock_t *lock)
 {
 	debug_spin_lock_before(lock);
-	if (unlikely(!__raw_spin_trylock(&lock->raw_lock)))
+	if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
 		__spin_lock_debug(lock);
 	debug_spin_lock_after(lock);
 }
 
-int _raw_spin_trylock(spinlock_t *lock)
+int do_raw_spin_trylock(raw_spinlock_t *lock)
 {
-	int ret = __raw_spin_trylock(&lock->raw_lock);
+	int ret = arch_spin_trylock(&lock->raw_lock);
 
 	if (ret)
 		debug_spin_lock_after(lock);
@@ -148,10 +148,10 @@ int _raw_spin_trylock(spinlock_t *lock)
 	return ret;
 }
 
-void _raw_spin_unlock(spinlock_t *lock)
+void do_raw_spin_unlock(raw_spinlock_t *lock)
 {
 	debug_spin_unlock(lock);
-	__raw_spin_unlock(&lock->raw_lock);
+	arch_spin_unlock(&lock->raw_lock);
 }
 
 static void rwlock_bug(rwlock_t *lock, const char *msg)
@@ -176,7 +176,7 @@ static void __read_lock_debug(rwlock_t *lock)
 
 	for (;;) {
 		for (i = 0; i < loops; i++) {
-			if (__raw_read_trylock(&lock->raw_lock))
+			if (arch_read_trylock(&lock->raw_lock))
 				return;
 			__delay(1);
 		}
@@ -193,15 +193,15 @@ static void __read_lock_debug(rwlock_t *lock)
 }
 #endif
 
-void _raw_read_lock(rwlock_t *lock)
+void do_raw_read_lock(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	__raw_read_lock(&lock->raw_lock);
+	arch_read_lock(&lock->raw_lock);
 }
 
-int _raw_read_trylock(rwlock_t *lock)
+int do_raw_read_trylock(rwlock_t *lock)
 {
-	int ret = __raw_read_trylock(&lock->raw_lock);
+	int ret = arch_read_trylock(&lock->raw_lock);
 
 #ifndef CONFIG_SMP
 	/*
@@ -212,10 +212,10 @@ int _raw_read_trylock(rwlock_t *lock)
 	return ret;
 }
 
-void _raw_read_unlock(rwlock_t *lock)
+void do_raw_read_unlock(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-	__raw_read_unlock(&lock->raw_lock);
+	arch_read_unlock(&lock->raw_lock);
 }
 
 static inline void debug_write_lock_before(rwlock_t *lock)
@@ -251,7 +251,7 @@ static void __write_lock_debug(rwlock_t *lock)
 
 	for (;;) {
 		for (i = 0; i < loops; i++) {
-			if (__raw_write_trylock(&lock->raw_lock))
+			if (arch_write_trylock(&lock->raw_lock))
 				return;
 			__delay(1);
 		}
@@ -268,16 +268,16 @@ static void __write_lock_debug(rwlock_t *lock)
 }
 #endif
 
-void _raw_write_lock(rwlock_t *lock)
+void do_raw_write_lock(rwlock_t *lock)
 {
 	debug_write_lock_before(lock);
-	__raw_write_lock(&lock->raw_lock);
+	arch_write_lock(&lock->raw_lock);
 	debug_write_lock_after(lock);
 }
 
-int _raw_write_trylock(rwlock_t *lock)
+int do_raw_write_trylock(rwlock_t *lock)
 {
-	int ret = __raw_write_trylock(&lock->raw_lock);
+	int ret = arch_write_trylock(&lock->raw_lock);
 
 	if (ret)
 		debug_write_lock_after(lock);
@@ -290,8 +290,8 @@ int _raw_write_trylock(rwlock_t *lock)
 	return ret;
 }
 
-void _raw_write_unlock(rwlock_t *lock)
+void do_raw_write_unlock(rwlock_t *lock)
 {
 	debug_write_unlock(lock);
-	__raw_write_unlock(&lock->raw_lock);
+	arch_write_unlock(&lock->raw_lock);
 }
diff --git a/lib/string.c b/lib/string.c
index b19b87af65a3..f71bead1be3e 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -36,25 +36,21 @@ int strnicmp(const char *s1, const char *s2, size_t len)
 	/* Yes, Virginia, it had better be unsigned */
 	unsigned char c1, c2;
 
-	c1 = c2 = 0;
-	if (len) {
-		do {
-			c1 = *s1;
-			c2 = *s2;
-			s1++;
-			s2++;
-			if (!c1)
-				break;
-			if (!c2)
-				break;
-			if (c1 == c2)
-				continue;
-			c1 = tolower(c1);
-			c2 = tolower(c2);
-			if (c1 != c2)
-				break;
-		} while (--len);
-	}
+	if (!len)
+		return 0;
+
+	do {
+		c1 = *s1++;
+		c2 = *s2++;
+		if (!c1 || !c2)
+			break;
+		if (c1 == c2)
+			continue;
+		c1 = tolower(c1);
+		c2 = tolower(c2);
+		if (c1 != c2)
+			break;
+	} while (--len);
 	return (int)c1 - (int)c2;
 }
 EXPORT_SYMBOL(strnicmp);
@@ -246,13 +242,17 @@ EXPORT_SYMBOL(strlcat);
 #undef strcmp
 int strcmp(const char *cs, const char *ct)
 {
-	signed char __res;
+	unsigned char c1, c2;
 
 	while (1) {
-		if ((__res = *cs - *ct++) != 0 || !*cs++)
+		c1 = *cs++;
+		c2 = *ct++;
+		if (c1 != c2)
+			return c1 < c2 ? -1 : 1;
+		if (!c1)
 			break;
 	}
-	return __res;
+	return 0;
 }
 EXPORT_SYMBOL(strcmp);
 #endif
@@ -266,14 +266,18 @@ EXPORT_SYMBOL(strcmp);
  */
 int strncmp(const char *cs, const char *ct, size_t count)
 {
-	signed char __res = 0;
+	unsigned char c1, c2;
 
 	while (count) {
-		if ((__res = *cs - *ct++) != 0 || !*cs++)
+		c1 = *cs++;
+		c2 = *ct++;
+		if (c1 != c2)
+			return c1 < c2 ? -1 : 1;
+		if (!c1)
 			break;
 		count--;
 	}
-	return __res;
+	return 0;
 }
 EXPORT_SYMBOL(strncmp);
 #endif
@@ -330,20 +334,34 @@ EXPORT_SYMBOL(strnchr);
 #endif
 
 /**
- * strstrip - Removes leading and trailing whitespace from @s.
+ * skip_spaces - Removes leading whitespace from @str.
+ * @str: The string to be stripped.
+ *
+ * Returns a pointer to the first non-whitespace character in @str.
+ */
+char *skip_spaces(const char *str)
+{
+	while (isspace(*str))
+		++str;
+	return (char *)str;
+}
+EXPORT_SYMBOL(skip_spaces);
+
+/**
+ * strim - Removes leading and trailing whitespace from @s.
  * @s: The string to be stripped.
  *
  * Note that the first trailing whitespace is replaced with a %NUL-terminator
  * in the given string @s. Returns a pointer to the first non-whitespace
  * character in @s.
  */
-char *strstrip(char *s)
+char *strim(char *s)
 {
 	size_t size;
 	char *end;
 
+	s = skip_spaces(s);
 	size = strlen(s);
-
 	if (!size)
 		return s;
 
@@ -352,12 +370,9 @@ char *strstrip(char *s)
 		end--;
 	*(end + 1) = '\0';
 
-	while (*s && isspace(*s))
-		s++;
-
 	return s;
 }
-EXPORT_SYMBOL(strstrip);
+EXPORT_SYMBOL(strim);
 
 #ifndef __HAVE_ARCH_STRLEN
 /**
@@ -648,7 +663,7 @@ EXPORT_SYMBOL(memscan);
  */
 char *strstr(const char *s1, const char *s2)
 {
-	int l1, l2;
+	size_t l1, l2;
 
 	l2 = strlen(s2);
 	if (!l2)
@@ -665,6 +680,31 @@ char *strstr(const char *s1, const char *s2)
 EXPORT_SYMBOL(strstr);
 #endif
 
+#ifndef __HAVE_ARCH_STRNSTR
+/**
+ * strnstr - Find the first substring in a length-limited string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ * @len: the maximum number of characters to search
+ */
+char *strnstr(const char *s1, const char *s2, size_t len)
+{
+	size_t l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	while (len >= l2) {
+		len--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(strnstr);
+#endif
+
 #ifndef __HAVE_ARCH_MEMCHR
 /**
  * memchr - Find a character in an area of memory.
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index bffe6d7ef9d9..7c06ee51a29a 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/ctype.h>
 #include <linux/highmem.h>
+#include <linux/gfp.h>
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -49,19 +50,11 @@
  */
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
 
-/*
- * Enumeration for sync targets
- */
-enum dma_sync_target {
-	SYNC_FOR_CPU = 0,
-	SYNC_FOR_DEVICE = 1,
-};
-
 int swiotlb_force;
 
 /*
- * Used to do a quick range check in unmap_single and
- * sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in swiotlb_tbl_unmap_single and
+ * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
 static char *io_tlb_start, *io_tlb_end;
@@ -77,7 +70,7 @@ static unsigned long io_tlb_nslabs;
  */
 static unsigned long io_tlb_overflow = 32*1024;
 
-void *io_tlb_overflow_buffer;
+static void *io_tlb_overflow_buffer;
 
 /*
  * This is a free list describing the number of free entries available from
@@ -97,6 +90,8 @@ static phys_addr_t *io_tlb_orig_addr;
  */
 static DEFINE_SPINLOCK(io_tlb_lock);
 
+static int late_alloc;
+
 static int __init
 setup_io_tlb_npages(char *str)
 {
@@ -109,55 +104,22 @@ setup_io_tlb_npages(char *str)
 		++str;
 	if (!strcmp(str, "force"))
 		swiotlb_force = 1;
+
 	return 1;
 }
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
-void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
-{
-	return alloc_bootmem_low_pages(size);
-}
-
-void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
-{
-	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
-}
-
-dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
-{
-	return paddr;
-}
-
-phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
-{
-	return baddr;
-}
-
+/* Note that this doesn't work with highmem page */
 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)
 {
-	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
-}
-
-void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
-{
-	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
-}
-
-int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
-					       dma_addr_t addr, size_t size)
-{
-	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
+	return phys_to_dma(hwdev, virt_to_phys(address));
 }
 
-int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
-{
-	return 0;
-}
-
-static void swiotlb_print_info(unsigned long bytes)
+void swiotlb_print_info(void)
 {
+	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 	phys_addr_t pstart, pend;
 
 	pstart = virt_to_phys(io_tlb_start);
@@ -170,28 +132,14 @@ static void swiotlb_print_info(unsigned long bytes)
 	       (unsigned long long)pend);
 }
 
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void __init
-swiotlb_init_with_default_size(size_t default_size)
+void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
 	unsigned long i, bytes;
 
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	bytes = nslabs << IO_TLB_SHIFT;
 
-	/*
-	 * Get IO TLB memory from the low pages
-	 */
-	io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
-	if (!io_tlb_start)
-		panic("Cannot allocate SWIOTLB buffer");
+	io_tlb_nslabs = nslabs;
+	io_tlb_start = tlb;
 	io_tlb_end = io_tlb_start + bytes;
 
 	/*
@@ -199,26 +147,52 @@ swiotlb_init_with_default_size(size_t default_size)
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
 	 */
-	io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+	io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
 	for (i = 0; i < io_tlb_nslabs; i++)
  		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
 	io_tlb_index = 0;
-	io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
+	io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
 
 	/*
 	 * Get the overflow emergency buffer
 	 */
-	io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+	io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
 	if (!io_tlb_overflow_buffer)
 		panic("Cannot allocate SWIOTLB overflow buffer!\n");
+	if (verbose)
+		swiotlb_print_info();
+}
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void __init
+swiotlb_init_with_default_size(size_t default_size, int verbose)
+{
+	unsigned long bytes;
+
+	if (!io_tlb_nslabs) {
+		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+	}
+
+	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+	/*
+	 * Get IO TLB memory from the low pages
+	 */
+	io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
+	if (!io_tlb_start)
+		panic("Cannot allocate SWIOTLB buffer");
 
-	swiotlb_print_info(bytes);
+	swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
 }
 
 void __init
-swiotlb_init(void)
+swiotlb_init(int verbose)
 {
-	swiotlb_init_with_default_size(64 * (1<<20));	/* default to 64MB */
+	swiotlb_init_with_default_size(64 * (1<<20), verbose);	/* default to 64MB */
 }
 
 /*
@@ -245,7 +219,8 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs);
+		io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+							order);
 		if (io_tlb_start)
 			break;
 		order--;
@@ -294,7 +269,9 @@ swiotlb_late_init_with_default_size(size_t default_size)
 	if (!io_tlb_overflow_buffer)
 		goto cleanup4;
 
-	swiotlb_print_info(bytes);
+	swiotlb_print_info();
+
+	late_alloc = 1;
 
 	return 0;
 
@@ -315,27 +292,43 @@ cleanup1:
 	return -ENOMEM;
 }
 
-static inline int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
+void __init swiotlb_free(void)
 {
-	return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
-}
+	if (!io_tlb_overflow_buffer)
+		return;
 
-static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
-{
-	return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size);
+	if (late_alloc) {
+		free_pages((unsigned long)io_tlb_overflow_buffer,
+			   get_order(io_tlb_overflow));
+		free_pages((unsigned long)io_tlb_orig_addr,
+			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
+		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+								 sizeof(int)));
+		free_pages((unsigned long)io_tlb_start,
+			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
+	} else {
+		free_bootmem_late(__pa(io_tlb_overflow_buffer),
+				  PAGE_ALIGN(io_tlb_overflow));
+		free_bootmem_late(__pa(io_tlb_orig_addr),
+				  PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+		free_bootmem_late(__pa(io_tlb_list),
+				  PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
+		free_bootmem_late(__pa(io_tlb_start),
+				  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+	}
 }
 
-static int is_swiotlb_buffer(char *addr)
+static int is_swiotlb_buffer(phys_addr_t paddr)
 {
-	return addr >= io_tlb_start && addr < io_tlb_end;
+	return paddr >= virt_to_phys(io_tlb_start) &&
+		paddr < virt_to_phys(io_tlb_end);
 }
 
 /*
  * Bounce: copy the swiotlb buffer back to the original dma location
  */
-static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-			   enum dma_data_direction dir)
+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+		    enum dma_data_direction dir)
 {
 	unsigned long pfn = PFN_DOWN(phys);
 
@@ -371,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
 			memcpy(phys_to_virt(phys), dma_addr, size);
 	}
 }
+EXPORT_SYMBOL_GPL(swiotlb_bounce);
 
-/*
- * Allocates bounce buffer and returns its kernel virtual address.
- */
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
+void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
+			     phys_addr_t phys, size_t size,
+			     enum dma_data_direction dir)
 {
 	unsigned long flags;
 	char *dma_addr;
 	unsigned int nslots, stride, index, wrap;
 	int i;
-	unsigned long start_dma_addr;
 	unsigned long mask;
 	unsigned long offset_slots;
 	unsigned long max_slots;
 
 	mask = dma_get_seg_boundary(hwdev);
-	start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;
 
-	offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+	tbl_dma_addr &= mask;
+
+	offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 
 	/*
  	 * Carefully handle integer overflow which can occur when mask == ~0UL.
@@ -477,12 +469,27 @@ found:
 
 	return dma_addr;
 }
+EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+
+static void *
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+	   enum dma_data_direction dir)
+{
+	dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
+
+	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
+}
 
 /*
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
-static void
-do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+void
+swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
+			enum dma_data_direction dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -497,7 +504,7 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contigous entries available.
+	 * entries to indicate the number of contiguous entries available.
 	 * While returning the entries to the free list, we merge the entries
 	 * with slots below and above the pool being returned.
 	 */
@@ -520,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 	}
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 }
+EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
 
-static void
-sync_single(struct device *hwdev, char *dma_addr, size_t size,
-	    int dir, int target)
+void
+swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
+			enum dma_data_direction dir,
+			enum dma_sync_target target)
 {
 	int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
 	phys_addr_t phys = io_tlb_orig_addr[index];
@@ -547,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
 		BUG();
 	}
 }
+EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
 
 void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -561,9 +571,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret &&
-	    !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(hwdev, ret),
-				   size)) {
+	if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 */
@@ -572,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	}
 	if (!ret) {
 		/*
-		 * We are either out of memory or the device can't DMA
-		 * to GFP_DMA memory; fall back on map_single(), which
+		 * We are either out of memory or the device can't DMA to
+		 * GFP_DMA memory; fall back on map_single(), which
 		 * will grab memory from the lowest available address range.
 		 */
 		ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
@@ -585,13 +593,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	dev_addr = swiotlb_virt_to_bus(hwdev, ret);
 
 	/* Confirm address can be DMA'd by device */
-	if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) {
+	if (dev_addr + size - 1 > dma_mask) {
 		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
 		       (unsigned long long)dma_mask,
 		       (unsigned long long)dev_addr);
 
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
+		swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
 		return NULL;
 	}
 	*dma_handle = dev_addr;
@@ -601,19 +609,22 @@ EXPORT_SYMBOL(swiotlb_alloc_coherent);
 
 void
 swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
-		      dma_addr_t dma_handle)
+		      dma_addr_t dev_addr)
 {
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
+
 	WARN_ON(irqs_disabled());
-	if (!is_swiotlb_buffer(vaddr))
-		free_pages((unsigned long) vaddr, get_order(size));
+	if (!is_swiotlb_buffer(paddr))
+		free_pages((unsigned long)vaddr, get_order(size));
 	else
-		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+		/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
+		swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
 static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
+	     int do_panic)
 {
 	/*
 	 * Ran out of IOMMU space for this operation. This is very bad.
@@ -625,12 +636,15 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
 	printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
 	       "device %s\n", size, dev ? dev_name(dev) : "?");
 
-	if (size > io_tlb_overflow && do_panic) {
-		if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
-			panic("DMA: Memory would be corrupted\n");
-		if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-			panic("DMA: Random memory would be DMAed\n");
-	}
+	if (size <= io_tlb_overflow || !do_panic)
+		return;
+
+	if (dir == DMA_BIDIRECTIONAL)
+		panic("DMA: Random memory could be DMA accessed\n");
+	if (dir == DMA_FROM_DEVICE)
+		panic("DMA: Random memory could be DMA written\n");
+	if (dir == DMA_TO_DEVICE)
+		panic("DMA: Random memory could be DMA read\n");
 }
 
 /*
@@ -646,7 +660,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    struct dma_attrs *attrs)
 {
 	phys_addr_t phys = page_to_phys(page) + offset;
-	dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
+	dma_addr_t dev_addr = phys_to_dma(dev, phys);
 	void *map;
 
 	BUG_ON(dir == DMA_NONE);
@@ -655,8 +669,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(dev, dev_addr, size) &&
-	    !range_needs_mapping(phys, size))
+	if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
 		return dev_addr;
 
 	/*
@@ -673,7 +686,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (address_needs_mapping(dev, dev_addr, size))
+	if (!dma_capable(dev, dev_addr, size))
 		panic("map_single: bounce buffer is not DMA'ble");
 
 	return dev_addr;
@@ -689,21 +702,27 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
  * whatever the device wrote there.
  */
 static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-			 size_t size, int dir)
+			 size_t size, enum dma_data_direction dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
-	if (is_swiotlb_buffer(dma_addr)) {
-		do_unmap_single(hwdev, dma_addr, size, dir);
+	if (is_swiotlb_buffer(paddr)) {
+		swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
 		return;
 	}
 
 	if (dir != DMA_FROM_DEVICE)
 		return;
 
-	dma_mark_clean(dma_addr, size);
+	/*
+	 * phys_to_virt doesn't work with hihgmem page but we could
+	 * call dma_mark_clean() with hihgmem page here. However, we
+	 * are fine since dma_mark_clean() is null on POWERPC. We can
+	 * make dma_mark_clean() take a physical address if necessary.
+	 */
+	dma_mark_clean(phys_to_virt(paddr), size);
 }
 
 void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
@@ -726,21 +745,23 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
  */
 static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-		    size_t size, int dir, int target)
+		    size_t size, enum dma_data_direction dir,
+		    enum dma_sync_target target)
 {
-	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
-	if (is_swiotlb_buffer(dma_addr)) {
-		sync_single(hwdev, dma_addr, size, dir, target);
+	if (is_swiotlb_buffer(paddr)) {
+		swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
+				       target);
 		return;
 	}
 
 	if (dir != DMA_FROM_DEVICE)
 		return;
 
-	dma_mark_clean(dma_addr, size);
+	dma_mark_clean(phys_to_virt(paddr), size);
 }
 
 void
@@ -760,37 +781,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
 EXPORT_SYMBOL(swiotlb_sync_single_for_device);
 
 /*
- * Same as above, but for a sub-range of the mapping.
- */
-static void
-swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
-			  unsigned long offset, size_t size,
-			  int dir, int target)
-{
-	swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
-}
-
-void
-swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-				  unsigned long offset, size_t size,
-				  enum dma_data_direction dir)
-{
-	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-				  SYNC_FOR_CPU);
-}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
-
-void
-swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
-				     unsigned long offset, size_t size,
-				     enum dma_data_direction dir)
-{
-	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-				  SYNC_FOR_DEVICE);
-}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
-
-/*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
  * This is the scatter-gather version of the above swiotlb_map_page
  * interface.  Here the scatter gather list elements are each tagged with the
@@ -817,10 +807,10 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 
 	for_each_sg(sgl, sg, nelems, i) {
 		phys_addr_t paddr = sg_phys(sg);
-		dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr);
+		dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
 
-		if (range_needs_mapping(paddr, sg->length) ||
-		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
+		if (swiotlb_force ||
+		    !dma_capable(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, sg_phys(sg),
 					       sg->length, dir);
 			if (!map) {
@@ -843,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs);
 
 int
 swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-	       int dir)
+	       enum dma_data_direction dir)
 {
 	return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
 }
@@ -870,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 
 void
 swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-		 int dir)
+		 enum dma_data_direction dir)
 {
 	return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
 }
@@ -885,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
  */
 static void
 swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-		int nelems, int dir, int target)
+		int nelems, enum dma_data_direction dir,
+		enum dma_sync_target target)
 {
 	struct scatterlist *sg;
 	int i;
diff --git a/lib/textsearch.c b/lib/textsearch.c
index 9fbcb44c554f..d608331b3e47 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -103,6 +103,7 @@
 #include <linux/rcupdate.h>
 #include <linux/err.h>
 #include <linux/textsearch.h>
+#include <linux/slab.h>
 
 static LIST_HEAD(ts_ops);
 static DEFINE_SPINLOCK(ts_mod_lock);
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644
index 000000000000..e3a1050e6820
--- /dev/null
+++ b/lib/timerqueue.c
@@ -0,0 +1,107 @@
+/*
+ *  Generic Timer-queue
+ *
+ *  Manages a simple queue of timers, ordered by expiration time.
+ *  Uses rbtrees for quick list adds and expiration.
+ *
+ *  NOTE: All of the following functions need to be serialized
+ *  to avoid races. No locking is done by this libary code.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/timerqueue.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+
+/**
+ * timerqueue_add - Adds timer to timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be added
+ *
+ * Adds the timer node to the timerqueue, sorted by the
+ * node's expires value.
+ */
+void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+	struct rb_node **p = &head->head.rb_node;
+	struct rb_node *parent = NULL;
+	struct timerqueue_node  *ptr;
+
+	/* Make sure we don't add nodes that are already added */
+	WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
+
+	while (*p) {
+		parent = *p;
+		ptr = rb_entry(parent, struct timerqueue_node, node);
+		if (node->expires.tv64 < ptr->expires.tv64)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&node->node, parent, p);
+	rb_insert_color(&node->node, &head->head);
+
+	if (!head->next || node->expires.tv64 < head->next->expires.tv64)
+		head->next = node;
+}
+EXPORT_SYMBOL_GPL(timerqueue_add);
+
+/**
+ * timerqueue_del - Removes a timer from the timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be removed
+ *
+ * Removes the timer node from the timerqueue.
+ */
+void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+	WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
+
+	/* update next pointer */
+	if (head->next == node) {
+		struct rb_node *rbn = rb_next(&node->node);
+
+		head->next = rbn ?
+			rb_entry(rbn, struct timerqueue_node, node) : NULL;
+	}
+	rb_erase(&node->node, &head->head);
+	RB_CLEAR_NODE(&node->node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_del);
+
+/**
+ * timerqueue_iterate_next - Returns the timer after the provided timer
+ *
+ * @node: Pointer to a timer.
+ *
+ * Provides the timer that is after the given node. This is used, when
+ * necessary, to iterate through the list of timers in a timer list
+ * without modifying the list.
+ */
+struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
+{
+	struct rb_node *next;
+
+	if (!node)
+		return NULL;
+	next = rb_next(&node->node);
+	if (!next)
+		return NULL;
+	return container_of(next, struct timerqueue_node, node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 000000000000..8fadd7cef46c
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,53 @@
+/*
+ * Unified UUID/GUID definition
+ *
+ * Copyright (C) 2009, Intel Corp.
+ *	Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uuid.h>
+#include <linux/random.h>
+
+static void __uuid_gen_common(__u8 b[16])
+{
+	int i;
+	u32 r;
+
+	for (i = 0; i < 4; i++) {
+		r = random32();
+		memcpy(b + i * 4, &r, 4);
+	}
+	/* reversion 0b10 */
+	b[8] = (b[8] & 0x3F) | 0x80;
+}
+
+void uuid_le_gen(uuid_le *lu)
+{
+	__uuid_gen_common(lu->b);
+	/* version 4 : random generation */
+	lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_le_gen);
+
+void uuid_be_gen(uuid_be *bu)
+{
+	__uuid_gen_common(bu->b);
+	/* version 4 : random generation */
+	bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_be_gen);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 756ccafa9cec..c150d3dafff4 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -9,7 +9,7 @@
  * Wirzenius wrote this portably, Torvalds fucked it up :-)
  */
 
-/* 
+/*
  * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
  * - changed to provide snprintf and vsnprintf functions
  * So Feb  1 16:51:32 CET 2004 Juergen Quade <quade@hsnr.de>
@@ -25,6 +25,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ioport.h>
+#include <net/addrconf.h>
 
 #include <asm/page.h>		/* for PAGE_SIZE */
 #include <asm/div64.h>
@@ -46,14 +47,14 @@ static unsigned int simple_guess_base(const char *cp)
 }
 
 /**
- * simple_strtoul - convert a string to an unsigned long
+ * simple_strtoull - convert a string to an unsigned long long
  * @cp: The start of the string
  * @endp: A pointer to the end of the parsed string will be placed here
  * @base: The number base to use
  */
-unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
+unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
 {
-	unsigned long result = 0;
+	unsigned long long result = 0;
 
 	if (!base)
 		base = simple_guess_base(cp);
@@ -70,58 +71,39 @@ unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
 		result = result * base + value;
 		cp++;
 	}
-
 	if (endp)
 		*endp = (char *)cp;
+
 	return result;
 }
-EXPORT_SYMBOL(simple_strtoul);
+EXPORT_SYMBOL(simple_strtoull);
 
 /**
- * simple_strtol - convert a string to a signed long
+ * simple_strtoul - convert a string to an unsigned long
  * @cp: The start of the string
  * @endp: A pointer to the end of the parsed string will be placed here
  * @base: The number base to use
  */
-long simple_strtol(const char *cp, char **endp, unsigned int base)
+unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
 {
-	if(*cp == '-')
-		return -simple_strtoul(cp + 1, endp, base);
-	return simple_strtoul(cp, endp, base);
+	return simple_strtoull(cp, endp, base);
 }
-EXPORT_SYMBOL(simple_strtol);
+EXPORT_SYMBOL(simple_strtoul);
 
 /**
- * simple_strtoull - convert a string to an unsigned long long
+ * simple_strtol - convert a string to a signed long
  * @cp: The start of the string
  * @endp: A pointer to the end of the parsed string will be placed here
  * @base: The number base to use
  */
-unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
+long simple_strtol(const char *cp, char **endp, unsigned int base)
 {
-	unsigned long long result = 0;
-
-	if (!base)
-		base = simple_guess_base(cp);
-
-	if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
-		cp += 2;
-
-	while (isxdigit(*cp)) {
-		unsigned int value;
-
-		value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
-		if (value >= base)
-			break;
-		result = result * base + value;
-		cp++;
-	}
+	if (*cp == '-')
+		return -simple_strtoul(cp + 1, endp, base);
 
-	if (endp)
-		*endp = (char *)cp;
-	return result;
+	return simple_strtoul(cp, endp, base);
 }
-EXPORT_SYMBOL(simple_strtoull);
+EXPORT_SYMBOL(simple_strtol);
 
 /**
  * simple_strtoll - convert a string to a signed long long
@@ -131,10 +113,12 @@ EXPORT_SYMBOL(simple_strtoull);
  */
 long long simple_strtoll(const char *cp, char **endp, unsigned int base)
 {
-	if(*cp=='-')
+	if (*cp == '-')
 		return -simple_strtoull(cp + 1, endp, base);
+
 	return simple_strtoull(cp, endp, base);
 }
+EXPORT_SYMBOL(simple_strtoll);
 
 /**
  * strict_strtoul - convert a string to an unsigned long strictly
@@ -162,18 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
 {
 	char *tail;
 	unsigned long val;
-	size_t len;
 
 	*res = 0;
-	len = strlen(cp);
-	if (len == 0)
+	if (!*cp)
 		return -EINVAL;
 
 	val = simple_strtoul(cp, &tail, base);
 	if (tail == cp)
 		return -EINVAL;
-	if ((*tail == '\0') ||
-		((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
+
+	if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
 		*res = val;
 		return 0;
 	}
@@ -235,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
 {
 	char *tail;
 	unsigned long long val;
-	size_t len;
 
 	*res = 0;
-	len = strlen(cp);
-	if (len == 0)
+	if (!*cp)
 		return -EINVAL;
 
 	val = simple_strtoull(cp, &tail, base);
 	if (tail == cp)
 		return -EINVAL;
-	if ((*tail == '\0') ||
-		((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
+	if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
 		*res = val;
 		return 0;
 	}
@@ -282,12 +261,14 @@ int strict_strtoll(const char *cp, unsigned int base, long long *res)
 }
 EXPORT_SYMBOL(strict_strtoll);
 
-static int skip_atoi(const char **s)
+static noinline_for_stack
+int skip_atoi(const char **s)
 {
-	int i=0;
+	int i = 0;
 
 	while (isdigit(**s))
 		i = i*10 + *((*s)++) - '0';
+
 	return i;
 }
 
@@ -301,7 +282,8 @@ static int skip_atoi(const char **s)
 /* Formats correctly any integer in [0,99999].
  * Outputs from one to five digits depending on input.
  * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
-static char* put_dec_trunc(char *buf, unsigned q)
+static noinline_for_stack
+char *put_dec_trunc(char *buf, unsigned q)
 {
 	unsigned d3, d2, d1, d0;
 	d1 = (q>>4) & 0xf;
@@ -330,14 +312,16 @@ static char* put_dec_trunc(char *buf, unsigned q)
 				d3 = d3 - 10*q;
 				*buf++ = d3 + '0';  /* next digit */
 				if (q != 0)
-					*buf++ = q + '0';  /* most sign. digit */
+					*buf++ = q + '0'; /* most sign. digit */
 			}
 		}
 	}
+
 	return buf;
 }
 /* Same with if's removed. Always emits five digits */
-static char* put_dec_full(char *buf, unsigned q)
+static noinline_for_stack
+char *put_dec_full(char *buf, unsigned q)
 {
 	/* BTW, if q is in [0,9999], 8-bit ints will be enough, */
 	/* but anyway, gcc produces better code with full-sized ints */
@@ -346,14 +330,15 @@ static char* put_dec_full(char *buf, unsigned q)
 	d2 = (q>>8) & 0xf;
 	d3 = (q>>12);
 
-	/* Possible ways to approx. divide by 10 */
-	/* gcc -O2 replaces multiply with shifts and adds */
-	// (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386)
-	// (x * 0x67) >> 10:  1100111
-	// (x * 0x34) >> 9:    110100 - same
-	// (x * 0x1a) >> 8:     11010 - same
-	// (x * 0x0d) >> 7:      1101 - same, shortest code (on i386)
-
+	/*
+	 * Possible ways to approx. divide by 10
+	 * gcc -O2 replaces multiply with shifts and adds
+	 * (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386)
+	 * (x * 0x67) >> 10:  1100111
+	 * (x * 0x34) >> 9:    110100 - same
+	 * (x * 0x1a) >> 8:     11010 - same
+	 * (x * 0x0d) >> 7:      1101 - same, shortest code (on i386)
+	 */
 	d0 = 6*(d3 + d2 + d1) + (q & 0xf);
 	q = (d0 * 0xcd) >> 11;
 	d0 = d0 - 10*q;
@@ -374,10 +359,12 @@ static char* put_dec_full(char *buf, unsigned q)
 				d3 = d3 - 10*q;
 				*buf++ = d3 + '0';
 					*buf++ = q + '0';
+
 	return buf;
 }
 /* No inlining helps gcc to use registers better */
-static noinline char* put_dec(char *buf, unsigned long long num)
+static noinline_for_stack
+char *put_dec(char *buf, unsigned long long num)
 {
 	while (1) {
 		unsigned rem;
@@ -393,8 +380,8 @@ static noinline char* put_dec(char *buf, unsigned long long num)
 #define PLUS	4		/* show plus */
 #define SPACE	8		/* space if plus */
 #define LEFT	16		/* left justified */
-#define SMALL	32		/* Must be 32 == 0x20 */
-#define SPECIAL	64		/* 0x */
+#define SMALL	32		/* use lowercase in hex (must be 32 == 0x20) */
+#define SPECIAL	64		/* prefix hex with "0x", octal with "0" */
 
 enum format_type {
 	FORMAT_TYPE_NONE, /* Just a string part */
@@ -420,16 +407,17 @@ enum format_type {
 };
 
 struct printf_spec {
-	enum format_type	type;
-	int			flags;		/* flags to number() */
-	int			field_width;	/* width of output field */
-	int			base;
-	int			precision;	/* # of digits/chars */
-	int			qualifier;
+	u8	type;		/* format_type enum */
+	u8	flags;		/* flags to number() */
+	u8	base;		/* number base, 8, 10 or 16 only */
+	u8	qualifier;	/* number qualifier, one of 'hHlLtzZ' */
+	s16	field_width;	/* width of output field */
+	s16	precision;	/* # of digits/chars */
 };
 
-static char *number(char *buf, char *end, unsigned long long num,
-			struct printf_spec spec)
+static noinline_for_stack
+char *number(char *buf, char *end, unsigned long long num,
+	     struct printf_spec spec)
 {
 	/* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
 	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
@@ -447,9 +435,9 @@ static char *number(char *buf, char *end, unsigned long long num,
 		spec.flags &= ~ZEROPAD;
 	sign = 0;
 	if (spec.flags & SIGN) {
-		if ((signed long long) num < 0) {
+		if ((signed long long)num < 0) {
 			sign = '-';
-			num = - (signed long long) num;
+			num = -(signed long long)num;
 			spec.field_width--;
 		} else if (spec.flags & PLUS) {
 			sign = '+';
@@ -477,7 +465,9 @@ static char *number(char *buf, char *end, unsigned long long num,
 	else if (spec.base != 10) { /* 8 or 16 */
 		int mask = spec.base - 1;
 		int shift = 3;
-		if (spec.base == 16) shift = 4;
+
+		if (spec.base == 16)
+			shift = 4;
 		do {
 			tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
 			num >>= shift;
@@ -492,7 +482,7 @@ static char *number(char *buf, char *end, unsigned long long num,
 	/* leading space padding */
 	spec.field_width -= spec.precision;
 	if (!(spec.flags & (ZEROPAD+LEFT))) {
-		while(--spec.field_width >= 0) {
+		while (--spec.field_width >= 0) {
 			if (buf < end)
 				*buf = ' ';
 			++buf;
@@ -542,15 +532,17 @@ static char *number(char *buf, char *end, unsigned long long num,
 			*buf = ' ';
 		++buf;
 	}
+
 	return buf;
 }
 
-static char *string(char *buf, char *end, char *s, struct printf_spec spec)
+static noinline_for_stack
+char *string(char *buf, char *end, const char *s, struct printf_spec spec)
 {
 	int len, i;
 
 	if ((unsigned long)s < PAGE_SIZE)
-		s = "<NULL>";
+		s = "(null)";
 
 	len = strnlen(s, spec.precision);
 
@@ -571,123 +563,379 @@ static char *string(char *buf, char *end, char *s, struct printf_spec spec)
 			*buf = ' ';
 		++buf;
 	}
+
 	return buf;
 }
 
-static char *symbol_string(char *buf, char *end, void *ptr,
-				struct printf_spec spec, char ext)
+static noinline_for_stack
+char *symbol_string(char *buf, char *end, void *ptr,
+		    struct printf_spec spec, char ext)
 {
 	unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
 	char sym[KSYM_SYMBOL_LEN];
-	if (ext != 'f')
+	if (ext != 'f' && ext != 's')
 		sprint_symbol(sym, value);
 	else
 		kallsyms_lookup(value, NULL, NULL, NULL, sym);
+
 	return string(buf, end, sym, spec);
 #else
-	spec.field_width = 2*sizeof(void *);
+	spec.field_width = 2 * sizeof(void *);
 	spec.flags |= SPECIAL | SMALL | ZEROPAD;
 	spec.base = 16;
+
 	return number(buf, end, value, spec);
 #endif
 }
 
-static char *resource_string(char *buf, char *end, struct resource *res,
-				struct printf_spec spec)
+static noinline_for_stack
+char *resource_string(char *buf, char *end, struct resource *res,
+		      struct printf_spec spec, const char *fmt)
 {
 #ifndef IO_RSRC_PRINTK_SIZE
-#define IO_RSRC_PRINTK_SIZE	4
+#define IO_RSRC_PRINTK_SIZE	6
 #endif
 
 #ifndef MEM_RSRC_PRINTK_SIZE
-#define MEM_RSRC_PRINTK_SIZE	8
+#define MEM_RSRC_PRINTK_SIZE	10
 #endif
-	struct printf_spec num_spec = {
+	static const struct printf_spec io_spec = {
 		.base = 16,
+		.field_width = IO_RSRC_PRINTK_SIZE,
 		.precision = -1,
 		.flags = SPECIAL | SMALL | ZEROPAD,
 	};
-	/* room for the actual numbers, the two "0x", -, [, ] and the final zero */
-	char sym[4*sizeof(resource_size_t) + 8];
-	char *p = sym, *pend = sym + sizeof(sym);
-	int size = -1;
+	static const struct printf_spec mem_spec = {
+		.base = 16,
+		.field_width = MEM_RSRC_PRINTK_SIZE,
+		.precision = -1,
+		.flags = SPECIAL | SMALL | ZEROPAD,
+	};
+	static const struct printf_spec bus_spec = {
+		.base = 16,
+		.field_width = 2,
+		.precision = -1,
+		.flags = SMALL | ZEROPAD,
+	};
+	static const struct printf_spec dec_spec = {
+		.base = 10,
+		.precision = -1,
+		.flags = 0,
+	};
+	static const struct printf_spec str_spec = {
+		.field_width = -1,
+		.precision = 10,
+		.flags = LEFT,
+	};
+	static const struct printf_spec flag_spec = {
+		.base = 16,
+		.precision = -1,
+		.flags = SPECIAL | SMALL,
+	};
 
-	if (res->flags & IORESOURCE_IO)
-		size = IO_RSRC_PRINTK_SIZE;
-	else if (res->flags & IORESOURCE_MEM)
-		size = MEM_RSRC_PRINTK_SIZE;
+	/* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8)
+	 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
+#define RSRC_BUF_SIZE		((2 * sizeof(resource_size_t)) + 4)
+#define FLAG_BUF_SIZE		(2 * sizeof(res->flags))
+#define DECODED_BUF_SIZE	sizeof("[mem - 64bit pref window disabled]")
+#define RAW_BUF_SIZE		sizeof("[mem - flags 0x]")
+	char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
+		     2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
+
+	char *p = sym, *pend = sym + sizeof(sym);
+	int decode = (fmt[0] == 'R') ? 1 : 0;
+	const struct printf_spec *specp;
 
 	*p++ = '[';
-	num_spec.field_width = size;
-	p = number(p, pend, res->start, num_spec);
-	*p++ = '-';
-	p = number(p, pend, res->end, num_spec);
+	if (res->flags & IORESOURCE_IO) {
+		p = string(p, pend, "io  ", str_spec);
+		specp = &io_spec;
+	} else if (res->flags & IORESOURCE_MEM) {
+		p = string(p, pend, "mem ", str_spec);
+		specp = &mem_spec;
+	} else if (res->flags & IORESOURCE_IRQ) {
+		p = string(p, pend, "irq ", str_spec);
+		specp = &dec_spec;
+	} else if (res->flags & IORESOURCE_DMA) {
+		p = string(p, pend, "dma ", str_spec);
+		specp = &dec_spec;
+	} else if (res->flags & IORESOURCE_BUS) {
+		p = string(p, pend, "bus ", str_spec);
+		specp = &bus_spec;
+	} else {
+		p = string(p, pend, "??? ", str_spec);
+		specp = &mem_spec;
+		decode = 0;
+	}
+	p = number(p, pend, res->start, *specp);
+	if (res->start != res->end) {
+		*p++ = '-';
+		p = number(p, pend, res->end, *specp);
+	}
+	if (decode) {
+		if (res->flags & IORESOURCE_MEM_64)
+			p = string(p, pend, " 64bit", str_spec);
+		if (res->flags & IORESOURCE_PREFETCH)
+			p = string(p, pend, " pref", str_spec);
+		if (res->flags & IORESOURCE_WINDOW)
+			p = string(p, pend, " window", str_spec);
+		if (res->flags & IORESOURCE_DISABLED)
+			p = string(p, pend, " disabled", str_spec);
+	} else {
+		p = string(p, pend, " flags ", str_spec);
+		p = number(p, pend, res->flags, flag_spec);
+	}
 	*p++ = ']';
-	*p = 0;
+	*p = '\0';
 
 	return string(buf, end, sym, spec);
 }
 
-static char *mac_address_string(char *buf, char *end, u8 *addr,
-				struct printf_spec spec)
+static noinline_for_stack
+char *mac_address_string(char *buf, char *end, u8 *addr,
+			 struct printf_spec spec, const char *fmt)
 {
-	char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */
+	char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")];
 	char *p = mac_addr;
 	int i;
+	char separator;
+
+	if (fmt[1] == 'F') {		/* FDDI canonical format */
+		separator = '-';
+	} else {
+		separator = ':';
+	}
 
 	for (i = 0; i < 6; i++) {
 		p = pack_hex_byte(p, addr[i]);
-		if (!(spec.flags & SPECIAL) && i != 5)
-			*p++ = ':';
+		if (fmt[0] == 'M' && i != 5)
+			*p++ = separator;
 	}
 	*p = '\0';
-	spec.flags &= ~SPECIAL;
 
 	return string(buf, end, mac_addr, spec);
 }
 
-static char *ip6_addr_string(char *buf, char *end, u8 *addr,
-				struct printf_spec spec)
+static noinline_for_stack
+char *ip4_string(char *p, const u8 *addr, const char *fmt)
 {
-	char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */
-	char *p = ip6_addr;
 	int i;
+	bool leading_zeros = (fmt[0] == 'i');
+	int index;
+	int step;
+
+	switch (fmt[2]) {
+	case 'h':
+#ifdef __BIG_ENDIAN
+		index = 0;
+		step = 1;
+#else
+		index = 3;
+		step = -1;
+#endif
+		break;
+	case 'l':
+		index = 3;
+		step = -1;
+		break;
+	case 'n':
+	case 'b':
+	default:
+		index = 0;
+		step = 1;
+		break;
+	}
+	for (i = 0; i < 4; i++) {
+		char temp[3];	/* hold each IP quad in reverse order */
+		int digits = put_dec_trunc(temp, addr[index]) - temp;
+		if (leading_zeros) {
+			if (digits < 3)
+				*p++ = '0';
+			if (digits < 2)
+				*p++ = '0';
+		}
+		/* reverse the digits in the quad */
+		while (digits--)
+			*p++ = temp[digits];
+		if (i < 3)
+			*p++ = '.';
+		index += step;
+	}
+	*p = '\0';
 
-	for (i = 0; i < 8; i++) {
-		p = pack_hex_byte(p, addr[2 * i]);
-		p = pack_hex_byte(p, addr[2 * i + 1]);
-		if (!(spec.flags & SPECIAL) && i != 7)
+	return p;
+}
+
+static noinline_for_stack
+char *ip6_compressed_string(char *p, const char *addr)
+{
+	int i, j, range;
+	unsigned char zerolength[8];
+	int longest = 1;
+	int colonpos = -1;
+	u16 word;
+	u8 hi, lo;
+	bool needcolon = false;
+	bool useIPv4;
+	struct in6_addr in6;
+
+	memcpy(&in6, addr, sizeof(struct in6_addr));
+
+	useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
+
+	memset(zerolength, 0, sizeof(zerolength));
+
+	if (useIPv4)
+		range = 6;
+	else
+		range = 8;
+
+	/* find position of longest 0 run */
+	for (i = 0; i < range; i++) {
+		for (j = i; j < range; j++) {
+			if (in6.s6_addr16[j] != 0)
+				break;
+			zerolength[i]++;
+		}
+	}
+	for (i = 0; i < range; i++) {
+		if (zerolength[i] > longest) {
+			longest = zerolength[i];
+			colonpos = i;
+		}
+	}
+
+	/* emit address */
+	for (i = 0; i < range; i++) {
+		if (i == colonpos) {
+			if (needcolon || i == 0)
+				*p++ = ':';
 			*p++ = ':';
+			needcolon = false;
+			i += longest - 1;
+			continue;
+		}
+		if (needcolon) {
+			*p++ = ':';
+			needcolon = false;
+		}
+		/* hex u16 without leading 0s */
+		word = ntohs(in6.s6_addr16[i]);
+		hi = word >> 8;
+		lo = word & 0xff;
+		if (hi) {
+			if (hi > 0x0f)
+				p = pack_hex_byte(p, hi);
+			else
+				*p++ = hex_asc_lo(hi);
+			p = pack_hex_byte(p, lo);
+		}
+		else if (lo > 0x0f)
+			p = pack_hex_byte(p, lo);
+		else
+			*p++ = hex_asc_lo(lo);
+		needcolon = true;
+	}
+
+	if (useIPv4) {
+		if (needcolon)
+			*p++ = ':';
+		p = ip4_string(p, &in6.s6_addr[12], "I4");
 	}
 	*p = '\0';
-	spec.flags &= ~SPECIAL;
 
-	return string(buf, end, ip6_addr, spec);
+	return p;
 }
 
-static char *ip4_addr_string(char *buf, char *end, u8 *addr,
-				struct printf_spec spec)
+static noinline_for_stack
+char *ip6_string(char *p, const char *addr, const char *fmt)
 {
-	char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */
-	char temp[3];	/* hold each IP quad in reverse order */
-	char *p = ip4_addr;
-	int i, digits;
+	int i;
 
-	for (i = 0; i < 4; i++) {
-		digits = put_dec_trunc(temp, addr[i]) - temp;
-		/* reverse the digits in the quad */
-		while (digits--)
-			*p++ = temp[digits];
-		if (i != 3)
-			*p++ = '.';
+	for (i = 0; i < 8; i++) {
+		p = pack_hex_byte(p, *addr++);
+		p = pack_hex_byte(p, *addr++);
+		if (fmt[0] == 'I' && i != 7)
+			*p++ = ':';
 	}
 	*p = '\0';
-	spec.flags &= ~SPECIAL;
+
+	return p;
+}
+
+static noinline_for_stack
+char *ip6_addr_string(char *buf, char *end, const u8 *addr,
+		      struct printf_spec spec, const char *fmt)
+{
+	char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
+
+	if (fmt[0] == 'I' && fmt[2] == 'c')
+		ip6_compressed_string(ip6_addr, addr);
+	else
+		ip6_string(ip6_addr, addr, fmt);
+
+	return string(buf, end, ip6_addr, spec);
+}
+
+static noinline_for_stack
+char *ip4_addr_string(char *buf, char *end, const u8 *addr,
+		      struct printf_spec spec, const char *fmt)
+{
+	char ip4_addr[sizeof("255.255.255.255")];
+
+	ip4_string(ip4_addr, addr, fmt);
 
 	return string(buf, end, ip4_addr, spec);
 }
 
+static noinline_for_stack
+char *uuid_string(char *buf, char *end, const u8 *addr,
+		  struct printf_spec spec, const char *fmt)
+{
+	char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
+	char *p = uuid;
+	int i;
+	static const u8 be[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+	static const u8 le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
+	const u8 *index = be;
+	bool uc = false;
+
+	switch (*(++fmt)) {
+	case 'L':
+		uc = true;		/* fall-through */
+	case 'l':
+		index = le;
+		break;
+	case 'B':
+		uc = true;
+		break;
+	}
+
+	for (i = 0; i < 16; i++) {
+		p = pack_hex_byte(p, addr[index[i]]);
+		switch (i) {
+		case 3:
+		case 5:
+		case 7:
+		case 9:
+			*p++ = '-';
+			break;
+		}
+	}
+
+	*p = 0;
+
+	if (uc) {
+		p = uuid;
+		do {
+			*p = toupper(*p);
+		} while (*(++p));
+	}
+
+	return string(buf, end, uuid, spec);
+}
+
 /*
  * Show a '%p' thing.  A kernel extension is that the '%p' is followed
  * by an extra set of alphanumeric characters that are extended format
@@ -697,25 +945,58 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr,
  *
  * - 'F' For symbolic function descriptor pointers with offset
  * - 'f' For simple symbolic function names without offset
- * - 'S' For symbolic direct pointers
- * - 'R' For a struct resource pointer, it prints the range of
- *       addresses (not the name nor the flags)
+ * - 'S' For symbolic direct pointers with offset
+ * - 's' For symbolic direct pointers without offset
+ * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
+ * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201]
  * - 'M' For a 6-byte MAC address, it prints the address in the
  *       usual colon-separated hex notation
- * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way (dot-separated
- *       decimal for v4 and colon separated network-order 16 bit hex for v6)
- * - 'i' [46] for 'raw' IPv4/IPv6 addresses, IPv6 omits the colons, IPv4 is
- *       currently the same
+ * - 'm' For a 6-byte MAC address, it prints the hex address without colons
+ * - 'MF' For a 6-byte MAC FDDI address, it prints the address
+ *       with a dash-separated hex notation
+ * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way
+ *       IPv4 uses dot-separated decimal without leading 0's (1.2.3.4)
+ *       IPv6 uses colon separated network-order 16 bit hex with leading 0's
+ * - 'i' [46] for 'raw' IPv4/IPv6 addresses
+ *       IPv6 omits the colons (01020304...0f)
+ *       IPv4 uses dot-separated decimal with leading 0's (010.123.045.006)
+ * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order
+ * - 'I6c' for IPv6 addresses printed as specified by
+ *       http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
+ * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form
+ *       "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+ *       Options for %pU are:
+ *         b big endian lower case hex (default)
+ *         B big endian UPPER case hex
+ *         l little endian lower case hex
+ *         L little endian UPPER case hex
+ *           big endian output byte order is:
+ *             [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15]
+ *           little endian output byte order is:
+ *             [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15]
+ * - 'V' For a struct va_format which contains a format string * and va_list *,
+ *       call vsnprintf(->format, *->va_list).
+ *       Implements a "recursive vsnprintf".
+ *       Do not use this feature without some mechanism to verify the
+ *       correctness of the format string and va_list arguments.
  *
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
  * pointer to the real address.
  */
-static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
-			struct printf_spec spec)
+static noinline_for_stack
+char *pointer(const char *fmt, char *buf, char *end, void *ptr,
+	      struct printf_spec spec)
 {
-	if (!ptr)
+	if (!ptr) {
+		/*
+		 * Print (null) with the same width as a pointer so it makes
+		 * tabular output look nice.
+		 */
+		if (spec.field_width == -1)
+			spec.field_width = 2 * sizeof(void *);
 		return string(buf, end, "(null)", spec);
+	}
 
 	switch (*fmt) {
 	case 'F':
@@ -723,28 +1004,41 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 		ptr = dereference_function_descriptor(ptr);
 		/* Fallthrough */
 	case 'S':
+	case 's':
 		return symbol_string(buf, end, ptr, spec, *fmt);
 	case 'R':
-		return resource_string(buf, end, ptr, spec);
-	case 'm':
-		spec.flags |= SPECIAL;
-		/* Fallthrough */
-	case 'M':
-		return mac_address_string(buf, end, ptr, spec);
-	case 'i':
-		spec.flags |= SPECIAL;
-		/* Fallthrough */
-	case 'I':
-		if (fmt[1] == '6')
-			return ip6_addr_string(buf, end, ptr, spec);
-		if (fmt[1] == '4')
-			return ip4_addr_string(buf, end, ptr, spec);
-		spec.flags &= ~SPECIAL;
+	case 'r':
+		return resource_string(buf, end, ptr, spec, fmt);
+	case 'M':			/* Colon separated: 00:01:02:03:04:05 */
+	case 'm':			/* Contiguous: 000102030405 */
+					/* [mM]F (FDDI, bit reversed) */
+		return mac_address_string(buf, end, ptr, spec, fmt);
+	case 'I':			/* Formatted IP supported
+					 * 4:	1.2.3.4
+					 * 6:	0001:0203:...:0708
+					 * 6c:	1::708 or 1::1.2.3.4
+					 */
+	case 'i':			/* Contiguous:
+					 * 4:	001.002.003.004
+					 * 6:   000102...0f
+					 */
+		switch (fmt[1]) {
+		case '6':
+			return ip6_addr_string(buf, end, ptr, spec, fmt);
+		case '4':
+			return ip4_addr_string(buf, end, ptr, spec, fmt);
+		}
 		break;
+	case 'U':
+		return uuid_string(buf, end, ptr, spec, fmt);
+	case 'V':
+		return buf + vsnprintf(buf, end - buf,
+				       ((struct va_format *)ptr)->fmt,
+				       *(((struct va_format *)ptr)->va));
 	}
 	spec.flags |= SMALL;
 	if (spec.field_width == -1) {
-		spec.field_width = 2*sizeof(void *);
+		spec.field_width = 2 * sizeof(void *);
 		spec.flags |= ZEROPAD;
 	}
 	spec.base = 16;
@@ -772,7 +1066,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
  * @precision: precision of a number
  * @qualifier: qualifier of a number (long, size_t, ...)
  */
-static int format_decode(const char *fmt, struct printf_spec *spec)
+static noinline_for_stack
+int format_decode(const char *fmt, struct printf_spec *spec)
 {
 	const char *start = fmt;
 
@@ -858,8 +1153,8 @@ precision:
 qualifier:
 	/* get the conversion qualifier */
 	spec->qualifier = -1;
-	if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-	    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
+	if (*fmt == 'h' || TOLOWER(*fmt) == 'l' ||
+	    TOLOWER(*fmt) == 'z' || *fmt == 't') {
 		spec->qualifier = *fmt++;
 		if (unlikely(spec->qualifier == *fmt)) {
 			if (spec->qualifier == 'l') {
@@ -926,7 +1221,7 @@ qualifier:
 			spec->type = FORMAT_TYPE_LONG;
 		else
 			spec->type = FORMAT_TYPE_ULONG;
-	} else if (spec->qualifier == 'Z' || spec->qualifier == 'z') {
+	} else if (TOLOWER(spec->qualifier) == 'z') {
 		spec->type = FORMAT_TYPE_SIZE_T;
 	} else if (spec->qualifier == 't') {
 		spec->type = FORMAT_TYPE_PTRDIFF;
@@ -958,10 +1253,23 @@ qualifier:
  * @args: Arguments for the format string
  *
  * This function follows C99 vsnprintf, but has some extensions:
- * %pS output the name of a text symbol
+ * %pS output the name of a text symbol with offset
+ * %ps output the name of a text symbol without offset
  * %pF output the name of a function pointer with its offset
  * %pf output the name of a function pointer without its offset
- * %pR output the address range in a struct resource
+ * %pR output the address range in a struct resource with decoded flags
+ * %pr output the address range in a struct resource with raw flags
+ * %pM output a 6-byte MAC address with colons
+ * %pm output a 6-byte MAC address without colons
+ * %pI4 print an IPv4 address without leading zeros
+ * %pi4 print an IPv4 address with leading zeros
+ * %pI6 print an IPv6 address with colons
+ * %pi6 print an IPv6 address without colons
+ * %pI6c print an IPv6 address as specified by
+ *   http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
+ * %pU[bBlL] print a UUID/GUID in big or little endian using lower or upper
+ *   case.
+ * %n is ignored
  *
  * The return value is the number of characters which would
  * be generated for the given input, excluding the trailing
@@ -977,19 +1285,13 @@ qualifier:
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
 	unsigned long long num;
-	char *str, *end, c;
-	int read;
+	char *str, *end;
 	struct printf_spec spec = {0};
 
 	/* Reject out-of-range values early.  Large positive sizes are
 	   used for unknown buffer sizes. */
-	if (unlikely((int) size < 0)) {
-		/* There can be only one.. */
-		static char warn = 1;
-		WARN_ON(warn);
-		warn = 0;
+	if (WARN_ON_ONCE((int) size < 0))
 		return 0;
-	}
 
 	str = buf;
 	end = buf + size;
@@ -1002,8 +1304,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 
 	while (*fmt) {
 		const char *old_fmt = fmt;
-
-		read = format_decode(fmt, &spec);
+		int read = format_decode(fmt, &spec);
 
 		fmt += read;
 
@@ -1027,7 +1328,9 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 			spec.precision = va_arg(args, int);
 			break;
 
-		case FORMAT_TYPE_CHAR:
+		case FORMAT_TYPE_CHAR: {
+			char c;
+
 			if (!(spec.flags & LEFT)) {
 				while (--spec.field_width > 0) {
 					if (str < end)
@@ -1046,6 +1349,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 				++str;
 			}
 			break;
+		}
 
 		case FORMAT_TYPE_STR:
 			str = string(str, end, va_arg(args, char *), spec);
@@ -1071,13 +1375,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 			break;
 
 		case FORMAT_TYPE_NRCHARS: {
-			int qualifier = spec.qualifier;
+			u8 qualifier = spec.qualifier;
 
 			if (qualifier == 'l') {
 				long *ip = va_arg(args, long *);
 				*ip = (str - buf);
-			} else if (qualifier == 'Z' ||
-					qualifier == 'z') {
+			} else if (TOLOWER(qualifier) == 'z') {
 				size_t *ip = va_arg(args, size_t *);
 				*ip = (str - buf);
 			} else {
@@ -1160,7 +1463,8 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
 	int i;
 
-	i=vsnprintf(buf,size,fmt,args);
+	i = vsnprintf(buf, size, fmt, args);
+
 	return (i >= size) ? (size - 1) : i;
 }
 EXPORT_SYMBOL(vscnprintf);
@@ -1179,14 +1483,15 @@ EXPORT_SYMBOL(vscnprintf);
  *
  * See the vsnprintf() documentation for format string extensions over C99.
  */
-int snprintf(char * buf, size_t size, const char *fmt, ...)
+int snprintf(char *buf, size_t size, const char *fmt, ...)
 {
 	va_list args;
 	int i;
 
 	va_start(args, fmt);
-	i=vsnprintf(buf,size,fmt,args);
+	i = vsnprintf(buf, size, fmt, args);
 	va_end(args);
+
 	return i;
 }
 EXPORT_SYMBOL(snprintf);
@@ -1199,10 +1504,10 @@ EXPORT_SYMBOL(snprintf);
  * @...: Arguments for the format string
  *
  * The return value is the number of characters written into @buf not including
- * the trailing '\0'. If @size is <= 0 the function returns 0.
+ * the trailing '\0'. If @size is == 0 the function returns 0.
  */
 
-int scnprintf(char * buf, size_t size, const char *fmt, ...)
+int scnprintf(char *buf, size_t size, const char *fmt, ...)
 {
 	va_list args;
 	int i;
@@ -1210,7 +1515,12 @@ int scnprintf(char * buf, size_t size, const char *fmt, ...)
 	va_start(args, fmt);
 	i = vsnprintf(buf, size, fmt, args);
 	va_end(args);
-	return (i >= size) ? (size - 1) : i;
+
+	if (likely(i < size))
+		return i;
+	if (size != 0)
+		return size - 1;
+	return 0;
 }
 EXPORT_SYMBOL(scnprintf);
 
@@ -1247,14 +1557,15 @@ EXPORT_SYMBOL(vsprintf);
  *
  * See the vsnprintf() documentation for format string extensions over C99.
  */
-int sprintf(char * buf, const char *fmt, ...)
+int sprintf(char *buf, const char *fmt, ...)
 {
 	va_list args;
 	int i;
 
 	va_start(args, fmt);
-	i=vsnprintf(buf, INT_MAX, fmt, args);
+	i = vsnprintf(buf, INT_MAX, fmt, args);
 	va_end(args);
+
 	return i;
 }
 EXPORT_SYMBOL(sprintf);
@@ -1287,7 +1598,6 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
 {
 	struct printf_spec spec = {0};
 	char *str, *end;
-	int read;
 
 	str = (char *)bin_buf;
 	end = (char *)(bin_buf + size);
@@ -1312,14 +1622,15 @@ do {									\
 	str += sizeof(type);						\
 } while (0)
 
-
 	while (*fmt) {
-		read = format_decode(fmt, &spec);
+		int read = format_decode(fmt, &spec);
 
 		fmt += read;
 
 		switch (spec.type) {
 		case FORMAT_TYPE_NONE:
+		case FORMAT_TYPE_INVALID:
+		case FORMAT_TYPE_PERCENT_CHAR:
 			break;
 
 		case FORMAT_TYPE_WIDTH:
@@ -1334,13 +1645,14 @@ do {									\
 		case FORMAT_TYPE_STR: {
 			const char *save_str = va_arg(args, char *);
 			size_t len;
+
 			if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
 					|| (unsigned long)save_str < PAGE_SIZE)
-				save_str = "<NULL>";
-			len = strlen(save_str);
-			if (str + len + 1 < end)
-				memcpy(str, save_str, len + 1);
-			str += len + 1;
+				save_str = "(null)";
+			len = strlen(save_str) + 1;
+			if (str + len < end)
+				memcpy(str, save_str, len);
+			str += len;
 			break;
 		}
 
@@ -1351,19 +1663,13 @@ do {									\
 				fmt++;
 			break;
 
-		case FORMAT_TYPE_PERCENT_CHAR:
-			break;
-
-		case FORMAT_TYPE_INVALID:
-			break;
-
 		case FORMAT_TYPE_NRCHARS: {
 			/* skip %n 's argument */
-			int qualifier = spec.qualifier;
+			u8 qualifier = spec.qualifier;
 			void *skip_arg;
 			if (qualifier == 'l')
 				skip_arg = va_arg(args, long *);
-			else if (qualifier == 'Z' || qualifier == 'z')
+			else if (TOLOWER(qualifier) == 'z')
 				skip_arg = va_arg(args, size_t *);
 			else
 				skip_arg = va_arg(args, int *);
@@ -1399,8 +1705,8 @@ do {									\
 			}
 		}
 	}
-	return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
 
+	return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
 #undef save_arg
 }
 EXPORT_SYMBOL_GPL(vbin_printf);
@@ -1417,11 +1723,7 @@ EXPORT_SYMBOL_GPL(vbin_printf);
  * a binary buffer that generated by vbin_printf.
  *
  * The format follows C99 vsnprintf, but has some extensions:
- * %pS output the name of a text symbol
- * %pF output the name of a function pointer with its offset
- * %pf output the name of a function pointer without its offset
- * %pR output the address range in a struct resource
- * %n is ignored
+ *  see vsnprintf comment for details.
  *
  * The return value is the number of characters which would
  * be generated for the given input, excluding the trailing
@@ -1433,19 +1735,12 @@ EXPORT_SYMBOL_GPL(vbin_printf);
  */
 int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 {
-	unsigned long long num;
-	char *str, *end, c;
-	const char *args = (const char *)bin_buf;
-
 	struct printf_spec spec = {0};
+	char *str, *end;
+	const char *args = (const char *)bin_buf;
 
-	if (unlikely((int) size < 0)) {
-		/* There can be only one.. */
-		static char warn = 1;
-		WARN_ON(warn);
-		warn = 0;
+	if (WARN_ON_ONCE((int) size < 0))
 		return 0;
-	}
 
 	str = buf;
 	end = buf + size;
@@ -1472,10 +1767,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 	}
 
 	while (*fmt) {
-		int read;
 		const char *old_fmt = fmt;
-
-		read = format_decode(fmt, &spec);
+		int read = format_decode(fmt, &spec);
 
 		fmt += read;
 
@@ -1499,7 +1792,9 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 			spec.precision = get_arg(int);
 			break;
 
-		case FORMAT_TYPE_CHAR:
+		case FORMAT_TYPE_CHAR: {
+			char c;
+
 			if (!(spec.flags & LEFT)) {
 				while (--spec.field_width > 0) {
 					if (str < end)
@@ -1517,11 +1812,11 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 				++str;
 			}
 			break;
+		}
 
 		case FORMAT_TYPE_STR: {
 			const char *str_arg = args;
-			size_t len = strlen(str_arg);
-			args += len + 1;
+			args += strlen(str_arg) + 1;
 			str = string(str, end, (char *)str_arg, spec);
 			break;
 		}
@@ -1533,11 +1828,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 			break;
 
 		case FORMAT_TYPE_PERCENT_CHAR:
-			if (str < end)
-				*str = '%';
-			++str;
-			break;
-
 		case FORMAT_TYPE_INVALID:
 			if (str < end)
 				*str = '%';
@@ -1548,15 +1838,15 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 			/* skip */
 			break;
 
-		default:
+		default: {
+			unsigned long long num;
+
 			switch (spec.type) {
 
 			case FORMAT_TYPE_LONG_LONG:
 				num = get_arg(long long);
 				break;
 			case FORMAT_TYPE_ULONG:
-				num = get_arg(unsigned long);
-				break;
 			case FORMAT_TYPE_LONG:
 				num = get_arg(unsigned long);
 				break;
@@ -1586,8 +1876,9 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 			}
 
 			str = number(str, end, num, spec);
-		}
-	}
+		} /* default: */
+		} /* switch(spec.type) */
+	} /* while(*fmt) */
 
 	if (size > 0) {
 		if (str < end)
@@ -1621,6 +1912,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...)
 	va_start(args, fmt);
 	ret = vbin_printf(bin_buf, size, fmt, args);
 	va_end(args);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(bprintf);
@@ -1633,27 +1925,25 @@ EXPORT_SYMBOL_GPL(bprintf);
  * @fmt:	format of buffer
  * @args:	arguments
  */
-int vsscanf(const char * buf, const char * fmt, va_list args)
+int vsscanf(const char *buf, const char *fmt, va_list args)
 {
 	const char *str = buf;
 	char *next;
 	char digit;
 	int num = 0;
-	int qualifier;
-	int base;
-	int field_width;
-	int is_sign = 0;
+	u8 qualifier;
+	u8 base;
+	s16 field_width;
+	bool is_sign;
 
-	while(*fmt && *str) {
+	while (*fmt && *str) {
 		/* skip any white space in format */
 		/* white space in format matchs any amount of
 		 * white space, including none, in the input.
 		 */
 		if (isspace(*fmt)) {
-			while (isspace(*fmt))
-				++fmt;
-			while (isspace(*str))
-				++str;
+			fmt = skip_spaces(++fmt);
+			str = skip_spaces(str);
 		}
 
 		/* anything that is not a conversion must match exactly */
@@ -1666,12 +1956,12 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
 		if (!*fmt)
 			break;
 		++fmt;
-		
+
 		/* skip this conversion.
 		 * advance both strings to next white space
 		 */
 		if (*fmt == '*') {
-			while (!isspace(*fmt) && *fmt)
+			while (!isspace(*fmt) && *fmt != '%' && *fmt)
 				fmt++;
 			while (!isspace(*str) && *str)
 				str++;
@@ -1685,8 +1975,8 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
 
 		/* get conversion qualifier */
 		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt == 'Z' || *fmt == 'z') {
+		if (*fmt == 'h' || TOLOWER(*fmt) == 'l' ||
+		    TOLOWER(*fmt) == 'z') {
 			qualifier = *fmt++;
 			if (unlikely(qualifier == *fmt)) {
 				if (qualifier == 'h') {
@@ -1698,16 +1988,17 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
 				}
 			}
 		}
-		base = 10;
-		is_sign = 0;
 
 		if (!*fmt || !*str)
 			break;
 
-		switch(*fmt++) {
+		base = 10;
+		is_sign = 0;
+
+		switch (*fmt++) {
 		case 'c':
 		{
-			char *s = (char *) va_arg(args,char*);
+			char *s = (char *)va_arg(args, char*);
 			if (field_width == -1)
 				field_width = 1;
 			do {
@@ -1718,17 +2009,15 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
 		continue;
 		case 's':
 		{
-			char *s = (char *) va_arg(args, char *);
-			if(field_width == -1)
-				field_width = INT_MAX;
+			char *s = (char *)va_arg(args, char *);
+			if (field_width == -1)
+				field_width = SHRT_MAX;
 			/* first, skip leading white space in buffer */
-			while (isspace(*str))
-				str++;
+			str = skip_spaces(str);
 
 			/* now copy until next white space */
-			while (*str && !isspace(*str) && field_width--) {
+			while (*str && !isspace(*str) && field_width--)
 				*s++ = *str++;
-			}
 			*s = '\0';
 			num++;
 		}
@@ -1736,7 +2025,7 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
 		case 'n':
 			/* return number of characters read so far */
 		{
-			int *i = (int *)va_arg(args,int*);
+			int *i = (int *)va_arg(args, int*);
 			*i = str - buf;
 		}
 		continue;
@@ -1748,14 +2037,14 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
 			base = 16;
 			break;
 		case 'i':
-                        base = 0;
+			base = 0;
 		case 'd':
 			is_sign = 1;
 		case 'u':
 			break;
 		case '%':
 			/* looking for '%' in str */
-			if (*str++ != '%') 
+			if (*str++ != '%')
 				return num;
 			continue;
 		default:
@@ -1766,71 +2055,70 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
 		/* have some sort of integer conversion.
 		 * first, skip white space in buffer.
 		 */
-		while (isspace(*str))
-			str++;
+		str = skip_spaces(str);
 
 		digit = *str;
 		if (is_sign && digit == '-')
 			digit = *(str + 1);
 
 		if (!digit
-                    || (base == 16 && !isxdigit(digit))
-                    || (base == 10 && !isdigit(digit))
-                    || (base == 8 && (!isdigit(digit) || digit > '7'))
-                    || (base == 0 && !isdigit(digit)))
-				break;
+		    || (base == 16 && !isxdigit(digit))
+		    || (base == 10 && !isdigit(digit))
+		    || (base == 8 && (!isdigit(digit) || digit > '7'))
+		    || (base == 0 && !isdigit(digit)))
+			break;
 
-		switch(qualifier) {
+		switch (qualifier) {
 		case 'H':	/* that's 'hh' in format */
 			if (is_sign) {
-				signed char *s = (signed char *) va_arg(args,signed char *);
-				*s = (signed char) simple_strtol(str,&next,base);
+				signed char *s = (signed char *)va_arg(args, signed char *);
+				*s = (signed char)simple_strtol(str, &next, base);
 			} else {
-				unsigned char *s = (unsigned char *) va_arg(args, unsigned char *);
-				*s = (unsigned char) simple_strtoul(str, &next, base);
+				unsigned char *s = (unsigned char *)va_arg(args, unsigned char *);
+				*s = (unsigned char)simple_strtoul(str, &next, base);
 			}
 			break;
 		case 'h':
 			if (is_sign) {
-				short *s = (short *) va_arg(args,short *);
-				*s = (short) simple_strtol(str,&next,base);
+				short *s = (short *)va_arg(args, short *);
+				*s = (short)simple_strtol(str, &next, base);
 			} else {
-				unsigned short *s = (unsigned short *) va_arg(args, unsigned short *);
-				*s = (unsigned short) simple_strtoul(str, &next, base);
+				unsigned short *s = (unsigned short *)va_arg(args, unsigned short *);
+				*s = (unsigned short)simple_strtoul(str, &next, base);
 			}
 			break;
 		case 'l':
 			if (is_sign) {
-				long *l = (long *) va_arg(args,long *);
-				*l = simple_strtol(str,&next,base);
+				long *l = (long *)va_arg(args, long *);
+				*l = simple_strtol(str, &next, base);
 			} else {
-				unsigned long *l = (unsigned long*) va_arg(args,unsigned long*);
-				*l = simple_strtoul(str,&next,base);
+				unsigned long *l = (unsigned long *)va_arg(args, unsigned long *);
+				*l = simple_strtoul(str, &next, base);
 			}
 			break;
 		case 'L':
 			if (is_sign) {
-				long long *l = (long long*) va_arg(args,long long *);
-				*l = simple_strtoll(str,&next,base);
+				long long *l = (long long *)va_arg(args, long long *);
+				*l = simple_strtoll(str, &next, base);
 			} else {
-				unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*);
-				*l = simple_strtoull(str,&next,base);
+				unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *);
+				*l = simple_strtoull(str, &next, base);
 			}
 			break;
 		case 'Z':
 		case 'z':
 		{
-			size_t *s = (size_t*) va_arg(args,size_t*);
-			*s = (size_t) simple_strtoul(str,&next,base);
+			size_t *s = (size_t *)va_arg(args, size_t *);
+			*s = (size_t)simple_strtoul(str, &next, base);
 		}
 		break;
 		default:
 			if (is_sign) {
-				int *i = (int *) va_arg(args, int*);
-				*i = (int) simple_strtol(str,&next,base);
+				int *i = (int *)va_arg(args, int *);
+				*i = (int)simple_strtol(str, &next, base);
 			} else {
-				unsigned int *i = (unsigned int*) va_arg(args, unsigned int*);
-				*i = (unsigned int) simple_strtoul(str,&next,base);
+				unsigned int *i = (unsigned int *)va_arg(args, unsigned int*);
+				*i = (unsigned int)simple_strtoul(str, &next, base);
 			}
 			break;
 		}
@@ -1861,14 +2149,15 @@ EXPORT_SYMBOL(vsscanf);
  * @fmt:	formatting of buffer
  * @...:	resulting arguments
  */
-int sscanf(const char * buf, const char * fmt, ...)
+int sscanf(const char *buf, const char *fmt, ...)
 {
 	va_list args;
 	int i;
 
-	va_start(args,fmt);
-	i = vsscanf(buf,fmt,args);
+	va_start(args, fmt);
+	i = vsscanf(buf, fmt, args);
 	va_end(args);
+
 	return i;
 }
 EXPORT_SYMBOL(sscanf);
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index c3e4a2baf835..46a31e5f49c3 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -135,7 +135,7 @@ static const config configuration_table[10] = {
 
 /* ===========================================================================
  * Update a hash value with the given input byte
- * IN  assertion: all calls to to UPDATE_HASH are made with consecutive
+ * IN  assertion: all calls to UPDATE_HASH are made with consecutive
  *    input characters, so that a running hash key can be computed from the
  *    previous key instead of complete recalculation each time.
  */
@@ -146,7 +146,7 @@ static const config configuration_table[10] = {
  * Insert string str in the dictionary and set match_head to the previous head
  * of the hash chain (the most recent string with same hash key). Return
  * the previous length of the hash chain.
- * IN  assertion: all calls to to INSERT_STRING are made with consecutive
+ * IN  assertion: all calls to INSERT_STRING are made with consecutive
  *    input characters and the first MIN_MATCH bytes of str are valid
  *    (except for the last MIN_MATCH-1 bytes of the input file).
  */
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 8550b0c05d00..2c13ecc5bb2c 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -21,12 +21,31 @@
    - Pentium III (Anderson)
    - M68060 (Nikl)
  */
+union uu {
+	unsigned short us;
+	unsigned char b[2];
+};
+
+/* Endian independed version */
+static inline unsigned short
+get_unaligned16(const unsigned short *p)
+{
+	union uu  mm;
+	unsigned char *b = (unsigned char *)p;
+
+	mm.b[0] = b[0];
+	mm.b[1] = b[1];
+	return mm.us;
+}
+
 #ifdef POSTINC
 #  define OFF 0
 #  define PUP(a) *(a)++
+#  define UP_UNALIGNED(a) get_unaligned16((a)++)
 #else
 #  define OFF 1
 #  define PUP(a) *++(a)
+#  define UP_UNALIGNED(a) get_unaligned16(++(a))
 #endif
 
 /*
@@ -239,18 +258,50 @@ void inflate_fast(z_streamp strm, unsigned start)
                     }
                 }
                 else {
+		    unsigned short *sout;
+		    unsigned long loops;
+
                     from = out - dist;          /* copy direct from output */
-                    do {                        /* minimum length is three */
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        len -= 3;
-                    } while (len > 2);
-                    if (len) {
-                        PUP(out) = PUP(from);
-                        if (len > 1)
-                            PUP(out) = PUP(from);
-                    }
+		    /* minimum length is three */
+		    /* Align out addr */
+		    if (!((long)(out - 1 + OFF) & 1)) {
+			PUP(out) = PUP(from);
+			len--;
+		    }
+		    sout = (unsigned short *)(out - OFF);
+		    if (dist > 2) {
+			unsigned short *sfrom;
+
+			sfrom = (unsigned short *)(from - OFF);
+			loops = len >> 1;
+			do
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+			    PUP(sout) = PUP(sfrom);
+#else
+			    PUP(sout) = UP_UNALIGNED(sfrom);
+#endif
+			while (--loops);
+			out = (unsigned char *)sout + OFF;
+			from = (unsigned char *)sfrom + OFF;
+		    } else { /* dist == 1 or dist == 2 */
+			unsigned short pat16;
+
+			pat16 = *(sout-1+OFF);
+			if (dist == 1) {
+				union uu mm;
+				/* copy one char pattern to both bytes */
+				mm.us = pat16;
+				mm.b[0] = mm.b[1];
+				pat16 = mm.us;
+			}
+			loops = len >> 1;
+			do
+			    PUP(sout) = pat16;
+			while (--loops);
+			out = (unsigned char *)sout + OFF;
+		    }
+		    if (len & 1)
+			PUP(out) = PUP(from);
                 }
             }
             else if ((op & 64) == 0) {          /* 2nd level distance code */
author	Paul Mundt <lethal@linux-sh.org>	2011-01-13 09:06:28 +0300
committer	Paul Mundt <lethal@linux-sh.org>	2011-01-13 09:06:28 +0300
commit	f43dc23d5ea91fca257be02138a255f02d98e806 (patch)
tree	b29722f6e965316e90ac97abf79923ced250dc21 /lib
parent	f8e53553f452dcbf67cb89c8cba63a1cd6eb4cc0 (diff)
parent	4162cf64973df51fc885825bc9ca4d055891c49f (diff)
download	linux-f43dc23d5ea91fca257be02138a255f02d98e806.tar.xz