193 files changed, 4525 insertions, 1704 deletions
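
The most widely repeated behavioral change below is in the per-architecture idle loops (arch/arm/kernel/process.c, arch/sh/kernel/idle.c, arch/x86/kernel/process_32.c and process_64.c): instead of invoking the legacy pm_idle() hook directly, each loop now tries the cpuidle framework first and falls back to pm_idle() only when cpuidle reports failure. A minimal C sketch of that shared control flow follows; the idle_once() wrapper is hypothetical, while cpuidle_idle_call(), pm_idle and the critical-timings calls are taken from the hunks themselves (note the ARM and SH hunks spell the entry point cpuidle_call_idle(), whereas the x86 hunks use cpuidle_idle_call()):

	/* Simplified illustration, not part of the patch. */
	extern int cpuidle_idle_call(void);	/* non-zero when cpuidle is disabled or has no driver */
	extern void (*pm_idle)(void);		/* legacy per-arch idle hook */
	extern void stop_critical_timings(void);
	extern void start_critical_timings(void);

	static void idle_once(void)
	{
		stop_critical_timings();
		if (cpuidle_idle_call())	/* e.g. booted with "cpuidle.off=1" */
			pm_idle();		/* fall back to the old behavior */
		start_critical_timings();
	}

Booting with "cpuidle.off=1" (documented in the kernel-parameters.txt hunk below) forces the fallback path, which is why the pm_idle hook survives for now.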
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt index dfab71848dc8..5cc699ba5453 100644 --- a/Documentation/acpi/apei/einj.txt +++ b/Documentation/acpi/apei/einj.txt @@ -48,12 +48,19 @@ directory apei/einj. The following files are provided. - param1 This file is used to set the first error parameter value. Effect of parameter depends on error_type specified. For memory error, this is - physical memory address. + physical memory address. Only available if param_extension module + parameter is specified. - param2 This file is used to set the second error parameter value. Effect of parameter depends on error_type specified. For memory error, this is - physical memory address mask. + physical memory address mask. Only available if param_extension + module parameter is specified. + +Injecting parameter support is a BIOS version specific extension, that +is, it only works on some BIOS version. If you want to use it, please +make sure your BIOS version has the proper support and specify +"param_extension=y" in module parameter. For more information about EINJ, please refer to ACPI specification version 4.0, section 17.5. diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/gpio/gpio_keys.txt index 7190c99d7611..5c2c02140a62 100644 --- a/Documentation/devicetree/bindings/gpio/gpio_keys.txt +++ b/Documentation/devicetree/bindings/gpio/gpio_keys.txt @@ -10,7 +10,7 @@ Optional properties: Each button (key) is represented as a sub-node of "gpio-keys": Subnode properties: - - gpios: OF devcie-tree gpio specificatin. + - gpios: OF device-tree gpio specification. - label: Descriptive name of the key. - linux,code: Keycode to emit. diff --git a/Documentation/devicetree/bindings/input/fsl-mma8450.txt b/Documentation/devicetree/bindings/input/fsl-mma8450.txt new file mode 100644 index 000000000000..a00c94ccbdee --- /dev/null +++ b/Documentation/devicetree/bindings/input/fsl-mma8450.txt @@ -0,0 +1,11 @@ +* Freescale MMA8450 3-Axis Accelerometer + +Required properties: +- compatible : "fsl,mma8450". + +Example: + +accelerometer: mma8450@1c { + compatible = "fsl,mma8450"; + reg = <0x1c>; +}; diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt index 7be15e44d481..82a5d250d75e 100644 --- a/Documentation/fault-injection/fault-injection.txt +++ b/Documentation/fault-injection/fault-injection.txt @@ -143,8 +143,7 @@ o provide a way to configure fault attributes failslab, fail_page_alloc, and fail_make_request use this way. Helper functions: - init_fault_attr_dentries(entries, attr, name); - void cleanup_fault_attr_dentries(entries); + fault_create_debugfs_attr(name, parent, attr); - module parameters diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index ea0bace0124a..43f48098220d 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -296,15 +296,6 @@ Who: Ravikiran Thirumalai <kiran@scalex86.org> --------------------------- -What: CONFIG_THERMAL_HWMON -When: January 2009 -Why: This option was introduced just to allow older lm-sensors userspace - to keep working over the upgrade to 2.6.26. At the scheduled time of - removal fixed lm-sensors (2.x or 3.x) should be readily available. 
-Who: Rene Herman <rene.herman@gmail.com> - ---------------------------- - What: Code that is now under CONFIG_WIRELESS_EXT_SYSFS (in net/core/net-sysfs.c) When: After the only user (hal) has seen a release with the patches diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt index ace200b7c214..37c4d84a0e57 100644 --- a/Documentation/frv/booting.txt +++ b/Documentation/frv/booting.txt @@ -106,13 +106,20 @@ separated by spaces: To use the first on-chip serial port at baud rate 115200, no parity, 8 bits, and no flow control. - (*) root=/dev/<xxxx> + (*) root=<xxxx> - This specifies the device upon which the root filesystem resides. For - example: + This specifies the device upon which the root filesystem resides. It + may be specified by major and minor number, device path, or even + partition uuid, if supported. For example: /dev/nfs NFS root filesystem /dev/mtdblock3 Fourth RedBoot partition on the System Flash + PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1 + first partition after the partition with the given UUID + 253:0 Device with major 253 and minor 0 + + Authoritative information can be found in + "Documentation/kernel-parameters.txt". (*) rw diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 72ba8d51dbc1..845a191004b1 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -292,6 +292,7 @@ Code Seq#(hex) Include File Comments <mailto:buk@buks.ipn.de> 0xA0 all linux/sdp/sdp.h Industrial Device Project <mailto:kenji@bitgate.com> +0xA2 00-0F arch/tile/include/asm/hardwall.h 0xA3 80-8F Port ACL in development: <mailto:tlewis@mindspring.com> 0xA3 90-9F linux/dtlk.h diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 26a83743af19..e279b7242912 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -163,6 +163,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See also Documentation/power/pm.txt, pci=noacpi + acpi_rsdp= [ACPI,EFI,KEXEC] + Pass the RSDP address to the kernel, mostly used + on machines running EFI runtime service to boot the + second kernel for kdump. + acpi_apic_instance= [ACPI, IOAPIC] Format: <int> 2: use 2nd APIC table, if available @@ -546,6 +551,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. /proc/<pid>/coredump_filter. See also Documentation/filesystems/proc.txt. + cpuidle.off=1 [CPU_IDLE] + disable the cpuidle sub-system + cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver Format: <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>] @@ -2240,6 +2248,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ro [KNL] Mount root device read-only on boot root= [KNL] Root filesystem + See name_to_dev_t comment in init/do_mounts.c. rootdelay= [KNL] Delay (in seconds) to pause before attempting to mount the root filesystem diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt index c93bed66e25d..97d45f276fe6 100644 --- a/Documentation/m68k/kernel-options.txt +++ b/Documentation/m68k/kernel-options.txt @@ -129,6 +129,20 @@ decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for the first of these. You can find out all valid major numbers by looking into include/linux/major.h. +In addition to major and minor numbers, if the device containing your +root partition uses a partition table format with unique partition +identifiers, then you may use them. 
For instance, +"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF". It is also +possible to reference another partition on the same device using a +known partition UUID as the starting point. For example, +if partition 5 of the device has the UUID of +00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as +follows: + PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2 + +Authoritative information can be found in +"Documentation/kernel-parameters.txt". + 2.2) ro, rw ----------- diff --git a/MAINTAINERS b/MAINTAINERS index 0d2fcda465eb..07cfd8deaad5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3367,6 +3367,12 @@ F: drivers/net/ixgb/ F: drivers/net/ixgbe/ F: drivers/net/ixgbevf/ +INTEL MRST PMU DRIVER +M: Len Brown <len.brown@intel.com> +L: linux-pm@lists.linux-foundation.org +S: Supported +F: arch/x86/platform/mrst/pmu.* + INTEL PRO/WIRELESS 2100 NETWORK CONNECTION SUPPORT L: linux-wireless@vger.kernel.org S: Orphan @@ -6319,6 +6325,7 @@ F: include/linux/sysv_fs.h TARGET SUBSYSTEM M: Nicholas A. Bellinger <nab@linux-iscsi.org> L: linux-scsi@vger.kernel.org +L: target-devel@vger.kernel.org L: http://groups.google.com/group/linux-iscsi-target-dev W: http://www.linux-iscsi.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master diff --git a/arch/Kconfig b/arch/Kconfig index 26b0e2397a57..4b0669cbb3b0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -178,4 +178,7 @@ config HAVE_ARCH_MUTEX_CPU_RELAX config HAVE_RCU_TABLE_FREE bool +config ARCH_HAVE_NMI_SAFE_CMPXCHG + bool + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index ca2da8da6e9c..60cde53d266c 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -14,6 +14,7 @@ config ALPHA select AUTO_IRQ_AFFINITY if SMP select GENERIC_IRQ_SHOW select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_HAVE_NMI_SAFE_CMPXCHG help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 5e1e54197227..d7ee0d4c072d 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -30,6 +30,7 @@ #include <linux/uaccess.h> #include <linux/random.h> #include <linux/hw_breakpoint.h> +#include <linux/cpuidle.h> #include <asm/cacheflush.h> #include <asm/leds.h> @@ -196,7 +197,8 @@ void cpu_idle(void) cpu_relax(); } else { stop_critical_timings(); - pm_idle(); + if (cpuidle_call_idle()) + pm_idle(); start_critical_timings(); /* * This will eventually be removed - pm_idle diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index e9d689b7c833..197e96f70405 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -10,6 +10,7 @@ config AVR32 select GENERIC_IRQ_PROBE select HARDIRQS_SW_RESEND select GENERIC_IRQ_SHOW + select ARCH_HAVE_NMI_SAFE_CMPXCHG help AVR32 is a high-performance 32-bit RISC microprocessor core, designed for cost-sensitive embedded applications, with particular diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 850265373611..466af40c5822 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -158,7 +158,7 @@ static int sync_serial_open(struct inode *inode, struct file *file); static int sync_serial_release(struct inode *inode, struct file *file); static unsigned int sync_serial_poll(struct file *filp, poll_table *wait); -static int sync_serial_ioctl(struct file *file, +static long sync_serial_ioctl(struct file *file, 
unsigned int cmd, unsigned long arg); static ssize_t sync_serial_write(struct file *file, const char *buf, size_t count, loff_t *ppos); @@ -625,11 +625,11 @@ static int sync_serial_open(struct inode *inode, struct file *file) *R_IRQ_MASK1_SET = 1 << port->data_avail_bit; DEBUG(printk(KERN_DEBUG "sser%d rec started\n", dev)); } - ret = 0; + err = 0; out: mutex_unlock(&sync_serial_mutex); - return ret; + return err; } static int sync_serial_release(struct inode *inode, struct file *file) diff --git a/arch/cris/arch-v10/kernel/irq.c b/arch/cris/arch-v10/kernel/irq.c index 907cfb5a873d..ba0e5965d6e3 100644 --- a/arch/cris/arch-v10/kernel/irq.c +++ b/arch/cris/arch-v10/kernel/irq.c @@ -20,6 +20,9 @@ #define crisv10_mask_irq(irq_nr) (*R_VECT_MASK_CLR = 1 << (irq_nr)); #define crisv10_unmask_irq(irq_nr) (*R_VECT_MASK_SET = 1 << (irq_nr)); +extern void kgdb_init(void); +extern void breakpoint(void); + /* don't use set_int_vector, it bypasses the linux interrupt handlers. it is * global just so that the kernel gdb can use it. */ diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 29b74a105830..332f19c54557 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -11,8 +11,6 @@ #ifdef __KERNEL__ -#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR - #ifndef __ASSEMBLY__ #include <asm/types.h> #include <asm/processor.h> @@ -67,8 +65,10 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR /* thread information allocation */ -#define alloc_thread_info(tsk, node) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) +#define alloc_thread_info_node(tsk, node) \ + ((struct thread_info *) __get_free_pages(GFP_KERNEL, 1)) #define free_thread_info(ti) free_pages((unsigned long) (ti), 1) #endif /* !__ASSEMBLY__ */ diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index cb884e489425..bad27a6ff407 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -7,6 +7,7 @@ config FRV select HAVE_PERF_EVENTS select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_SHOW + select ARCH_HAVE_NMI_SAFE_CMPXCHG config ZONE_DMA bool diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 64c7ab7e7a81..124854714958 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -28,6 +28,7 @@ config IA64 select IRQ_PER_CPU select GENERIC_IRQ_SHOW select ARCH_WANT_OPTIONAL_GPIOLIB + select ARCH_HAVE_NMI_SAFE_CMPXCHG default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 284cd3771eaa..9e8ee9d2b8ca 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -6,6 +6,7 @@ config M68K select GENERIC_ATOMIC64 if MMU select HAVE_GENERIC_HARDIRQS if !MMU select GENERIC_IRQ_SHOW if !MMU + select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS config RWSEM_GENERIC_SPINLOCK bool diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 65adc86a230e..e077b0bf56ca 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -15,6 +15,7 @@ config PARISC select HAVE_GENERIC_HARDIRQS select GENERIC_IRQ_PROBE select IRQ_PER_CPU + select ARCH_HAVE_NMI_SAFE_CMPXCHG help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 374c475e56a3..6926b61acfea 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -136,6 +136,7 @@ config PPC select HAVE_SYSCALL_TRACEPOINTS select HAVE_BPF_JIT if (PPC64 && NET) select HAVE_ARCH_JUMP_LABEL + select ARCH_HAVE_NMI_SAFE_CMPXCHG config 
EARLY_PRINTK bool diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 17dbb4318261..ed5cb5af5281 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -81,6 +81,7 @@ config S390 select INIT_ALL_POSSIBLE select HAVE_IRQ_WORK select HAVE_PERF_EVENTS + select ARCH_HAVE_NMI_SAFE_CMPXCHG select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 748ff1920068..ff9177c8f643 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -11,6 +11,7 @@ config SUPERH select HAVE_DMA_ATTRS select HAVE_IRQ_WORK select HAVE_PERF_EVENTS + select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select PERF_USE_VMALLOC select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 84db0d6ccd0d..3c45de1db716 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -16,12 +16,13 @@ #include <linux/thread_info.h> #include <linux/irqflags.h> #include <linux/smp.h> +#include <linux/cpuidle.h> #include <asm/pgalloc.h> #include <asm/system.h> #include <linux/atomic.h> #include <asm/smp.h> -void (*pm_idle)(void) = NULL; +static void (*pm_idle)(void); static int hlt_counter; @@ -100,7 +101,8 @@ void cpu_idle(void) local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); - pm_idle(); + if (cpuidle_call_idle()) + pm_idle(); /* * Sanity check to ensure that pm_idle() returns * with IRQs enabled diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 1074dddcb104..42c67beadcae 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -54,6 +54,7 @@ config SPARC64 select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select IRQ_PREFLOW_FASTEOI + select ARCH_HAVE_NMI_SAFE_CMPXCHG config ARCH_DEFCONFIG string diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 0249b8b4db54..b30f71ac0d06 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -12,6 +12,7 @@ config TILE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW select SYS_HYPERVISOR + select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386 # FIXME: investigate whether we need/want these options. 
# select HAVE_IOREMAP_PROT diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 849ab2fa1f5c..aec60dc06007 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -2,3 +2,41 @@ include include/asm-generic/Kbuild.asm header-y += ucontext.h header-y += hardwall.h + +generic-y += bug.h +generic-y += bugs.h +generic-y += cputime.h +generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += fb.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += ipc.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += local.h +generic-y += module.h +generic-y += msgbuf.h +generic-y += mutex.h +generic-y += param.h +generic-y += parport.h +generic-y += poll.h +generic-y += posix_types.h +generic-y += resource.h +generic-y += scatterlist.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += shmparam.h +generic-y += socket.h +generic-y += sockios.h +generic-y += statfs.h +generic-y += termbits.h +generic-y += termios.h +generic-y += types.h +generic-y += ucontext.h +generic-y += xor.h diff --git a/arch/tile/include/asm/bug.h b/arch/tile/include/asm/bug.h deleted file mode 100644 index b12fd89e42e9..000000000000 --- a/arch/tile/include/asm/bug.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bug.h> diff --git a/arch/tile/include/asm/bugs.h b/arch/tile/include/asm/bugs.h deleted file mode 100644 index 61791e1ad9f5..000000000000 --- a/arch/tile/include/asm/bugs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bugs.h> diff --git a/arch/tile/include/asm/cputime.h b/arch/tile/include/asm/cputime.h deleted file mode 100644 index 6d68ad7e0ea3..000000000000 --- a/arch/tile/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/cputime.h> diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h deleted file mode 100644 index f0a4c256403b..000000000000 --- a/arch/tile/include/asm/device.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/device.h> diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h deleted file mode 100644 index 6cd978cefb28..000000000000 --- a/arch/tile/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/arch/tile/include/asm/emergency-restart.h b/arch/tile/include/asm/emergency-restart.h deleted file mode 100644 index 3711bd9d50bd..000000000000 --- a/arch/tile/include/asm/emergency-restart.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/emergency-restart.h> diff --git a/arch/tile/include/asm/errno.h b/arch/tile/include/asm/errno.h deleted file mode 100644 index 4c82b503d92f..000000000000 --- a/arch/tile/include/asm/errno.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/errno.h> diff --git a/arch/tile/include/asm/fb.h b/arch/tile/include/asm/fb.h deleted file mode 100644 index 3a4988e8df45..000000000000 --- a/arch/tile/include/asm/fb.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fb.h> diff --git a/arch/tile/include/asm/fcntl.h b/arch/tile/include/asm/fcntl.h deleted file mode 100644 index 46ab12db5739..000000000000 --- a/arch/tile/include/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h index 51537ff9265a..c66f7933beaa 100644 --- a/arch/tile/include/asm/fixmap.h +++ b/arch/tile/include/asm/fixmap.h @@ -75,12 +75,6 @@ extern void __set_fixmap(enum fixed_addresses idx, #define 
set_fixmap(idx, phys) \ __set_fixmap(idx, phys, PAGE_KERNEL) -/* - * Some hardware wants to get fixmapped without caching. - */ -#define set_fixmap_nocache(idx, phys) \ - __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) - #define clear_fixmap(idx) \ __set_fixmap(idx, 0, __pgprot(0)) diff --git a/arch/tile/include/asm/ioctl.h b/arch/tile/include/asm/ioctl.h deleted file mode 100644 index b279fe06dfe5..000000000000 --- a/arch/tile/include/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/tile/include/asm/ioctls.h b/arch/tile/include/asm/ioctls.h deleted file mode 100644 index ec34c760665e..000000000000 --- a/arch/tile/include/asm/ioctls.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctls.h> diff --git a/arch/tile/include/asm/ipc.h b/arch/tile/include/asm/ipc.h deleted file mode 100644 index a46e3d9c2a3f..000000000000 --- a/arch/tile/include/asm/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipc.h> diff --git a/arch/tile/include/asm/ipcbuf.h b/arch/tile/include/asm/ipcbuf.h deleted file mode 100644 index 84c7e51cb6d0..000000000000 --- a/arch/tile/include/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/tile/include/asm/irq_regs.h b/arch/tile/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b70270..000000000000 --- a/arch/tile/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h deleted file mode 100644 index 6ece1b037665..000000000000 --- a/arch/tile/include/asm/kdebug.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/kdebug.h> diff --git a/arch/tile/include/asm/local.h b/arch/tile/include/asm/local.h deleted file mode 100644 index c11c530f74d0..000000000000 --- a/arch/tile/include/asm/local.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local.h> diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h deleted file mode 100644 index 1e4b79fe8584..000000000000 --- a/arch/tile/include/asm/module.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/module.h> diff --git a/arch/tile/include/asm/msgbuf.h b/arch/tile/include/asm/msgbuf.h deleted file mode 100644 index 809134c644a6..000000000000 --- a/arch/tile/include/asm/msgbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/msgbuf.h> diff --git a/arch/tile/include/asm/mutex.h b/arch/tile/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c71..000000000000 --- a/arch/tile/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/mutex-dec.h> diff --git a/arch/tile/include/asm/param.h b/arch/tile/include/asm/param.h deleted file mode 100644 index 965d45427975..000000000000 --- a/arch/tile/include/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/param.h> diff --git a/arch/tile/include/asm/parport.h b/arch/tile/include/asm/parport.h deleted file mode 100644 index cf252af64590..000000000000 --- a/arch/tile/include/asm/parport.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/parport.h> diff --git a/arch/tile/include/asm/poll.h b/arch/tile/include/asm/poll.h deleted file mode 100644 index c98509d3149e..000000000000 --- a/arch/tile/include/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/tile/include/asm/posix_types.h b/arch/tile/include/asm/posix_types.h deleted file mode 100644 index 22cae6230ceb..000000000000 --- a/arch/tile/include/asm/posix_types.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/posix_types.h> diff --git 
a/arch/tile/include/asm/resource.h b/arch/tile/include/asm/resource.h deleted file mode 100644 index 04bc4db8921b..000000000000 --- a/arch/tile/include/asm/resource.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/resource.h> diff --git a/arch/tile/include/asm/scatterlist.h b/arch/tile/include/asm/scatterlist.h deleted file mode 100644 index 35d786fe93ae..000000000000 --- a/arch/tile/include/asm/scatterlist.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/scatterlist.h> diff --git a/arch/tile/include/asm/sembuf.h b/arch/tile/include/asm/sembuf.h deleted file mode 100644 index 7673b83cfef7..000000000000 --- a/arch/tile/include/asm/sembuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sembuf.h> diff --git a/arch/tile/include/asm/serial.h b/arch/tile/include/asm/serial.h deleted file mode 100644 index a0cb0caff152..000000000000 --- a/arch/tile/include/asm/serial.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/serial.h> diff --git a/arch/tile/include/asm/shmbuf.h b/arch/tile/include/asm/shmbuf.h deleted file mode 100644 index 83c05fc2de38..000000000000 --- a/arch/tile/include/asm/shmbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmbuf.h> diff --git a/arch/tile/include/asm/shmparam.h b/arch/tile/include/asm/shmparam.h deleted file mode 100644 index 93f30deb95d0..000000000000 --- a/arch/tile/include/asm/shmparam.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmparam.h> diff --git a/arch/tile/include/asm/socket.h b/arch/tile/include/asm/socket.h deleted file mode 100644 index 6b71384b9d8b..000000000000 --- a/arch/tile/include/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/socket.h> diff --git a/arch/tile/include/asm/sockios.h b/arch/tile/include/asm/sockios.h deleted file mode 100644 index def6d4746ee7..000000000000 --- a/arch/tile/include/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sockios.h> diff --git a/arch/tile/include/asm/statfs.h b/arch/tile/include/asm/statfs.h deleted file mode 100644 index 0b91fe198c20..000000000000 --- a/arch/tile/include/asm/statfs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/statfs.h> diff --git a/arch/tile/include/asm/termbits.h b/arch/tile/include/asm/termbits.h deleted file mode 100644 index 3935b106de79..000000000000 --- a/arch/tile/include/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termbits.h> diff --git a/arch/tile/include/asm/termios.h b/arch/tile/include/asm/termios.h deleted file mode 100644 index 280d78a9d966..000000000000 --- a/arch/tile/include/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termios.h> diff --git a/arch/tile/include/asm/types.h b/arch/tile/include/asm/types.h deleted file mode 100644 index b9e79bc580dd..000000000000 --- a/arch/tile/include/asm/types.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/types.h> diff --git a/arch/tile/include/asm/ucontext.h b/arch/tile/include/asm/ucontext.h deleted file mode 100644 index 9bc07b9f30fb..000000000000 --- a/arch/tile/include/asm/ucontext.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ucontext.h> diff --git a/arch/tile/include/asm/xor.h b/arch/tile/include/asm/xor.h deleted file mode 100644 index c82eb12a5b18..000000000000 --- a/arch/tile/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/xor.h> diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h new file mode 100644 index 000000000000..6395faa6d9e6 --- /dev/null +++ b/arch/tile/include/hv/drv_srom_intf.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011 Tilera Corporation. 
All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_srom_intf.h + * Interface definitions for the SPI Flash ROM driver. + */ + +#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H +#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H + +/** Read this offset to get the total device size. */ +#define SROM_TOTAL_SIZE_OFF 0xF0000000 + +/** Read this offset to get the device sector size. */ +#define SROM_SECTOR_SIZE_OFF 0xF0000004 + +/** Read this offset to get the device page size. */ +#define SROM_PAGE_SIZE_OFF 0xF0000008 + +/** Write this offset to flush any pending writes. */ +#define SROM_FLUSH_OFF 0xF1000000 + +/** Write this offset, plus the byte offset of the start of a sector, to + * erase a sector. Any write data is ignored, but there must be at least + * one byte of write data. Only applies when the driver is in MTD mode. + */ +#define SROM_ERASE_OFF 0xF2000000 + +#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */ diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index c4be58cc5d50..f6f50f2a5e37 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -78,7 +78,6 @@ static struct clocksource cycle_counter_cs = { .rating = 300, .read = clocksource_get_cycles, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, /* typical value, e.g. x86 tsc uses this */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -91,8 +90,6 @@ void __init setup_clock(void) cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED); sched_clock_mult = clocksource_hz2mult(cycles_per_sec, SCHED_CLOCK_SHIFT); - cycle_counter_cs.mult = - clocksource_hz2mult(cycles_per_sec, cycle_counter_cs.shift); } void __init calibrate_delay(void) @@ -107,7 +104,7 @@ void __init calibrate_delay(void) void __init time_init(void) { /* Initialize and register the clock source. */ - clocksource_register(&cycle_counter_cs); + clocksource_register_hz(&cycle_counter_cs, cycles_per_sec); /* Start up the tile-timer interrupt source on the boot cpu. 
*/ setup_tile_timer(); diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 4e10c4023028..7309988c9794 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -836,8 +836,7 @@ void __init mem_init(void) #endif #ifdef CONFIG_FLATMEM - if (!mem_map) - BUG(); + BUG_ON(!mem_map); #endif #ifdef CONFIG_HIGHMEM diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7cf916fc1ce7..6a47bb22657f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -72,6 +72,7 @@ config X86 select USE_GENERIC_SMP_HELPERS if SMP select HAVE_BPF_JIT if (X86_64 && NET) select CLKEVT_I8253 + select ARCH_HAVE_NMI_SAFE_CMPXCHG config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index d02804d650c4..d8e8eefbe24c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -40,8 +40,6 @@ #include <linux/compiler.h> #include <asm/page.h> -#include <xen/xen.h> - #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ { type ret; asm volatile("mov" size " %1,%0":reg (ret) \ @@ -334,6 +332,7 @@ extern void fixup_early_ioremap(void); extern bool is_early_ioremap_ptep(pte_t *ptep); #ifdef CONFIG_XEN +#include <xen/xen.h> struct bio_vec; extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 219371546afd..0d1171c97729 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -751,8 +751,6 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) :: "a" (eax), "c" (ecx)); } -extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); - extern void select_idle_routine(const struct cpuinfo_x86 *c); extern void init_amd_e400_c1e_mask(void); diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 5812404a0d4c..f50e7fb2a201 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -149,6 +149,29 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, } EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); +/* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. + * We execute MONITOR against need_resched and enter optimized wait state + * through MWAIT. Whenever someone changes need_resched, we would be woken + * up from MWAIT (without an IPI). + * + * New with Core Duo processors, MWAIT can take some hints based on CPU + * capability. + */ +void mwait_idle_with_hints(unsigned long ax, unsigned long cx) +{ + if (!need_resched()) { + if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)&current_thread_info()->flags); + + __monitor((void *)&current_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __mwait(ax, cx); + } +} + void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) { unsigned int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e1ba8cb24e4e..e7e3b019c439 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -438,29 +438,6 @@ void cpu_idle_wait(void) } EXPORT_SYMBOL_GPL(cpu_idle_wait); -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - through MWAIT. 
Whenever someone changes need_resched, we would be woken - up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long ax, unsigned long cx) -{ - if (!need_resched()) { - if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) - clflush((void *)&current_thread_info()->flags); - - __monitor((void *)&current_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __mwait(ax, cx); - } -} - /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a3d0dc59067b..7a3b65107a27 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -38,6 +38,7 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> +#include <linux/cpuidle.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -109,7 +110,8 @@ void cpu_idle(void) local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); - pm_idle(); + if (cpuidle_idle_call()) + pm_idle(); start_critical_timings(); } tick_nohz_restart_sched_tick(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ca6f7ab8df33..f693e44e1bf6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -37,6 +37,7 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/ftrace.h> +#include <linux/cpuidle.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -136,7 +137,8 @@ void cpu_idle(void) enter_idle(); /* Don't trace irqs off for idle */ stop_critical_timings(); - pm_idle(); + if (cpuidle_idle_call()) + pm_idle(); start_critical_timings(); /* In many cases the interrupt that ended idle diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile index f61ccdd49341..1ea38775a6d3 100644 --- a/arch/x86/platform/mrst/Makefile +++ b/arch/x86/platform/mrst/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_X86_MRST) += mrst.o obj-$(CONFIG_X86_MRST) += vrtc.o obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o +obj-$(CONFIG_X86_MRST) += pmu.o diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c new file mode 100644 index 000000000000..9281da7d91bd --- /dev/null +++ b/arch/x86/platform/mrst/pmu.c @@ -0,0 +1,817 @@ +/* + * mrst/pmu.c - driver for MRST Power Management Unit + * + * Copyright (c) 2011, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */ + +#include <linux/cpuidle.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/seq_file.h> +#include <linux/sfi.h> +#include <asm/intel_scu_ipc.h> +#include "pmu.h" + +#define IPCMSG_FW_REVISION 0xF4 + +struct mrst_device { + u16 pci_dev_num; /* DEBUG only */ + u16 lss; + u16 latest_request; + unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */ +}; + +/* + * comlete list of MRST PCI devices + */ +static struct mrst_device mrst_devs[] = { +/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */ +/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */ +/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */ +/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */ +/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */ +/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */ +/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */ +/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */ +/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */ +/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */ +/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */ +/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */ +/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */ +/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */ +/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */ +/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */ +/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */ +/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */ +/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */ +/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */ +/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */ +/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */ + +/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */ + +/* 23 */ { 0x4102, 0 }, /* Lincroft */ +/* 24 */ { 0x4110, 0 }, /* Lincroft */ +}; + +/* n.b. 
We ignore PCI-id 0x815 in LSS9 b/c MeeGo has no driver for it */ +static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0}; +static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803, + 0x0804, 0x0805, 0x080f, 0}; + +/* handle concurrent SMP invokations of pmu_pci_set_power_state() */ +static spinlock_t mrst_pmu_power_state_lock; + +static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */ +static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */ + +static int graphics_is_off; +static int lss_s0i3_enabled; +static bool mrst_pmu_s0i3_enable; + +/* debug counters */ +static u32 pmu_wait_ready_calls; +static u32 pmu_wait_ready_udelays; +static u32 pmu_wait_ready_udelays_max; +static u32 pmu_wait_done_calls; +static u32 pmu_wait_done_udelays; +static u32 pmu_wait_done_udelays_max; +static u32 pmu_set_power_state_entry; +static u32 pmu_set_power_state_send_cmd; + +static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num) +{ + int index = 0; + + if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815)) + index = pci_dev_num - 0x800; + else if (pci_dev_num == 0x084F) + index = 22; + else if (pci_dev_num == 0x4102) + index = 23; + else if (pci_dev_num == 0x4110) + index = 24; + + if (pci_dev_num != mrst_devs[index].pci_dev_num) { + WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num); + return 0; + } + + return &mrst_devs[index]; +} + +/** + * mrst_pmu_validate_cstates + * @dev: cpuidle_device + * + * Certain states are not appropriate for governor to pick in some cases. + * This function will be called as cpuidle_device's prepare callback and + * thus tells governor to ignore such states when selecting the next state + * to enter. + */ + +#define IDLE_STATE4_IS_C6 4 +#define IDLE_STATE5_IS_S0I3 5 + +int mrst_pmu_invalid_cstates(void) +{ + int cpu = smp_processor_id(); + + /* + * Demote to C4 if the PMU is busy. + * Since LSS changes leave the busy bit clear... + * busy means either the PMU is waiting for an ACK-C6 that + * isn't coming due to an MWAIT that returned immediately; + * or we returned from S0i3 successfully, and the PMU + * is not done sending us interrupts. + */ + if (pmu_read_busy_status()) + return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3; + + /* + * Disallow S0i3 if: PMU is not initialized, or CPU1 is active, + * or if device LSS is insufficient, or the GPU is active, + * or if it has been explicitly disabled. + */ + if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) || + !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable) + return 1 << IDLE_STATE5_IS_S0I3; + else + return 0; +} + +/* + * pmu_update_wake_counters(): read PM_WKS, update wake_counters[] + * DEBUG only. 
+ */ +static void pmu_update_wake_counters(void) +{ + int lss; + u32 wake_status; + + wake_status = pmu_read_wks(); + + for (lss = 0; lss < MRST_NUM_LSS; ++lss) { + if (wake_status & (1 << lss)) + wake_counters[lss]++; + } +} + +int mrst_pmu_s0i3_entry(void) +{ + int status; + + /* Clear any possible error conditions */ + pmu_write_ics(0x300); + + /* set wake control to current D-states */ + pmu_write_wssc(S0I3_SSS_TARGET); + + status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd); + pmu_update_wake_counters(); + return status; +} + +/* poll for maximum of 5ms for busy bit to clear */ +static int pmu_wait_ready(void) +{ + int udelays; + + pmu_wait_ready_calls++; + + for (udelays = 0; udelays < 500; ++udelays) { + if (udelays > pmu_wait_ready_udelays_max) + pmu_wait_ready_udelays_max = udelays; + + if (pmu_read_busy_status() == 0) + return 0; + + udelay(10); + pmu_wait_ready_udelays++; + } + + /* + * if this fires, observe + * /sys/kernel/debug/mrst_pmu_wait_ready_calls + * /sys/kernel/debug/mrst_pmu_wait_ready_udelays + */ + WARN_ONCE(1, "SCU not ready for 5ms"); + return -EBUSY; +} +/* poll for maximum of 50ms us for busy bit to clear */ +static int pmu_wait_done(void) +{ + int udelays; + + pmu_wait_done_calls++; + + for (udelays = 0; udelays < 500; ++udelays) { + if (udelays > pmu_wait_done_udelays_max) + pmu_wait_done_udelays_max = udelays; + + if (pmu_read_busy_status() == 0) + return 0; + + udelay(100); + pmu_wait_done_udelays++; + } + + /* + * if this fires, observe + * /sys/kernel/debug/mrst_pmu_wait_done_calls + * /sys/kernel/debug/mrst_pmu_wait_done_udelays + */ + WARN_ONCE(1, "SCU not done for 50ms"); + return -EBUSY; +} + +u32 mrst_pmu_msi_is_disabled(void) +{ + return pmu_msi_is_disabled(); +} + +void mrst_pmu_enable_msi(void) +{ + pmu_msi_enable(); +} + +/** + * pmu_irq - pmu driver interrupt handler + * Context: interrupt context + */ +static irqreturn_t pmu_irq(int irq, void *dummy) +{ + union pmu_pm_ics pmu_ics; + + pmu_ics.value = pmu_read_ics(); + + if (!pmu_ics.bits.pending) + return IRQ_NONE; + + switch (pmu_ics.bits.cause) { + case INT_SPURIOUS: + case INT_CMD_DONE: + case INT_CMD_ERR: + case INT_WAKE_RX: + case INT_SS_ERROR: + case INT_S0IX_MISS: + case INT_NO_ACKC6: + pmu_irq_stats[pmu_ics.bits.cause]++; + break; + default: + pmu_irq_stats[INT_INVALID]++; + } + + pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */ + + return IRQ_HANDLED; +} + +/* + * Translate PCI power management to MRST LSS D-states + */ +static int pci_2_mrst_state(int lss, pci_power_t pci_state) +{ + switch (pci_state) { + case PCI_D0: + if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET) + return D0i1; + else + return D0; + case PCI_D1: + return D0i1; + case PCI_D2: + return D0i2; + case PCI_D3hot: + case PCI_D3cold: + return D0i3; + default: + WARN(1, "pci_state %d\n", pci_state); + return 0; + } +} + +static int pmu_issue_command(u32 pm_ssc) +{ + union pmu_pm_set_cfg_cmd_t command; + + if (pmu_read_busy_status()) { + pr_debug("pmu is busy, Operation not permitted\n"); + return -1; + } + + /* + * enable interrupts in PMU so that interrupts are + * propagated when ioc bit for a particular set + * command is set + */ + + pmu_irq_enable(); + + /* Configure the sub systems for pmu2 */ + + pmu_write_ssc(pm_ssc); + + /* + * Send the set config command for pmu its configured + * for mode CM_IMMEDIATE & hence with No Trigger + */ + + command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE; + command.pmu2_params.d_param.cfg_delay = 0; + command.pmu2_params.d_param.rsvd = 0; + + /* construct the 
command to send SET_CFG to particular PMU */ + command.pmu2_params.d_param.cmd = SET_CFG_CMD; + command.pmu2_params.d_param.ioc = 0; + command.pmu2_params.d_param.mode_id = 0; + command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0; + + /* write the value of PM_CMD into particular PMU */ + pr_debug("pmu command being written %x\n", + command.pmu_pm_set_cfg_cmd_value); + + pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value); + + return 0; +} + +static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state) +{ + u16 existing_request; + int i; + + for (i = 0; ids[i]; ++i) { + struct mrst_device *mrst_dev; + + mrst_dev = pci_id_2_mrst_dev(ids[i]); + if (unlikely(!mrst_dev)) + continue; + + existing_request = mrst_dev->latest_request; + if (existing_request < pci_state) + pci_state = existing_request; + } + return pci_state; +} + +/** + * pmu_pci_set_power_state - Callback function is used by all the PCI devices + * for a platform specific device power on/shutdown. + */ + +int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state) +{ + u32 old_sss, new_sss; + int status = 0; + struct mrst_device *mrst_dev; + + pmu_set_power_state_entry++; + + BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL); + BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold); + + mrst_dev = pci_id_2_mrst_dev(pdev->device); + if (unlikely(!mrst_dev)) + return -ENODEV; + + mrst_dev->pci_state_counts[pci_state]++; /* count invocations */ + + /* PMU driver calls self as part of PCI initialization, ignore */ + if (pdev->device == PCI_DEV_ID_MRST_PMU) + return 0; + + BUG_ON(!pmu_reg); /* SW bug if called before initialized */ + + spin_lock(&mrst_pmu_power_state_lock); + + if (pdev->d3_delay) { + dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n", + pdev->d3_delay); + pdev->d3_delay = 0; + } + /* + * If Lincroft graphics, simply remember state + */ + if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY + && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) { + if (pci_state == PCI_D0) + graphics_is_off = 0; + else + graphics_is_off = 1; + goto ret; + } + + if (!mrst_dev->lss) + goto ret; /* device with no LSS */ + + if (mrst_dev->latest_request == pci_state) + goto ret; /* no change */ + + mrst_dev->latest_request = pci_state; /* record latest request */ + + /* + * LSS9 and LSS10 contain multiple PCI devices. 
+ * Use the lowest numbered (highest power) state in the LSS + */ + if (mrst_dev->lss == 9) + pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state); + else if (mrst_dev->lss == 10) + pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state); + + status = pmu_wait_ready(); + if (status) + goto ret; + + old_sss = pmu_read_sss(); + new_sss = old_sss & ~SSMSK(3, mrst_dev->lss); + new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state), + mrst_dev->lss); + + if (new_sss == old_sss) + goto ret; /* nothing to do */ + + pmu_set_power_state_send_cmd++; + + status = pmu_issue_command(new_sss); + + if (unlikely(status != 0)) { + dev_err(&pdev->dev, "Failed to Issue a PM command\n"); + goto ret; + } + + if (pmu_wait_done()) + goto ret; + + lss_s0i3_enabled = + ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET); +ret: + spin_unlock(&mrst_pmu_power_state_lock); + return status; +} + +#ifdef CONFIG_DEBUG_FS +static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"}; + +static inline const char *d0ix_name(int state) +{ + return d0ix_names[(int) state]; +} + +static int debug_mrst_pmu_show(struct seq_file *s, void *unused) +{ + struct pci_dev *pdev = NULL; + u32 cur_pmsss; + int lss; + + seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET); + + cur_pmsss = pmu_read_sss(); + + seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET); + + seq_printf(s, "0x%08X Current SSS ", cur_pmsss); + seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n"); + + if (cpumask_equal(cpu_online_mask, cpumask_of(0))) + seq_printf(s, "cpu0 is only cpu online\n"); + else + seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n"); + + seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]"); + + + for_each_pci_dev(pdev) { + int pos; + u16 pmcsr; + struct mrst_device *mrst_dev; + int i; + + mrst_dev = pci_id_2_mrst_dev(pdev->device); + + seq_printf(s, "%s %04x/%04X %-16.16s ", + dev_name(&pdev->dev), + pdev->vendor, pdev->device, + dev_driver_string(&pdev->dev)); + + if (unlikely (!mrst_dev)) { + seq_printf(s, " UNKNOWN\n"); + continue; + } + + if (mrst_dev->lss) + seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss, + d0ix_name(((cur_pmsss >> + (mrst_dev->lss * 2)) & 0x3))); + else + seq_printf(s, " "); + + /* PCI PM config space setting */ + pos = pci_find_capability(pdev, PCI_CAP_ID_PM); + if (pos != 0) { + pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr); + seq_printf(s, "PCI-%-4s", + pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK)); + } else { + seq_printf(s, " "); + } + + seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request)); + for (i = 0; i <= PCI_D3cold; ++i) + seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]); + + if (mrst_dev->lss) { + unsigned int lssmask; + + lssmask = SSMSK(D0i3, mrst_dev->lss); + + if ((lssmask & S0I3_SSS_TARGET) && + ((lssmask & cur_pmsss) != + (lssmask & S0I3_SSS_TARGET))) + seq_printf(s , "[BLOCKS s0i3]"); + } + + seq_printf(s, "\n"); + } + seq_printf(s, "Wake Counters:\n"); + for (lss = 0; lss < MRST_NUM_LSS; ++lss) + seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]); + + seq_printf(s, "Interrupt Counters:\n"); + seq_printf(s, + "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n" + "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n" + "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n" + "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n", + pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE], + pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX], + pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS], + pmu_irq_stats[INT_NO_ACKC6], 
pmu_irq_stats[INT_INVALID]); + + seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n", + pmu_wait_ready_calls); + seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n", + pmu_wait_ready_udelays); + seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n", + pmu_wait_ready_udelays_max); + seq_printf(s, "mrst_pmu_wait_done_calls %8d\n", + pmu_wait_done_calls); + seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n", + pmu_wait_done_udelays); + seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n", + pmu_wait_done_udelays_max); + seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n", + pmu_set_power_state_entry); + seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n", + pmu_set_power_state_send_cmd); + seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status()); + + return 0; +} + +static int debug_mrst_pmu_open(struct inode *inode, struct file *file) +{ + return single_open(file, debug_mrst_pmu_show, NULL); +} + +static const struct file_operations devices_state_operations = { + .open = debug_mrst_pmu_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* DEBUG_FS */ + +/* + * Validate SCU PCI shim PCI vendor capability byte + * against LSS hard-coded in mrst_devs[] above. + * DEBUG only. + */ +static void pmu_scu_firmware_debug(void) +{ + struct pci_dev *pdev = NULL; + + for_each_pci_dev(pdev) { + struct mrst_device *mrst_dev; + u8 pci_config_lss; + int pos; + + mrst_dev = pci_id_2_mrst_dev(pdev->device); + if (unlikely(!mrst_dev)) { + printk(KERN_ERR FW_BUG "pmu: Unknown " + "PCI device 0x%04X\n", pdev->device); + continue; + } + + if (mrst_dev->lss == 0) + continue; /* no LSS in our table */ + + pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR); + if (!pos != 0) { + printk(KERN_ERR FW_BUG "pmu: 0x%04X " + "missing PCI Vendor Capability\n", + pdev->device); + continue; + } + pci_read_config_byte(pdev, pos + 4, &pci_config_lss); + if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) { + printk(KERN_ERR FW_BUG "pmu: 0x%04X " + "invalid PCI Vendor Capability 0x%x " + " expected LSS 0x%X\n", + pdev->device, pci_config_lss, mrst_dev->lss); + continue; + } + pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK; + + if (mrst_dev->lss == pci_config_lss) + continue; + + printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n", + pdev->device, pci_config_lss, mrst_dev->lss); + } +} + +/** + * pmu_probe + */ +static int __devinit pmu_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + int ret; + struct mrst_pmu_reg *pmu; + + /* Init the device */ + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Unable to Enable PCI device\n"); + return ret; + } + + ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME); + if (ret < 0) { + dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n"); + goto out_err1; + } + + /* Map the memory of PMU reg base */ + pmu = pci_iomap(pdev, 0, 0); + if (!pmu) { + dev_err(&pdev->dev, "Unable to map the PMU address space\n"); + ret = -ENOMEM; + goto out_err2; + } + +#ifdef CONFIG_DEBUG_FS + /* /sys/kernel/debug/mrst_pmu */ + (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO, + NULL, NULL, &devices_state_operations); +#endif + pmu_reg = pmu; /* success */ + + if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) { + dev_err(&pdev->dev, "Registering isr has failed\n"); + ret = -1; + goto out_err3; + } + + pmu_scu_firmware_debug(); + + pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */ + + pmu_wait_ready(); + + pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */ + 
pmu_write_cmd(0x201); + + spin_lock_init(&mrst_pmu_power_state_lock); + + /* Enable the hardware interrupt */ + pmu_irq_enable(); + return 0; + +out_err3: + free_irq(pdev->irq, NULL); + pci_iounmap(pdev, pmu_reg); + pmu_reg = NULL; +out_err2: + pci_release_region(pdev, 0); +out_err1: + pci_disable_device(pdev); + return ret; +} + +static void __devexit pmu_remove(struct pci_dev *pdev) +{ + dev_err(&pdev->dev, "Mid PM pmu_remove called\n"); + + /* Freeing up the irq */ + free_irq(pdev->irq, NULL); + + pci_iounmap(pdev, pmu_reg); + pmu_reg = NULL; + + /* disable the current PCI device */ + pci_release_region(pdev, 0); + pci_disable_device(pdev); +} + +static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = { + { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 }, + { } +}; + +MODULE_DEVICE_TABLE(pci, pmu_pci_ids); + +static struct pci_driver driver = { + .name = MRST_PMU_DRV_NAME, + .id_table = pmu_pci_ids, + .probe = pmu_probe, + .remove = __devexit_p(pmu_remove), +}; + +/** + * pmu_pci_register - register the PMU driver as PCI device + */ +static int __init pmu_pci_register(void) +{ + return pci_register_driver(&driver); +} + +/* Register and probe via fs_initcall() to preceed device_initcall() */ +fs_initcall(pmu_pci_register); + +static void __exit mid_pci_cleanup(void) +{ + pci_unregister_driver(&driver); +} + +static int ia_major; +static int ia_minor; + +static int pmu_sfi_parse_oem(struct sfi_table_header *table) +{ + struct sfi_table_simple *sb; + + sb = (struct sfi_table_simple *)table; + ia_major = (sb->pentry[1] >> 0) & 0xFFFF; + ia_minor = (sb->pentry[1] >> 16) & 0xFFFF; + printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n", + ia_major, ia_minor); + + return 0; +} + +static int __init scu_fw_check(void) +{ + int ret; + u32 fw_version; + + if (!pmu_reg) + return 0; /* this driver didn't probe-out */ + + sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem); + + if (ia_major < 0x6005 || ia_minor < 0x1525) { + WARN(1, "mrst_pmu: IA FW version too old\n"); + return -1; + } + + ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0, + &fw_version, 1); + + if (ret) { + WARN(1, "mrst_pmu: IPC FW version? %d\n", ret); + } else { + int scu_major = (fw_version >> 8) & 0xFF; + int scu_minor = (fw_version >> 0) & 0xFF; + + printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version); + + if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) { + printk(KERN_INFO "mrst_pmu: enabling S0i3\n"); + mrst_pmu_s0i3_enable = true; + } else { + WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X", + scu_major, scu_minor); + } + } + return 0; +} +late_initcall(scu_fw_check); +module_exit(mid_pci_cleanup); diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h new file mode 100644 index 000000000000..bfbfe64b167b --- /dev/null +++ b/arch/x86/platform/mrst/pmu.h @@ -0,0 +1,234 @@ +/* + * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c + * + * Copyright (c) 2011, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _MRST_PMU_H_ +#define _MRST_PMU_H_ + +#define PCI_DEV_ID_MRST_PMU 0x0810 +#define MRST_PMU_DRV_NAME "mrst_pmu" +#define PCI_SUB_CLASS_MASK 0xFF00 + +#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F +#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80 + +#define SUB_SYS_ALL_D0I1 0x01155555 +#define S0I3_WAKE_SOURCES 0x00001FFF + +#define PM_S0I3_COMMAND \ + ((0 << 31) | /* Reserved */ \ + (0 << 30) | /* Core must be idle */ \ + (0xc2 << 22) | /* ACK C6 trigger */ \ + (3 << 19) | /* Trigger on DMI message */ \ + (3 << 16) | /* Enter S0i3 */ \ + (0 << 13) | /* Numeric mode ID (sw) */ \ + (3 << 9) | /* Trigger mode */ \ + (0 << 8) | /* Do not interrupt */ \ + (1 << 0)) /* Set configuration */ + +#define LSS_DMI 0 +#define LSS_SD_HC0 1 +#define LSS_SD_HC1 2 +#define LSS_NAND 3 +#define LSS_IMAGING 4 +#define LSS_SECURITY 5 +#define LSS_DISPLAY 6 +#define LSS_USB_HC 7 +#define LSS_USB_OTG 8 +#define LSS_AUDIO 9 +#define LSS_AUDIO_LPE 9 +#define LSS_AUDIO_SSP 9 +#define LSS_I2C0 10 +#define LSS_I2C1 10 +#define LSS_I2C2 10 +#define LSS_KBD 10 +#define LSS_SPI0 10 +#define LSS_SPI1 10 +#define LSS_SPI2 10 +#define LSS_GPIO 10 +#define LSS_SRAM 11 /* used by SCU, do not touch */ +#define LSS_SD_HC2 12 +/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */ +#define MRST_NUM_LSS 13 + +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +#define SSMSK(mask, lss) ((mask) << ((lss) * 2)) +#define D0 0 +#define D0i1 1 +#define D0i2 2 +#define D0i3 3 + +#define S0I3_SSS_TARGET ( \ + SSMSK(D0i1, LSS_DMI) | \ + SSMSK(D0i3, LSS_SD_HC0) | \ + SSMSK(D0i3, LSS_SD_HC1) | \ + SSMSK(D0i3, LSS_NAND) | \ + SSMSK(D0i3, LSS_SD_HC2) | \ + SSMSK(D0i3, LSS_IMAGING) | \ + SSMSK(D0i3, LSS_SECURITY) | \ + SSMSK(D0i3, LSS_DISPLAY) | \ + SSMSK(D0i3, LSS_USB_HC) | \ + SSMSK(D0i3, LSS_USB_OTG) | \ + SSMSK(D0i3, LSS_AUDIO) | \ + SSMSK(D0i1, LSS_I2C0)) + +/* + * D0i1 on Langwell is Autonomous Clock Gating (ACG). 
+ * Enable ACG on every LSS except camera and audio + */ +#define D0I1_ACG_SSS_TARGET \ + (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO)) + +enum cm_mode { + CM_NOP, /* ignore the config mode value */ + CM_IMMEDIATE, + CM_DELAY, + CM_TRIGGER, + CM_INVALID +}; + +enum sys_state { + SYS_STATE_S0I0, + SYS_STATE_S0I1, + SYS_STATE_S0I2, + SYS_STATE_S0I3, + SYS_STATE_S3, + SYS_STATE_S5 +}; + +#define SET_CFG_CMD 1 + +enum int_status { + INT_SPURIOUS = 0, + INT_CMD_DONE = 1, + INT_CMD_ERR = 2, + INT_WAKE_RX = 3, + INT_SS_ERROR = 4, + INT_S0IX_MISS = 5, + INT_NO_ACKC6 = 6, + INT_INVALID = 7, +}; + +/* PMU register interface */ +static struct mrst_pmu_reg { + u32 pm_sts; /* 0x00 */ + u32 pm_cmd; /* 0x04 */ + u32 pm_ics; /* 0x08 */ + u32 _resv1; /* 0x0C */ + u32 pm_wkc[2]; /* 0x10 */ + u32 pm_wks[2]; /* 0x18 */ + u32 pm_ssc[4]; /* 0x20 */ + u32 pm_sss[4]; /* 0x30 */ + u32 pm_wssc[4]; /* 0x40 */ + u32 pm_c3c4; /* 0x50 */ + u32 pm_c5c6; /* 0x54 */ + u32 pm_msi_disable; /* 0x58 */ +} *pmu_reg; + +static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); } +static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); } +static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); } +static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); } + +static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); } +static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); } +static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); } +static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); } +static inline void pmu_write_wssc(u32 arg) + { writel(arg, &pmu_reg->pm_wssc[0]); } + +static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); } +static inline u32 pmu_msi_is_disabled(void) + { return readl(&pmu_reg->pm_msi_disable); } + +union pmu_pm_ics { + struct { + u32 cause:8; + u32 enable:1; + u32 pending:1; + u32 reserved:22; + } bits; + u32 value; +}; + +static inline void pmu_irq_enable(void) +{ + union pmu_pm_ics pmu_ics; + + pmu_ics.value = pmu_read_ics(); + pmu_ics.bits.enable = 1; + pmu_write_ics(pmu_ics.value); +} + +union pmu_pm_status { + struct { + u32 pmu_rev:8; + u32 pmu_busy:1; + u32 mode_id:4; + u32 Reserved:19; + } pmu_status_parts; + u32 pmu_status_value; +}; + +static inline int pmu_read_busy_status(void) +{ + union pmu_pm_status result; + + result.pmu_status_value = pmu_read_sts(); + + return result.pmu_status_parts.pmu_busy; +} + +/* pmu set config parameters */ +struct cfg_delay_param_t { + u32 cmd:8; + u32 ioc:1; + u32 cfg_mode:4; + u32 mode_id:3; + u32 sys_state:3; + u32 cfg_delay:8; + u32 rsvd:5; +}; + +struct cfg_trig_param_t { + u32 cmd:8; + u32 ioc:1; + u32 cfg_mode:4; + u32 mode_id:3; + u32 sys_state:3; + u32 cfg_trig_type:3; + u32 cfg_trig_val:8; + u32 cmbi:1; + u32 rsvd1:1; +}; + +union pmu_pm_set_cfg_cmd_t { + union { + struct cfg_delay_param_t d_param; + struct cfg_trig_param_t t_param; + } pmu2_params; + u32 pmu_pm_set_cfg_cmd_value; +}; + +#ifdef FUTURE_PATCH +extern int mrst_s0i3_entry(u32 regval, u32 *regaddr); +#else +static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; } +#endif +#endif diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 60aeeb56948f..a9627e2e3295 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/pm.h> #include <linux/memblock.h> +#include <linux/cpuidle.h> #include <asm/elf.h> #include 
<asm/vdso.h> @@ -426,7 +427,7 @@ void __init xen_arch_setup(void) #ifdef CONFIG_X86_32 boot_cpu_data.hlt_works_ok = 1; #endif - pm_idle = default_idle; + disable_cpuidle(); boot_option_idle_override = IDLE_HALT; fiddle_vdso(); diff --git a/block/blk-core.c b/block/blk-core.c index b850bedad229..b627558c461f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1368,8 +1368,10 @@ static bool should_fail_request(struct hd_struct *part, unsigned int bytes) static int __init fail_make_request_debugfs(void) { - return init_fault_attr_dentries(&fail_make_request, - "fail_make_request"); + struct dentry *dir = fault_create_debugfs_attr("fail_make_request", + NULL, &fail_make_request); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_make_request_debugfs); diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 4f0c06c7a338..780354888958 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -28,7 +28,10 @@ int blk_should_fake_timeout(struct request_queue *q) static int __init fail_io_timeout_debugfs(void) { - return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); + struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout", + NULL, &fail_io_timeout); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; } late_initcall(fail_io_timeout_debugfs); diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 73863d86f022..76dc02f15574 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -126,6 +126,12 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_copy_dsdt_locally, FALSE); */ u8 ACPI_INIT_GLOBAL(acpi_gbl_truncate_io_addresses, FALSE); +/* + * Disable runtime checking and repair of values returned by control methods. + * Use only if the repair is causing a problem on a particular machine. + */ +u8 ACPI_INIT_GLOBAL(acpi_gbl_disable_auto_repair, FALSE); + /* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ struct acpi_table_fadt acpi_gbl_FADT; diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index c7f743ca395b..5552125d8340 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -357,6 +357,7 @@ struct acpi_predefined_data { char *pathname; const union acpi_predefined_info *predefined; union acpi_operand_object *parent_package; + struct acpi_namespace_node *node; u32 flags; u8 node_flags; }; diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h index 94e73c97cf85..c445cca490ea 100644 --- a/drivers/acpi/acpica/acpredef.h +++ b/drivers/acpi/acpica/acpredef.h @@ -468,6 +468,7 @@ static const union acpi_predefined_info predefined_names[] = {{"_SWS", 0, ACPI_RTYPE_INTEGER}}, {{"_TC1", 0, ACPI_RTYPE_INTEGER}}, {{"_TC2", 0, ACPI_RTYPE_INTEGER}}, + {{"_TDL", 0, ACPI_RTYPE_INTEGER}}, {{"_TIP", 1, ACPI_RTYPE_INTEGER}}, {{"_TIV", 1, ACPI_RTYPE_INTEGER}}, {{"_TMP", 0, ACPI_RTYPE_INTEGER}}, diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c index 9fb03fa8ffde..c845c8089f39 100644 --- a/drivers/acpi/acpica/nspredef.c +++ b/drivers/acpi/acpica/nspredef.c @@ -193,14 +193,20 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, } /* - * 1) We have a return value, but if one wasn't expected, just exit, this is - * not a problem. For example, if the "Implicit Return" feature is - * enabled, methods will always return a value. + * Return value validation and possible repair. * - * 2) If the return value can be of any type, then we cannot perform any - * validation, exit. 
+ * 1) Don't perform return value validation/repair if this feature + * has been disabled via a global option. + * + * 2) We have a return value, but if one wasn't expected, just exit, + * this is not a problem. For example, if the "Implicit Return" + * feature is enabled, methods will always return a value. + * + * 3) If the return value can be of any type, then we cannot perform + * any validation, just exit. */ - if ((!predefined->info.expected_btypes) || + if (acpi_gbl_disable_auto_repair || + (!predefined->info.expected_btypes) || (predefined->info.expected_btypes == ACPI_RTYPE_ALL)) { goto cleanup; } @@ -212,6 +218,7 @@ acpi_ns_check_predefined_names(struct acpi_namespace_node *node, goto cleanup; } data->predefined = predefined; + data->node = node; data->node_flags = node->flags; data->pathname = pathname; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 973883babee1..024c4f263f87 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -503,6 +503,21 @@ acpi_ns_repair_TSS(struct acpi_predefined_data *data, { union acpi_operand_object *return_object = *return_object_ptr; acpi_status status; + struct acpi_namespace_node *node; + + /* + * We can only sort the _TSS return package if there is no _PSS in the + * same scope. This is because if _PSS is present, the ACPI specification + * dictates that the _TSS Power Dissipation field is to be ignored, and + * therefore some BIOSs leave garbage values in the _TSS Power field(s). + * In this case, it is best to just return the _TSS package as-is. + * (May, 2011) + */ + status = + acpi_ns_get_node(data->node, "^_PSS", ACPI_NS_NO_UPSEARCH, &node); + if (ACPI_SUCCESS(status)) { + return (AE_OK); + } status = acpi_ns_check_sorted_list(data, return_object, 5, 1, ACPI_SORT_DESCENDING, diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c index 48db0944ce4a..62365f6075dd 100644 --- a/drivers/acpi/acpica/tbinstal.c +++ b/drivers/acpi/acpica/tbinstal.c @@ -126,12 +126,29 @@ acpi_tb_add_table(struct acpi_table_desc *table_desc, u32 *table_index) } /* - * Originally, we checked the table signature for "SSDT" or "PSDT" here. - * Next, we added support for OEMx tables, signature "OEM". - * Valid tables were encountered with a null signature, so we've just - * given up on validating the signature, since it seems to be a waste - * of code. The original code was removed (05/2008). + * Validate the incoming table signature. + * + * 1) Originally, we checked the table signature for "SSDT" or "PSDT". + * 2) We added support for OEMx tables, signature "OEM". + * 3) Valid tables were encountered with a null signature, so we just + * gave up on validating the signature, (05/2008). + * 4) We encountered non-AML tables such as the MADT, which caused + * interpreter errors and kernel faults. So now, we once again allow + * only "SSDT", "OEMx", and now, also a null signature. (05/2011). */ + if ((table_desc->pointer->signature[0] != 0x00) && + (!ACPI_COMPARE_NAME(table_desc->pointer->signature, ACPI_SIG_SSDT)) + && (ACPI_STRNCMP(table_desc->pointer->signature, "OEM", 3))) { + ACPI_ERROR((AE_INFO, + "Table has invalid signature [%4.4s] (0x%8.8X), must be SSDT or OEMx", + acpi_ut_valid_acpi_name(*(u32 *)table_desc-> + pointer-> + signature) ? 
table_desc-> + pointer->signature : "????", + *(u32 *)table_desc->pointer->signature)); + + return_ACPI_STATUS(AE_BAD_SIGNATURE); + } (void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES); diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index f739a70b1c70..c34aa51af4ee 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -10,9 +10,11 @@ config ACPI_APEI error injection. config ACPI_APEI_GHES - tristate "APEI Generic Hardware Error Source" + bool "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED + select LLIST + select GENERIC_ALLOCATOR help Generic Hardware Error Source provides a way to report platform hardware errors (such as that from chipset). It @@ -30,6 +32,13 @@ config ACPI_APEI_PCIEAER PCIe AER errors may be reported via APEI firmware first mode. Turn on this option to enable the corresponding support. +config ACPI_APEI_MEMORY_FAILURE + bool "APEI memory error recovering support" + depends on ACPI_APEI && MEMORY_FAILURE + help + Memory errors may be reported via APEI firmware first mode. + Turn on this option to enable the memory recovering support. + config ACPI_APEI_EINJ tristate "APEI Error INJection (EINJ)" depends on ACPI_APEI && DEBUG_FS diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 4a904a4bf05f..8041248fce9b 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -157,9 +157,10 @@ EXPORT_SYMBOL_GPL(apei_exec_noop); * Interpret the specified action. Go through whole action table, * execute all instructions belong to the action. */ -int apei_exec_run(struct apei_exec_context *ctx, u8 action) +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, + bool optional) { - int rc; + int rc = -ENOENT; u32 i, ip; struct acpi_whea_header *entry; apei_exec_ins_func_t run; @@ -198,9 +199,9 @@ rewind: goto rewind; } - return 0; + return !optional && rc < 0 ? 
rc : 0; } -EXPORT_SYMBOL_GPL(apei_exec_run); +EXPORT_SYMBOL_GPL(__apei_exec_run); typedef int (*apei_exec_entry_func_t)(struct apei_exec_context *ctx, struct acpi_whea_header *entry, @@ -603,3 +604,29 @@ struct dentry *apei_get_debugfs_dir(void) return dapei; } EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); + +int apei_osc_setup(void) +{ + static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; + acpi_handle handle; + u32 capbuf[3]; + struct acpi_osc_context context = { + .uuid_str = whea_uuid_str, + .rev = 1, + .cap.length = sizeof(capbuf), + .cap.pointer = capbuf, + }; + + capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; + capbuf[OSC_SUPPORT_TYPE] = 0; + capbuf[OSC_CONTROL_TYPE] = 0; + + if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) + || ACPI_FAILURE(acpi_run_osc(handle, &context))) + return -EIO; + else { + kfree(context.ret.pointer); + return 0; + } +} +EXPORT_SYMBOL_GPL(apei_osc_setup); diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index ef0581f2094d..f57050e7a5e7 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -50,7 +50,18 @@ static inline u64 apei_exec_ctx_get_output(struct apei_exec_context *ctx) return ctx->value; } -int apei_exec_run(struct apei_exec_context *ctx, u8 action); +int __apei_exec_run(struct apei_exec_context *ctx, u8 action, bool optional); + +static inline int apei_exec_run(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 0); +} + +/* It is optional whether the firmware provides the action */ +static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 action) +{ + return __apei_exec_run(ctx, action, 1); +} /* Common instruction implementation */ @@ -113,4 +124,6 @@ void apei_estatus_print(const char *pfx, const struct acpi_hest_generic_status *estatus); int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus); int apei_estatus_check(const struct acpi_hest_generic_status *estatus); + +int apei_osc_setup(void); #endif diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index f74b2ea11f21..589b96c38704 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -46,7 +46,8 @@ * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the * EINJ table through an unpublished extension. Use with caution as * most will ignore the parameter and make their own choice of address - * for error injection. + * for error injection. This extension is used only if + * param_extension module parameter is specified. 
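+ * (The param_extension flag itself is declared as a module parameter
+ * a few lines below.)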
*/ struct einj_parameter { u64 type; @@ -65,6 +66,9 @@ struct einj_parameter { ((struct acpi_whea_header *)((char *)(tab) + \ sizeof(struct acpi_table_einj))) +static bool param_extension; +module_param(param_extension, bool, 0); + static struct acpi_table_einj *einj_tab; static struct apei_resources einj_resources; @@ -285,7 +289,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) einj_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_EINJ_BEGIN_OPERATION); + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_BEGIN_OPERATION); if (rc) return rc; apei_exec_ctx_set_input(&ctx, type); @@ -323,7 +327,7 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2) rc = __einj_error_trigger(trigger_paddr); if (rc) return rc; - rc = apei_exec_run(&ctx, ACPI_EINJ_END_OPERATION); + rc = apei_exec_run_optional(&ctx, ACPI_EINJ_END_OPERATION); return rc; } @@ -489,14 +493,6 @@ static int __init einj_init(void) einj_debug_dir, NULL, &error_type_fops); if (!fentry) goto err_cleanup; - fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param1); - if (!fentry) - goto err_cleanup; - fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, - einj_debug_dir, &error_param2); - if (!fentry) - goto err_cleanup; fentry = debugfs_create_file("error_inject", S_IWUSR, einj_debug_dir, NULL, &error_inject_fops); if (!fentry) @@ -513,12 +509,23 @@ static int __init einj_init(void) rc = apei_exec_pre_map_gars(&ctx); if (rc) goto err_release; - param_paddr = einj_get_parameter_address(); - if (param_paddr) { - einj_param = ioremap(param_paddr, sizeof(*einj_param)); - rc = -ENOMEM; - if (!einj_param) - goto err_unmap; + if (param_extension) { + param_paddr = einj_get_parameter_address(); + if (param_paddr) { + einj_param = ioremap(param_paddr, sizeof(*einj_param)); + rc = -ENOMEM; + if (!einj_param) + goto err_unmap; + fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param1); + if (!fentry) + goto err_unmap; + fentry = debugfs_create_x64("param2", S_IRUSR | S_IWUSR, + einj_debug_dir, &error_param2); + if (!fentry) + goto err_unmap; + } else + pr_warn(EINJ_PFX "Parameter extension is not supported.\n"); } pr_info(EINJ_PFX "Error INJection is initialized.\n"); @@ -526,6 +533,8 @@ static int __init einj_init(void) return 0; err_unmap: + if (einj_param) + iounmap(einj_param); apei_exec_post_unmap_gars(&ctx); err_release: apei_resources_release(&einj_resources); diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index a4cfb64c86a1..903549df809b 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -33,7 +33,7 @@ #define ERST_DBG_PFX "ERST DBG: " -#define ERST_DBG_RECORD_LEN_MAX 4096 +#define ERST_DBG_RECORD_LEN_MAX 0x4000 static void *erst_dbg_buf; static unsigned int erst_dbg_buf_len; @@ -213,6 +213,10 @@ static struct miscdevice erst_dbg_dev = { static __init int erst_dbg_init(void) { + if (erst_disable) { + pr_info(ERST_DBG_PFX "ERST support is disabled.\n"); + return -ENODEV; + } return misc_register(&erst_dbg_dev); } diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 6053f4780df9..2ca59dc69f7f 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -642,7 +642,7 @@ static int __erst_write_to_storage(u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_WRITE); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE); if (rc) return rc; apei_exec_ctx_set_input(&ctx, offset); @@ -666,7 +666,7 @@ static int 
__erst_write_to_storage(u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -681,7 +681,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_READ); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ); if (rc) return rc; apei_exec_ctx_set_input(&ctx, offset); @@ -709,7 +709,7 @@ static int __erst_read_from_storage(u64 record_id, u64 offset) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; @@ -724,7 +724,7 @@ static int __erst_clear_from_storage(u64 record_id) int rc; erst_exec_ctx_init(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_BEGIN_CLEAR); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR); if (rc) return rc; apei_exec_ctx_set_input(&ctx, record_id); @@ -748,7 +748,7 @@ static int __erst_clear_from_storage(u64 record_id) if (rc) return rc; val = apei_exec_ctx_get_output(&ctx); - rc = apei_exec_run(&ctx, ACPI_ERST_END); + rc = apei_exec_run_optional(&ctx, ACPI_ERST_END); if (rc) return rc; diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f703b2881153..0784f99a4665 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -12,7 +12,7 @@ * For more information about Generic Hardware Error Source, please * refer to ACPI Specification version 4.0, section 17.3.2.6 * - * Copyright 2010 Intel Corp. + * Copyright 2010,2011 Intel Corp. * Author: Huang Ying <ying.huang@intel.com> * * This program is free software; you can redistribute it and/or @@ -42,6 +42,9 @@ #include <linux/mutex.h> #include <linux/ratelimit.h> #include <linux/vmalloc.h> +#include <linux/irq_work.h> +#include <linux/llist.h> +#include <linux/genalloc.h> #include <acpi/apei.h> #include <acpi/atomicio.h> #include <acpi/hed.h> @@ -53,6 +56,30 @@ #define GHES_PFX "GHES: " #define GHES_ESTATUS_MAX_SIZE 65536 +#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 + +#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 + +/* This is just an estimation for memory pool allocation */ +#define GHES_ESTATUS_CACHE_AVG_SIZE 512 + +#define GHES_ESTATUS_CACHES_SIZE 4 + +#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL +/* Prevent too many caches are allocated because of RCU */ +#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) + +#define GHES_ESTATUS_CACHE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_cache) + (estatus_len)) +#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_cache *)(estatus_cache) + 1)) + +#define GHES_ESTATUS_NODE_LEN(estatus_len) \ + (sizeof(struct ghes_estatus_node) + (estatus_len)) +#define GHES_ESTATUS_FROM_NODE(estatus_node) \ + ((struct acpi_hest_generic_status *) \ + ((struct ghes_estatus_node *)(estatus_node) + 1)) /* * One struct ghes is created for each generic hardware error source. 
@@ -77,6 +104,22 @@ struct ghes { }; }; +struct ghes_estatus_node { + struct llist_node llnode; + struct acpi_hest_generic *generic; +}; + +struct ghes_estatus_cache { + u32 estatus_len; + atomic_t count; + struct acpi_hest_generic *generic; + unsigned long long time_in; + struct rcu_head rcu; +}; + +int ghes_disable; +module_param_named(disable, ghes_disable, bool, 0); + static int ghes_panic_timeout __read_mostly = 30; /* @@ -121,6 +164,22 @@ static struct vm_struct *ghes_ioremap_area; static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); +/* + * printk is not safe in NMI context. So in NMI handler, we allocate + * required memory from lock-less memory allocator + * (ghes_estatus_pool), save estatus into it, put them into lock-less + * list (ghes_estatus_llist), then delay printk into IRQ context via + * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record + * required pool size by all NMI error source. + */ +static struct gen_pool *ghes_estatus_pool; +static unsigned long ghes_estatus_pool_size_request; +static struct llist_head ghes_estatus_llist; +static struct irq_work ghes_proc_irq_work; + +struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; +static atomic_t ghes_estatus_cache_alloced; + static int ghes_ioremap_init(void) { ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, @@ -180,6 +239,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr) __flush_tlb_one(vaddr); } +static int ghes_estatus_pool_init(void) +{ + ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); + if (!ghes_estatus_pool) + return -ENOMEM; + return 0; +} + +static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, + struct gen_pool_chunk *chunk, + void *data) +{ + free_page(chunk->start_addr); +} + +static void ghes_estatus_pool_exit(void) +{ + gen_pool_for_each_chunk(ghes_estatus_pool, + ghes_estatus_pool_free_chunk_page, NULL); + gen_pool_destroy(ghes_estatus_pool); +} + +static int ghes_estatus_pool_expand(unsigned long len) +{ + unsigned long i, pages, size, addr; + int ret; + + ghes_estatus_pool_size_request += PAGE_ALIGN(len); + size = gen_pool_size(ghes_estatus_pool); + if (size >= ghes_estatus_pool_size_request) + return 0; + pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; + for (i = 0; i < pages; i++) { + addr = __get_free_page(GFP_KERNEL); + if (!addr) + return -ENOMEM; + ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); + if (ret) + return ret; + } + + return 0; +} + +static void ghes_estatus_pool_shrink(unsigned long len) +{ + ghes_estatus_pool_size_request -= PAGE_ALIGN(len); +} + static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -341,43 +449,196 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(struct ghes *ghes) +static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) { - int sev, processed = 0; + int sev, sec_sev; struct acpi_hest_generic_data *gdata; - sev = ghes_severity(ghes->estatus->error_severity); - apei_estatus_for_each_section(ghes->estatus, gdata) { -#ifdef CONFIG_X86_MCE + sev = ghes_severity(estatus->error_severity); + apei_estatus_for_each_section(estatus, gdata) { + sec_sev = ghes_severity(gdata->error_severity); if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, CPER_SEC_PLATFORM_MEM)) { - apei_mce_report_mem_error( - sev == GHES_SEV_CORRECTED, - (struct cper_sec_mem_err *)(gdata+1)); - processed = 1; - } + struct 
cper_sec_mem_err *mem_err; + mem_err = (struct cper_sec_mem_err *)(gdata+1); +#ifdef CONFIG_X86_MCE + apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, + mem_err); #endif +#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE + if (sev == GHES_SEV_RECOVERABLE && + sec_sev == GHES_SEV_RECOVERABLE && + mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + unsigned long pfn; + pfn = mem_err->physical_addr >> PAGE_SHIFT; + memory_failure_queue(pfn, 0, 0); + } +#endif + } } } -static void ghes_print_estatus(const char *pfx, struct ghes *ghes) +static void __ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) { - /* Not more than 2 messages every 5 seconds */ - static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); - if (pfx == NULL) { - if (ghes_severity(ghes->estatus->error_severity) <= + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) pfx = KERN_WARNING HW_ERR; else pfx = KERN_ERR HW_ERR; } - if (__ratelimit(&ratelimit)) { - printk( - "%s""Hardware error from APEI Generic Hardware Error Source: %d\n", - pfx, ghes->generic->header.source_id); - apei_estatus_print(pfx, ghes->estatus); + printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", + pfx, generic->header.source_id); + apei_estatus_print(pfx, estatus); +} + +static int ghes_print_estatus(const char *pfx, + const struct acpi_hest_generic *generic, + const struct acpi_hest_generic_status *estatus) +{ + /* Not more than 2 messages every 5 seconds */ + static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); + static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); + struct ratelimit_state *ratelimit; + + if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) + ratelimit = &ratelimit_corrected; + else + ratelimit = &ratelimit_uncorrected; + if (__ratelimit(ratelimit)) { + __ghes_print_estatus(pfx, generic, estatus); + return 1; } + return 0; +} + +/* + * GHES error status reporting throttle, to report more kinds of + * errors, instead of just most frequently occurred errors. 
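+ *
+ * A small array of recently seen error status blocks is kept; if an
+ * identical block shows up again within GHES_ESTATUS_IN_CACHE_MAX_NSEC,
+ * it is treated as cached and its printk is suppressed, leaving log
+ * bandwidth for the less frequent errors.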
+ */ +static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) +{ + u32 len; + int i, cached = 0; + unsigned long long now; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + len = apei_estatus_len(estatus); + rcu_read_lock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) + continue; + if (len != cache->estatus_len) + continue; + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + if (memcmp(estatus, cache_estatus, len)) + continue; + atomic_inc(&cache->count); + now = sched_clock(); + if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) + cached = 1; + break; + } + rcu_read_unlock(); + return cached; +} + +static struct ghes_estatus_cache *ghes_estatus_cache_alloc( + struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + int alloced; + u32 len, cache_len; + struct ghes_estatus_cache *cache; + struct acpi_hest_generic_status *cache_estatus; + + alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); + if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + len = apei_estatus_len(estatus); + cache_len = GHES_ESTATUS_CACHE_LEN(len); + cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); + if (!cache) { + atomic_dec(&ghes_estatus_cache_alloced); + return NULL; + } + cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); + memcpy(cache_estatus, estatus, len); + cache->estatus_len = len; + atomic_set(&cache->count, 0); + cache->generic = generic; + cache->time_in = sched_clock(); + return cache; +} + +static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) +{ + u32 len; + + len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); + len = GHES_ESTATUS_CACHE_LEN(len); + gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); + atomic_dec(&ghes_estatus_cache_alloced); +} + +static void ghes_estatus_cache_rcu_free(struct rcu_head *head) +{ + struct ghes_estatus_cache *cache; + + cache = container_of(head, struct ghes_estatus_cache, rcu); + ghes_estatus_cache_free(cache); +} + +static void ghes_estatus_cache_add( + struct acpi_hest_generic *generic, + struct acpi_hest_generic_status *estatus) +{ + int i, slot = -1, count; + unsigned long long now, duration, period, max_period = 0; + struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; + + new_cache = ghes_estatus_cache_alloc(generic, estatus); + if (new_cache == NULL) + return; + rcu_read_lock(); + now = sched_clock(); + for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { + cache = rcu_dereference(ghes_estatus_caches[i]); + if (cache == NULL) { + slot = i; + slot_cache = NULL; + break; + } + duration = now - cache->time_in; + if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { + slot = i; + slot_cache = cache; + break; + } + count = atomic_read(&cache->count); + period = duration; + do_div(period, (count + 1)); + if (period > max_period) { + max_period = period; + slot = i; + slot_cache = cache; + } + } + /* new_cache must be put into array after its contents are written */ + smp_wmb(); + if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, + slot_cache, new_cache) == slot_cache) { + if (slot_cache) + call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); + } else + ghes_estatus_cache_free(new_cache); + rcu_read_unlock(); } static int ghes_proc(struct ghes *ghes) @@ -387,9 +648,11 @@ static int ghes_proc(struct ghes *ghes) rc = ghes_read_estatus(ghes, 0); if (rc) goto out; - 
ghes_print_estatus(NULL, ghes); - ghes_do_proc(ghes); - + if (!ghes_estatus_cached(ghes->estatus)) { + if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) + ghes_estatus_cache_add(ghes->generic, ghes->estatus); + } + ghes_do_proc(ghes->estatus); out: ghes_clear_estatus(ghes); return 0; @@ -447,6 +710,45 @@ static int ghes_notify_sci(struct notifier_block *this, return ret; } +static void ghes_proc_in_irq(struct irq_work *irq_work) +{ + struct llist_node *llnode, *next, *tail = NULL; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic *generic; + struct acpi_hest_generic_status *estatus; + u32 len, node_len; + + /* + * Because the time order of estatus in list is reversed, + * revert it back to proper order. + */ + llnode = llist_del_all(&ghes_estatus_llist); + while (llnode) { + next = llnode->next; + llnode->next = tail; + tail = llnode; + llnode = next; + } + llnode = tail; + while (llnode) { + next = llnode->next; + estatus_node = llist_entry(llnode, struct ghes_estatus_node, + llnode); + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + len = apei_estatus_len(estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + ghes_do_proc(estatus); + if (!ghes_estatus_cached(estatus)) { + generic = estatus_node->generic; + if (ghes_print_estatus(NULL, generic, estatus)) + ghes_estatus_cache_add(generic, estatus); + } + gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, + node_len); + llnode = next; + } +} + static int ghes_notify_nmi(struct notifier_block *this, unsigned long cmd, void *data) { @@ -476,7 +778,8 @@ static int ghes_notify_nmi(struct notifier_block *this, if (sev_global >= GHES_SEV_PANIC) { oops_begin(); - ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); + __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic, + ghes_global->estatus); /* reboot to log the error! 
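		 * ghes_panic_timeout (declared above, default 30 seconds) is
		 * used when no panic timeout was set on the command line.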
*/ if (panic_timeout == 0) panic_timeout = ghes_panic_timeout; @@ -484,12 +787,34 @@ static int ghes_notify_nmi(struct notifier_block *this, } list_for_each_entry_rcu(ghes, &ghes_nmi, list) { +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + u32 len, node_len; + struct ghes_estatus_node *estatus_node; + struct acpi_hest_generic_status *estatus; +#endif if (!(ghes->flags & GHES_TO_CLEAR)) continue; - /* Do not print estatus because printk is not NMI safe */ - ghes_do_proc(ghes); +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + if (ghes_estatus_cached(ghes->estatus)) + goto next; + /* Save estatus for further processing in IRQ context */ + len = apei_estatus_len(ghes->estatus); + node_len = GHES_ESTATUS_NODE_LEN(len); + estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, + node_len); + if (estatus_node) { + estatus_node->generic = ghes->generic; + estatus = GHES_ESTATUS_FROM_NODE(estatus_node); + memcpy(estatus, ghes->estatus, len); + llist_add(&estatus_node->llnode, &ghes_estatus_llist); + } +next: +#endif ghes_clear_estatus(ghes); } +#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + irq_work_queue(&ghes_proc_irq_work); +#endif out: raw_spin_unlock(&ghes_nmi_lock); @@ -504,10 +829,26 @@ static struct notifier_block ghes_notifier_nmi = { .notifier_call = ghes_notify_nmi, }; +static unsigned long ghes_esource_prealloc_size( + const struct acpi_hest_generic *generic) +{ + unsigned long block_length, prealloc_records, prealloc_size; + + block_length = min_t(unsigned long, generic->error_block_length, + GHES_ESTATUS_MAX_SIZE); + prealloc_records = max_t(unsigned long, + generic->records_to_preallocate, 1); + prealloc_size = min_t(unsigned long, block_length * prealloc_records, + GHES_ESOURCE_PREALLOC_MAX_SIZE); + + return prealloc_size; +} + static int __devinit ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; struct ghes *ghes = NULL; + unsigned long len; int rc = -EINVAL; generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; @@ -573,6 +914,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); break; case ACPI_HEST_NOTIFY_NMI: + len = ghes_esource_prealloc_size(generic); + ghes_estatus_pool_expand(len); mutex_lock(&ghes_list_mutex); if (list_empty(&ghes_nmi)) register_die_notifier(&ghes_notifier_nmi); @@ -597,6 +940,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) { struct ghes *ghes; struct acpi_hest_generic *generic; + unsigned long len; ghes = platform_get_drvdata(ghes_dev); generic = ghes->generic; @@ -627,6 +971,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev) * freed after NMI handler finishes. 
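	 * (synchronize_rcu() below provides that guarantee; only after it
	 * returns is the preallocated estatus pool size request shrunk.)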
*/ synchronize_rcu(); + len = ghes_esource_prealloc_size(generic); + ghes_estatus_pool_shrink(len); break; default: BUG(); @@ -662,15 +1008,43 @@ static int __init ghes_init(void) return -EINVAL; } + if (ghes_disable) { + pr_info(GHES_PFX "GHES is not enabled!\n"); + return -EINVAL; + } + + init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); + rc = ghes_ioremap_init(); if (rc) goto err; - rc = platform_driver_register(&ghes_platform_driver); + rc = ghes_estatus_pool_init(); if (rc) goto err_ioremap_exit; + rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * + GHES_ESTATUS_CACHE_ALLOCED_MAX); + if (rc) + goto err_pool_exit; + + rc = platform_driver_register(&ghes_platform_driver); + if (rc) + goto err_pool_exit; + + rc = apei_osc_setup(); + if (rc == 0 && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); + else if (rc == 0 && !osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); + else if (rc && osc_sb_apei_support_acked) + pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); + else + pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); + return 0; +err_pool_exit: + ghes_estatus_pool_exit(); err_ioremap_exit: ghes_ioremap_exit(); err: @@ -680,6 +1054,7 @@ err: static void __exit ghes_exit(void) { platform_driver_unregister(&ghes_platform_driver); + ghes_estatus_pool_exit(); ghes_ioremap_exit(); } diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 181bc2f7bb74..05fee06f4d6e 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -231,16 +231,17 @@ void __init acpi_hest_init(void) goto err; } - rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); - if (rc) - goto err; - - rc = hest_ghes_dev_register(ghes_count); - if (!rc) { - pr_info(HEST_PFX "Table parsing has been initialized.\n"); - return; + if (!ghes_disable) { + rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); + if (rc) + goto err; + rc = hest_ghes_dev_register(ghes_count); + if (rc) + goto err; } + pr_info(HEST_PFX "Table parsing has been initialized.\n"); + return; err: hest_disable = 1; } diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 2c661353e8f2..87c0a8daa99a 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -55,6 +55,9 @@ #define ACPI_BATTERY_NOTIFY_INFO 0x81 #define ACPI_BATTERY_NOTIFY_THRESHOLD 0x82 +/* Battery power unit: 0 means mW, 1 means mA */ +#define ACPI_BATTERY_POWER_UNIT_MA 1 + #define _COMPONENT ACPI_BATTERY_COMPONENT ACPI_MODULE_NAME("battery"); @@ -91,11 +94,6 @@ MODULE_DEVICE_TABLE(acpi, battery_device_ids); enum { ACPI_BATTERY_ALARM_PRESENT, ACPI_BATTERY_XINFO_PRESENT, - /* For buggy DSDTs that report negative 16-bit values for either - * charging or discharging current and/or report 0 as 65536 - * due to bad math. - */ - ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, }; @@ -301,7 +299,8 @@ static enum power_supply_property energy_battery_props[] = { #ifdef CONFIG_ACPI_PROCFS_POWER inline char *acpi_battery_units(struct acpi_battery *battery) { - return (battery->power_unit)?"mA":"mW"; + return (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) ? 
+ "mA" : "mW"; } #endif @@ -461,9 +460,17 @@ static int acpi_battery_get_state(struct acpi_battery *battery) battery->update_time = jiffies; kfree(buffer.pointer); - if (test_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags) && - battery->rate_now != -1) + /* For buggy DSDTs that report negative 16-bit values for either + * charging or discharging current and/or report 0 as 65536 + * due to bad math. + */ + if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA && + battery->rate_now != ACPI_BATTERY_VALUE_UNKNOWN && + (s16)(battery->rate_now) < 0) { battery->rate_now = abs((s16)battery->rate_now); + printk_once(KERN_WARNING FW_BUG "battery: (dis)charge rate" + " invalid.\n"); + } if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags) && battery->capacity_now >= 0 && battery->capacity_now <= 100) @@ -544,7 +551,7 @@ static int sysfs_add_battery(struct acpi_battery *battery) { int result; - if (battery->power_unit) { + if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { battery->bat.properties = charge_battery_props; battery->bat.num_properties = ARRAY_SIZE(charge_battery_props); @@ -566,18 +573,16 @@ static int sysfs_add_battery(struct acpi_battery *battery) static void sysfs_remove_battery(struct acpi_battery *battery) { - if (!battery->bat.dev) + mutex_lock(&battery->lock); + if (!battery->bat.dev) { + mutex_unlock(&battery->lock); return; + } + device_remove_file(battery->bat.dev, &alarm_attr); power_supply_unregister(&battery->bat); battery->bat.dev = NULL; -} - -static void acpi_battery_quirks(struct acpi_battery *battery) -{ - if (dmi_name_in_vendors("Acer") && battery->power_unit) { - set_bit(ACPI_BATTERY_QUIRK_SIGNED16_CURRENT, &battery->flags); - } + mutex_unlock(&battery->lock); } /* @@ -592,7 +597,7 @@ static void acpi_battery_quirks(struct acpi_battery *battery) * * Handle this correctly so that they won't break userspace. 
*/ -static void acpi_battery_quirks2(struct acpi_battery *battery) +static void acpi_battery_quirks(struct acpi_battery *battery) { if (test_bit(ACPI_BATTERY_QUIRK_PERCENTAGE_CAPACITY, &battery->flags)) return ; @@ -623,13 +628,15 @@ static int acpi_battery_update(struct acpi_battery *battery) result = acpi_battery_get_info(battery); if (result) return result; - acpi_battery_quirks(battery); acpi_battery_init_alarm(battery); } - if (!battery->bat.dev) - sysfs_add_battery(battery); + if (!battery->bat.dev) { + result = sysfs_add_battery(battery); + if (result) + return result; + } result = acpi_battery_get_state(battery); - acpi_battery_quirks2(battery); + acpi_battery_quirks(battery); return result; } @@ -863,7 +870,7 @@ DECLARE_FILE_FUNCTIONS(alarm); }, \ } -static struct battery_file { +static const struct battery_file { struct file_operations ops; mode_t mode; const char *name; @@ -948,9 +955,12 @@ static int battery_notify(struct notifier_block *nb, struct acpi_battery *battery = container_of(nb, struct acpi_battery, pm_nb); switch (mode) { + case PM_POST_HIBERNATION: case PM_POST_SUSPEND: - sysfs_remove_battery(battery); - sysfs_add_battery(battery); + if (battery->bat.dev) { + sysfs_remove_battery(battery); + sysfs_add_battery(battery); + } break; } @@ -975,25 +985,33 @@ static int acpi_battery_add(struct acpi_device *device) if (ACPI_SUCCESS(acpi_get_handle(battery->device->handle, "_BIX", &handle))) set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); - acpi_battery_update(battery); + result = acpi_battery_update(battery); + if (result) + goto fail; #ifdef CONFIG_ACPI_PROCFS_POWER result = acpi_battery_add_fs(device); #endif - if (!result) { - printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", - ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), - device->status.battery_present ? "present" : "absent"); - } else { + if (result) { #ifdef CONFIG_ACPI_PROCFS_POWER acpi_battery_remove_fs(device); #endif - kfree(battery); + goto fail; } + printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", + ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), + device->status.battery_present ? 
"present" : "absent"); + battery->pm_nb.notifier_call = battery_notify; register_pm_notifier(&battery->pm_nb); return result; + +fail: + sysfs_remove_battery(battery); + mutex_destroy(&battery->lock); + kfree(battery); + return result; } static int acpi_battery_remove(struct acpi_device *device, int type) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index d1e06c182cdb..437ddbf0c49a 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -39,6 +39,7 @@ #include <linux/pci.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> +#include <acpi/apei.h> #include <linux/dmi.h> #include <linux/suspend.h> @@ -519,6 +520,7 @@ out_kfree: } EXPORT_SYMBOL(acpi_run_osc); +bool osc_sb_apei_support_acked; static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48"; static void acpi_bus_osc_support(void) { @@ -541,11 +543,19 @@ static void acpi_bus_osc_support(void) #if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE) capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_PPC_OST_SUPPORT; #endif + + if (!ghes_disable) + capbuf[OSC_SUPPORT_TYPE] |= OSC_SB_APEI_SUPPORT; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return; - if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) + if (ACPI_SUCCESS(acpi_run_osc(handle, &context))) { + u32 *capbuf_ret = context.ret.pointer; + if (context.ret.length > OSC_SUPPORT_TYPE) + osc_sb_apei_support_acked = + capbuf_ret[OSC_SUPPORT_TYPE] & OSC_SB_APEI_SUPPORT; kfree(context.ret.pointer); - /* do we need to check the returned cap? Sounds no */ + } + /* do we need to check other returned cap? Sounds no */ } /* -------------------------------------------------------------------------- diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 1864ad3cf895..19a61136d848 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -77,7 +77,7 @@ struct dock_dependent_device { struct list_head list; struct list_head hotplug_list; acpi_handle handle; - struct acpi_dock_ops *ops; + const struct acpi_dock_ops *ops; void *context; }; @@ -589,7 +589,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier); * the dock driver after _DCK is executed. 
*/ int -register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, +register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops, void *context) { struct dock_dependent_device *dd; diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c index 05b44201a614..22f918bacd35 100644 --- a/drivers/acpi/ec_sys.c +++ b/drivers/acpi/ec_sys.c @@ -92,7 +92,7 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf, return count; } -static struct file_operations acpi_ec_io_ops = { +static const struct file_operations acpi_ec_io_ops = { .owner = THIS_MODULE, .open = acpi_ec_open_io, .read = acpi_ec_read_io, diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 467479f07c1f..0f0356ca1a9e 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -110,7 +110,7 @@ fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) return result; } -static struct thermal_cooling_device_ops fan_cooling_ops = { +static const struct thermal_cooling_device_ops fan_cooling_ops = { .get_max_state = fan_get_max_state, .get_cur_state = fan_get_cur_state, .set_cur_state = fan_set_cur_state, diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 372f9b70f7f4..fa32f584229f 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -155,7 +155,7 @@ static u32 acpi_osi_handler(acpi_string interface, u32 supported) { if (!strcmp("Linux", interface)) { - printk(KERN_NOTICE FW_BUG PREFIX + printk_once(KERN_NOTICE FW_BUG PREFIX "BIOS _OSI(Linux) query %s%s\n", osi_linux.enable ? "honored" : "ignored", osi_linux.cmdline ? " via cmdline" : @@ -237,8 +237,23 @@ void acpi_os_vprintf(const char *fmt, va_list args) #endif } +#ifdef CONFIG_KEXEC +static unsigned long acpi_rsdp; +static int __init setup_acpi_rsdp(char *arg) +{ + acpi_rsdp = simple_strtoul(arg, NULL, 16); + return 0; +} +early_param("acpi_rsdp", setup_acpi_rsdp); +#endif + acpi_physical_address __init acpi_os_get_root_pointer(void) { +#ifdef CONFIG_KEXEC + if (acpi_rsdp) + return acpi_rsdp; +#endif + if (efi_enabled) { if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) return efi.acpi20; @@ -1083,7 +1098,13 @@ struct osi_setup_entry { bool enable; }; -static struct osi_setup_entry __initdata osi_setup_entries[OSI_STRING_ENTRIES_MAX]; +static struct osi_setup_entry __initdata + osi_setup_entries[OSI_STRING_ENTRIES_MAX] = { + {"Module Device", true}, + {"Processor Device", true}, + {"3.0 _SCP Extensions", true}, + {"Processor Aggregator Device", true}, +}; void __init acpi_osi_setup(char *str) { diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index f907cfbfa13c..7f9eba9a0b02 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -303,6 +303,61 @@ void acpi_pci_irq_del_prt(struct pci_bus *bus) /* -------------------------------------------------------------------------- PCI Interrupt Routing Support -------------------------------------------------------------------------- */ +#ifdef CONFIG_X86_IO_APIC +extern int noioapicquirk; +extern int noioapicreroute; + +static int bridge_has_boot_interrupt_variant(struct pci_bus *bus) +{ + struct pci_bus *bus_it; + + for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) { + if (!bus_it->self) + return 0; + if (bus_it->self->irq_reroute_variant) + return bus_it->self->irq_reroute_variant; + } + return 0; +} + +/* + * Some chipsets (e.g. Intel 6700PXH) generate a legacy INTx when the IRQ + * entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does + * during interrupt handling). 
When this INTx generation cannot be disabled, + * we reroute these interrupts to their legacy equivalent to get rid of + * spurious interrupts. + */ +static int acpi_reroute_boot_interrupt(struct pci_dev *dev, + struct acpi_prt_entry *entry) +{ + if (noioapicquirk || noioapicreroute) { + return 0; + } else { + switch (bridge_has_boot_interrupt_variant(dev->bus)) { + case 0: + /* no rerouting necessary */ + return 0; + case INTEL_IRQ_REROUTE_VARIANT: + /* + * Remap according to INTx routing table in 6700PXH + * specs, intel order number 302628-002, section + * 2.15.2. Other chipsets (80332, ...) have the same + * mapping and are handled here as well. + */ + dev_info(&dev->dev, "PCI IRQ %d -> rerouted to legacy " + "IRQ %d\n", entry->index, + (entry->index % 4) + 16); + entry->index = (entry->index % 4) + 16; + return 1; + default: + dev_warn(&dev->dev, "Cannot reroute IRQ %d to legacy " + "IRQ: unknown mapping\n", entry->index); + return -1; + } + } +} +#endif /* CONFIG_X86_IO_APIC */ + static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) { struct acpi_prt_entry *entry; @@ -311,6 +366,9 @@ static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) entry = acpi_pci_irq_find_prt_entry(dev, pin); if (entry) { +#ifdef CONFIG_X86_IO_APIC + acpi_reroute_boot_interrupt(dev, entry); +#endif /* CONFIG_X86_IO_APIC */ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %s[%c] _PRT entry\n", pci_name(dev), pin_name(pin))); return entry; diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index d06078d660ad..2672c798272f 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -485,7 +485,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) root->secondary.end = 0xFF; printk(KERN_WARNING FW_BUG PREFIX "no secondary bus range in _CRS\n"); - status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL, &bus); + status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, + NULL, &bus); if (ACPI_SUCCESS(status)) root->secondary.start = bus; else if (status == AE_NOT_FOUND) diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 79cb65332894..870550d6a4bf 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -244,7 +244,7 @@ processor_set_cur_state(struct thermal_cooling_device *cdev, return result; } -struct thermal_cooling_device_ops processor_cooling_ops = { +const struct thermal_cooling_device_ops processor_cooling_ops = { .get_max_state = processor_get_max_state, .get_cur_state = processor_get_cur_state, .set_cur_state = processor_set_cur_state, diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 50658ff887d9..6e36d0c0057c 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -130,6 +130,9 @@ struct acpi_sbs { #define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger) +static int acpi_sbs_remove(struct acpi_device *device, int type); +static int acpi_battery_get_state(struct acpi_battery *battery); + static inline int battery_scale(int log) { int scale = 1; @@ -195,6 +198,8 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy, if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT) return -ENODEV; + + acpi_battery_get_state(battery); switch (psp) { case POWER_SUPPLY_PROP_STATUS: if (battery->rate_now < 0) @@ -225,11 +230,17 @@ static int acpi_sbs_battery_get_property(struct power_supply *psy, case POWER_SUPPLY_PROP_POWER_NOW: val->intval = abs(battery->rate_now) * acpi_battery_ipscale(battery) * 1000; + 
val->intval *= (acpi_battery_mode(battery)) ? + (battery->voltage_now * + acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CURRENT_AVG: case POWER_SUPPLY_PROP_POWER_AVG: val->intval = abs(battery->rate_avg) * acpi_battery_ipscale(battery) * 1000; + val->intval *= (acpi_battery_mode(battery)) ? + (battery->voltage_now * + acpi_battery_vscale(battery) / 1000) : 1; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = battery->state_of_charge; @@ -903,8 +914,6 @@ static void acpi_sbs_callback(void *context) } } -static int acpi_sbs_remove(struct acpi_device *device, int type); - static int acpi_sbs_add(struct acpi_device *device) { struct acpi_sbs *sbs; diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 6c949602cbd1..3ed80b2ca907 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -428,6 +428,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "1000 Series"), }, }, + { + .callback = init_old_suspend_ordering, + .ident = "Asus A8N-SLI DELUXE", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI DELUXE"), + }, + }, + { + .callback = init_old_suspend_ordering, + .ident = "Asus A8N-SLI Premium", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "A8N-SLI Premium"), + }, + }, {}, }; #endif /* CONFIG_SUSPEND */ diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 77255f250dbb..c538d0ef10ff 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -149,12 +149,12 @@ static int param_get_debug_level(char *buffer, const struct kernel_param *kp) return result; } -static struct kernel_param_ops param_ops_debug_layer = { +static const struct kernel_param_ops param_ops_debug_layer = { .set = param_set_uint, .get = param_get_debug_layer, }; -static struct kernel_param_ops param_ops_debug_level = { +static const struct kernel_param_ops param_ops_debug_level = { .set = param_set_uint, .get = param_get_debug_level, }; diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 2607e17b520f..48fbc647b178 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -812,7 +812,7 @@ acpi_thermal_unbind_cooling_device(struct thermal_zone_device *thermal, thermal_zone_unbind_cooling_device); } -static struct thermal_zone_device_ops acpi_thermal_zone_ops = { +static const struct thermal_zone_device_ops acpi_thermal_zone_ops = { .bind = acpi_thermal_bind_cooling_device, .unbind = acpi_thermal_unbind_cooling_device, .get_temp = thermal_get_temp, diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index ada4b4d9bdc8..08a44b532f7c 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -307,7 +307,7 @@ video_set_cur_state(struct thermal_cooling_device *cooling_dev, unsigned long st return acpi_video_device_lcd_set_level(video, level); } -static struct thermal_cooling_device_ops video_cooling_ops = { +static const struct thermal_cooling_device_ops video_cooling_ops = { .get_max_state = video_get_max_state, .get_cur_state = video_get_cur_state, .set_cur_state = video_set_cur_state, diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index e0a5b555cee1..bb7c5f1085cc 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -218,12 +218,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data) ata_acpi_uevent(dev->link->ap, dev, event); } -static struct acpi_dock_ops ata_acpi_dev_dock_ops = { +static const struct acpi_dock_ops 
ata_acpi_dev_dock_ops = { .handler = ata_acpi_dev_notify_dock, .uevent = ata_acpi_dev_uevent, }; -static struct acpi_dock_ops ata_acpi_ap_dock_ops = { +static const struct acpi_dock_ops ata_acpi_ap_dock_ops = { .handler = ata_acpi_ap_notify_dock, .uevent = ata_acpi_ap_uevent, }; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 49502bc5360a..423fd56bf612 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -616,5 +616,16 @@ config MSM_SMD_PKT Enables userspace clients to read and write to some packet SMD ports via device interface for MSM chipset. +config TILE_SROM + bool "Character-device access via hypervisor to the Tilera SPI ROM" + depends on TILE + default y + ---help--- + This device provides character-level read-write access + to the SROM, typically via the "0", "1", and "2" devices + in /dev/srom/. The Tilera hypervisor makes the flash + device appear much like a simple EEPROM, and knows + how to partition a single ROM for multiple purposes. + endmenu diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 7a00672bd85d..32762ba769c2 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -63,3 +63,5 @@ obj-$(CONFIG_RAMOOPS) += ramoops.o obj-$(CONFIG_JS_RTC) += js-rtc.o js-rtc-y = rtc.o + +obj-$(CONFIG_TILE_SROM) += tile-srom.o diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c index fca0c51bbc90..810aff9e750f 100644 --- a/drivers/char/ramoops.c +++ b/drivers/char/ramoops.c @@ -147,6 +147,14 @@ static int __init ramoops_probe(struct platform_device *pdev) cxt->phys_addr = pdata->mem_address; cxt->record_size = pdata->record_size; cxt->dump_oops = pdata->dump_oops; + /* + * Update the module parameter variables as well so they are visible + * through /sys/module/ramoops/parameters/ + */ + mem_size = pdata->mem_size; + mem_address = pdata->mem_address; + record_size = pdata->record_size; + dump_oops = pdata->dump_oops; if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { pr_err("request mem region failed\n"); diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c new file mode 100644 index 000000000000..cf3ee008dca2 --- /dev/null +++ b/drivers/char/tile-srom.c @@ -0,0 +1,481 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * SPI Flash ROM driver + * + * This source code is derived from code provided in "Linux Device + * Drivers, Third Edition", by Jonathan Corbet, Alessandro Rubini, and + * Greg Kroah-Hartman, published by O'Reilly Media, Inc. + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> +#include <linux/kernel.h> /* printk() */ +#include <linux/slab.h> /* kmalloc() */ +#include <linux/fs.h> /* everything... 
*/ +#include <linux/errno.h> /* error codes */ +#include <linux/types.h> /* size_t */ +#include <linux/proc_fs.h> +#include <linux/fcntl.h> /* O_ACCMODE */ +#include <linux/aio.h> +#include <linux/pagemap.h> +#include <linux/hugetlb.h> +#include <linux/uaccess.h> +#include <linux/platform_device.h> +#include <hv/hypervisor.h> +#include <linux/ioctl.h> +#include <linux/cdev.h> +#include <linux/delay.h> +#include <hv/drv_srom_intf.h> + +/* + * Size of our hypervisor I/O requests. We break up large transfers + * so that we don't spend large uninterrupted spans of time in the + * hypervisor. Erasing an SROM sector takes a significant fraction of + * a second, so if we allowed the user to, say, do one I/O to write the + * entire ROM, we'd get soft lockup timeouts, or worse. + */ +#define SROM_CHUNK_SIZE ((size_t)4096) + +/* + * When hypervisor is busy (e.g. erasing), poll the status periodically. + */ + +/* + * Interval to poll the state in msec + */ +#define SROM_WAIT_TRY_INTERVAL 20 + +/* + * Maximum times to poll the state + */ +#define SROM_MAX_WAIT_TRY_TIMES 1000 + +struct srom_dev { + int hv_devhdl; /* Handle for hypervisor device */ + u32 total_size; /* Size of this device */ + u32 sector_size; /* Size of a sector */ + u32 page_size; /* Size of a page */ + struct mutex lock; /* Allow only one accessor at a time */ +}; + +static int srom_major; /* Dynamic major by default */ +module_param(srom_major, int, 0); +MODULE_AUTHOR("Tilera Corporation"); +MODULE_LICENSE("GPL"); + +static int srom_devs; /* Number of SROM partitions */ +static struct cdev srom_cdev; +static struct class *srom_class; +static struct srom_dev *srom_devices; + +/* + * Handle calling the hypervisor and managing EAGAIN/EBUSY. + */ + +static ssize_t _srom_read(int hv_devhdl, void *buf, + loff_t off, size_t count) +{ + int retval, retries = SROM_MAX_WAIT_TRY_TIMES; + for (;;) { + retval = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)buf, + count, off); + if (retval >= 0) + return retval; + if (retval == HV_EAGAIN) + continue; + if (retval == HV_EBUSY && --retries > 0) { + msleep(SROM_WAIT_TRY_INTERVAL); + continue; + } + pr_err("_srom_read: error %d\n", retval); + return -EIO; + } +} + +static ssize_t _srom_write(int hv_devhdl, const void *buf, + loff_t off, size_t count) +{ + int retval, retries = SROM_MAX_WAIT_TRY_TIMES; + for (;;) { + retval = hv_dev_pwrite(hv_devhdl, 0, (HV_VirtAddr)buf, + count, off); + if (retval >= 0) + return retval; + if (retval == HV_EAGAIN) + continue; + if (retval == HV_EBUSY && --retries > 0) { + msleep(SROM_WAIT_TRY_INTERVAL); + continue; + } + pr_err("_srom_write: error %d\n", retval); + return -EIO; + } +} + +/** + * srom_open() - Device open routine. + * @inode: Inode for this device. + * @filp: File for this specific open of the device. + * + * Returns zero, or an error code. + */ +static int srom_open(struct inode *inode, struct file *filp) +{ + filp->private_data = &srom_devices[iminor(inode)]; + return 0; +} + + +/** + * srom_release() - Device release routine. + * @inode: Inode for this device. + * @filp: File for this specific open of the device. + * + * Returns zero, or an error code. + */ +static int srom_release(struct inode *inode, struct file *filp) +{ + struct srom_dev *srom = filp->private_data; + char dummy; + + /* Make sure we've flushed anything written to the ROM. 
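Writing a single dummy byte at the special SROM_FLUSH_OFF offset appears to act as the flush request to the hypervisor; the byte's value itself is never examined.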
*/ + mutex_lock(&srom->lock); + if (srom->hv_devhdl >= 0) + _srom_write(srom->hv_devhdl, &dummy, SROM_FLUSH_OFF, 1); + mutex_unlock(&srom->lock); + + filp->private_data = NULL; + + return 0; +} + + +/** + * srom_read() - Read data from the device. + * @filp: File for this specific open of the device. + * @buf: User's data buffer. + * @count: Number of bytes requested. + * @f_pos: File position. + * + * Returns number of bytes read, or an error code. + */ +static ssize_t srom_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + int retval = 0; + void *kernbuf; + struct srom_dev *srom = filp->private_data; + + kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); + if (!kernbuf) + return -ENOMEM; + + if (mutex_lock_interruptible(&srom->lock)) { + retval = -ERESTARTSYS; + kfree(kernbuf); + return retval; + } + + while (count) { + int hv_retval; + int bytes_this_pass = min(count, SROM_CHUNK_SIZE); + + hv_retval = _srom_read(srom->hv_devhdl, kernbuf, + *f_pos, bytes_this_pass); + if (hv_retval > 0) { + if (copy_to_user(buf, kernbuf, hv_retval) != 0) { + retval = -EFAULT; + break; + } + } else if (hv_retval <= 0) { + if (retval == 0) + retval = hv_retval; + break; + } + + retval += hv_retval; + *f_pos += hv_retval; + buf += hv_retval; + count -= hv_retval; + } + + mutex_unlock(&srom->lock); + kfree(kernbuf); + + return retval; +} + +/** + * srom_write() - Write data to the device. + * @filp: File for this specific open of the device. + * @buf: User's data buffer. + * @count: Number of bytes requested. + * @f_pos: File position. + * + * Returns number of bytes written, or an error code. + */ +static ssize_t srom_write(struct file *filp, const char __user *buf, + size_t count, loff_t *f_pos) +{ + int retval = 0; + void *kernbuf; + struct srom_dev *srom = filp->private_data; + + kernbuf = kmalloc(SROM_CHUNK_SIZE, GFP_KERNEL); + if (!kernbuf) + return -ENOMEM; + + if (mutex_lock_interruptible(&srom->lock)) { + retval = -ERESTARTSYS; + kfree(kernbuf); + return retval; + } + + while (count) { + int hv_retval; + int bytes_this_pass = min(count, SROM_CHUNK_SIZE); + + if (copy_from_user(kernbuf, buf, bytes_this_pass) != 0) { + retval = -EFAULT; + break; + } + + hv_retval = _srom_write(srom->hv_devhdl, kernbuf, + *f_pos, bytes_this_pass); + if (hv_retval <= 0) { + if (retval == 0) + retval = hv_retval; + break; + } + + retval += hv_retval; + *f_pos += hv_retval; + buf += hv_retval; + count -= hv_retval; + } + + mutex_unlock(&srom->lock); + kfree(kernbuf); + + return retval; +} + +/* Provide our own implementation so we can use srom->total_size. 
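The generic llseek helpers cannot know this device's size, so SEEK_END and the bounds check against total_size below would not work without it.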
*/ +loff_t srom_llseek(struct file *filp, loff_t offset, int origin) +{ + struct srom_dev *srom = filp->private_data; + + if (mutex_lock_interruptible(&srom->lock)) + return -ERESTARTSYS; + + switch (origin) { + case SEEK_END: + offset += srom->total_size; + break; + case SEEK_CUR: + offset += filp->f_pos; + break; + } + + if (offset < 0 || offset > srom->total_size) { + offset = -EINVAL; + } else { + filp->f_pos = offset; + filp->f_version = 0; + } + + mutex_unlock(&srom->lock); + + return offset; +} + +static ssize_t total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->total_size); +} + +static ssize_t sector_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->sector_size); +} + +static ssize_t page_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srom_dev *srom = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", srom->page_size); +} + +static struct device_attribute srom_dev_attrs[] = { + __ATTR(total_size, S_IRUGO, total_show, NULL), + __ATTR(sector_size, S_IRUGO, sector_show, NULL), + __ATTR(page_size, S_IRUGO, page_show, NULL), + __ATTR_NULL +}; + +static char *srom_devnode(struct device *dev, mode_t *mode) +{ + *mode = S_IRUGO | S_IWUSR; + return kasprintf(GFP_KERNEL, "srom/%s", dev_name(dev)); +} + +/* + * The fops + */ +static const struct file_operations srom_fops = { + .owner = THIS_MODULE, + .llseek = srom_llseek, + .read = srom_read, + .write = srom_write, + .open = srom_open, + .release = srom_release, +}; + +/** + * srom_setup_minor() - Initialize per-minor information. + * @srom: Per-device SROM state. + * @index: Device to set up. + */ +static int srom_setup_minor(struct srom_dev *srom, int index) +{ + struct device *dev; + int devhdl = srom->hv_devhdl; + + mutex_init(&srom->lock); + + if (_srom_read(devhdl, &srom->total_size, + SROM_TOTAL_SIZE_OFF, sizeof(srom->total_size)) < 0) + return -EIO; + if (_srom_read(devhdl, &srom->sector_size, + SROM_SECTOR_SIZE_OFF, sizeof(srom->sector_size)) < 0) + return -EIO; + if (_srom_read(devhdl, &srom->page_size, + SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0) + return -EIO; + + dev = device_create(srom_class, &platform_bus, + MKDEV(srom_major, index), srom, "%d", index); + return IS_ERR(dev) ? PTR_ERR(dev) : 0; +} + +/** srom_init() - Initialize the driver's module. */ +static int srom_init(void) +{ + int result, i; + dev_t dev = MKDEV(srom_major, 0); + + /* + * Start with a plausible number of partitions; the krealloc() call + * below will yield about log(srom_devs) additional allocations. + */ + srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL); + + /* Discover the number of srom partitions. */ + for (i = 0; ; i++) { + int devhdl; + char buf[20]; + struct srom_dev *new_srom_devices = + krealloc(srom_devices, (i+1) * sizeof(struct srom_dev), + GFP_KERNEL | __GFP_ZERO); + if (!new_srom_devices) { + result = -ENOMEM; + goto fail_mem; + } + srom_devices = new_srom_devices; + sprintf(buf, "srom/0/%d", i); + devhdl = hv_dev_open((HV_VirtAddr)buf, 0); + if (devhdl < 0) { + if (devhdl != HV_ENODEV) + pr_notice("srom/%d: hv_dev_open failed: %d.\n", + i, devhdl); + break; + } + srom_devices[i].hv_devhdl = devhdl; + } + srom_devs = i; + + /* Bail out early if we have no partitions at all. 
*/ + if (srom_devs == 0) { + result = -ENODEV; + goto fail_mem; + } + + /* Register our major, and accept a dynamic number. */ + if (srom_major) + result = register_chrdev_region(dev, srom_devs, "srom"); + else { + result = alloc_chrdev_region(&dev, 0, srom_devs, "srom"); + srom_major = MAJOR(dev); + } + if (result < 0) + goto fail_mem; + + /* Register a character device. */ + cdev_init(&srom_cdev, &srom_fops); + srom_cdev.owner = THIS_MODULE; + srom_cdev.ops = &srom_fops; + result = cdev_add(&srom_cdev, dev, srom_devs); + if (result < 0) + goto fail_chrdev; + + /* Create a sysfs class. */ + srom_class = class_create(THIS_MODULE, "srom"); + if (IS_ERR(srom_class)) { + result = PTR_ERR(srom_class); + goto fail_cdev; + } + srom_class->dev_attrs = srom_dev_attrs; + srom_class->devnode = srom_devnode; + + /* Do per-partition initialization */ + for (i = 0; i < srom_devs; i++) { + result = srom_setup_minor(srom_devices + i, i); + if (result < 0) + goto fail_class; + } + + return 0; + +fail_class: + for (i = 0; i < srom_devs; i++) + device_destroy(srom_class, MKDEV(srom_major, i)); + class_destroy(srom_class); +fail_cdev: + cdev_del(&srom_cdev); +fail_chrdev: + unregister_chrdev_region(dev, srom_devs); +fail_mem: + kfree(srom_devices); + return result; +} + +/** srom_cleanup() - Clean up the driver's module. */ +static void srom_cleanup(void) +{ + int i; + for (i = 0; i < srom_devs; i++) + device_destroy(srom_class, MKDEV(srom_major, i)); + class_destroy(srom_class); + cdev_del(&srom_cdev); + unregister_chrdev_region(MKDEV(srom_major, 0), srom_devs); + kfree(srom_devices); +} + +module_init(srom_init); +module_exit(srom_cleanup); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 7fc2f108f490..3f4051a7c5a7 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -80,7 +80,7 @@ enum tis_defaults { static LIST_HEAD(tis_chips); static DEFINE_SPINLOCK(tis_lock); -#ifdef CONFIG_PNP +#if defined(CONFIG_PNP) && defined(CONFIG_ACPI) static int is_itpm(struct pnp_dev *dev) { struct acpi_device *acpi = pnp_acpi_device(dev); @@ -93,6 +93,11 @@ static int is_itpm(struct pnp_dev *dev) return 0; } +#else +static inline int is_itpm(struct pnp_dev *dev) +{ + return 0; +} #endif static int check_locality(struct tpm_chip *chip, int l) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index bf5092455a8f..d4c542372886 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -25,9 +25,19 @@ DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices); DEFINE_MUTEX(cpuidle_lock); LIST_HEAD(cpuidle_detected_devices); -static void (*pm_idle_old)(void); static int enabled_devices; +static int off __read_mostly; +static int initialized __read_mostly; + +int cpuidle_disabled(void) +{ + return off; +} +void disable_cpuidle(void) +{ + off = 1; +} #if defined(CONFIG_ARCH_HAS_CPU_IDLE_WAIT) static void cpuidle_kick_cpus(void) @@ -46,25 +56,23 @@ static int __cpuidle_register_device(struct cpuidle_device *dev); * cpuidle_idle_call - the main idle loop * * NOTE: no locks or semaphores should be used here + * return non-zero on failure */ -static void cpuidle_idle_call(void) +int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_state *target_state; int next_state; + if (off) + return -ENODEV; + + if (!initialized) + return -ENODEV; + /* check if the device is ready */ - if (!dev || !dev->enabled) { - if (pm_idle_old) - pm_idle_old(); - else -#if defined(CONFIG_ARCH_HAS_DEFAULT_IDLE) - default_idle(); 
-#else - local_irq_enable(); -#endif - return; - } + if (!dev || !dev->enabled) + return -EBUSY; #if 0 /* shows regressions, re-enable for 2.6.29 */ @@ -89,7 +97,7 @@ static void cpuidle_idle_call(void) next_state = cpuidle_curr_governor->select(dev); if (need_resched()) { local_irq_enable(); - return; + return 0; } target_state = &dev->states[next_state]; @@ -114,6 +122,8 @@ static void cpuidle_idle_call(void) /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); + + return 0; } /** @@ -121,10 +131,10 @@ static void cpuidle_idle_call(void) */ void cpuidle_install_idle_handler(void) { - if (enabled_devices && (pm_idle != cpuidle_idle_call)) { + if (enabled_devices) { /* Make sure all changes finished before we switch to new idle */ smp_wmb(); - pm_idle = cpuidle_idle_call; + initialized = 1; } } @@ -133,8 +143,8 @@ void cpuidle_install_idle_handler(void) */ void cpuidle_uninstall_idle_handler(void) { - if (enabled_devices && pm_idle_old && (pm_idle != pm_idle_old)) { - pm_idle = pm_idle_old; + if (enabled_devices) { + initialized = 0; cpuidle_kick_cpus(); } } @@ -427,7 +437,8 @@ static int __init cpuidle_init(void) { int ret; - pm_idle_old = pm_idle; + if (cpuidle_disabled()) + return -ENODEV; ret = cpuidle_add_class_sysfs(&cpu_sysdev_class); if (ret) @@ -438,4 +449,5 @@ static int __init cpuidle_init(void) return 0; } +module_param(off, int, 0444); core_initcall(cpuidle_init); diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index 33e50d556f17..38c3fd8b9d76 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -13,6 +13,7 @@ extern struct list_head cpuidle_governors; extern struct list_head cpuidle_detected_devices; extern struct mutex cpuidle_lock; extern spinlock_t cpuidle_driver_lock; +extern int cpuidle_disabled(void); /* idle loop */ extern void cpuidle_install_idle_handler(void); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index fd1601e3d125..3f7e3cedd133 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -26,6 +26,9 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) if (!drv) return -EINVAL; + if (cpuidle_disabled()) + return -ENODEV; + spin_lock(&cpuidle_driver_lock); if (cpuidle_curr_driver) { spin_unlock(&cpuidle_driver_lock); diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 724c164d31c9..ea2f8e7aa24a 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -81,6 +81,9 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) if (!gov || !gov->select) return -EINVAL; + if (cpuidle_disabled()) + return -ENODEV; + mutex_lock(&cpuidle_lock); if (__cpuidle_find_governor(gov->name) == NULL) { ret = 0; diff --git a/drivers/eisa/pci_eisa.c b/drivers/eisa/pci_eisa.c index 30da70d06a6d..cdae207028a7 100644 --- a/drivers/eisa/pci_eisa.c +++ b/drivers/eisa/pci_eisa.c @@ -45,13 +45,13 @@ static int __init pci_eisa_init(struct pci_dev *pdev, return 0; } -static struct pci_device_id __initdata pci_eisa_pci_tbl[] = { +static struct pci_device_id pci_eisa_pci_tbl[] = { { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_BRIDGE_EISA << 8, 0xffff00, 0 }, { 0, } }; -static struct pci_driver __initdata pci_eisa_driver = { +static struct pci_driver __refdata pci_eisa_driver = { .name = "pci_eisa", .id_table = pci_eisa_pci_tbl, .probe = pci_eisa_init, diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index eacb05e6cfb3..eb80b549ed8d 100644 --- 
a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -157,7 +157,7 @@ utf16_strnlen(efi_char16_t *s, size_t maxlength) return length; } -static unsigned long +static inline unsigned long utf16_strlen(efi_char16_t *s) { return utf16_strnlen(s, ~0UL); @@ -580,8 +580,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, return -1; } -static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size, - struct pstore_info *psi) +static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, + size_t size, struct pstore_info *psi) { return 0; } diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index ce281d152275..67df91af8424 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -483,7 +483,7 @@ static int gpio_keys_get_devtree_pdata(struct device *dev, buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL); if (!buttons) - return -ENODEV; + return -ENOMEM; pp = NULL; i = 0; diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c index ab0acaf7fe8f..756348a7f93a 100644 --- a/drivers/input/keyboard/lm8323.c +++ b/drivers/input/keyboard/lm8323.c @@ -754,8 +754,11 @@ fail3: device_remove_file(&client->dev, &dev_attr_disable_kp); fail2: while (--pwm >= 0) - if (lm->pwm[pwm].enabled) + if (lm->pwm[pwm].enabled) { + device_remove_file(lm->pwm[pwm].cdev.dev, + &dev_attr_time); led_classdev_unregister(&lm->pwm[pwm].cdev); + } fail1: input_free_device(idev); kfree(lm); @@ -775,8 +778,10 @@ static int __devexit lm8323_remove(struct i2c_client *client) device_remove_file(&lm->client->dev, &dev_attr_disable_kp); for (i = 0; i < 3; i++) - if (lm->pwm[i].enabled) + if (lm->pwm[i].enabled) { + device_remove_file(lm->pwm[i].cdev.dev, &dev_attr_time); led_classdev_unregister(&lm->pwm[i].cdev); + } kfree(lm); diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index da3828fc2c09..f270447ba951 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -19,6 +19,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ +#include <linux/kernel.h> #include <linux/module.h> #include <linux/input.h> #include <linux/platform_device.h> @@ -37,7 +38,7 @@ #define KBC_ROW_SCAN_DLY 5 /* KBC uses a 32KHz clock so a cycle = 1/32kHz */ -#define KBC_CYCLE_USEC 32 +#define KBC_CYCLE_MS 32 /* KBC Registers */ @@ -647,7 +648,7 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev) debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT); scan_time_rows = (KBC_ROW_SCAN_TIME + debounce_cnt) * num_rows; kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + pdata->repeat_cnt; - kbc->repoll_dly = ((kbc->repoll_dly * KBC_CYCLE_USEC) + 999) / 1000; + kbc->repoll_dly = DIV_ROUND_UP(kbc->repoll_dly, KBC_CYCLE_MS); input_dev->name = pdev->name; input_dev->id.bustype = BUS_HOST; diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c index c456f63b6bae..783597a9a64a 100644 --- a/drivers/input/misc/kxtj9.c +++ b/drivers/input/misc/kxtj9.c @@ -21,6 +21,7 @@ #include <linux/i2c.h> #include <linux/input.h> #include <linux/interrupt.h> +#include <linux/module.h> #include <linux/slab.h> #include <linux/input/kxtj9.h> #include <linux/input-polldev.h> diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c index 20f8f9284f02..6c76cf792991 100644 --- a/drivers/input/misc/mma8450.c +++ b/drivers/input/misc/mma8450.c @@ -24,6 +24,7 @@ #include <linux/delay.h> #include <linux/i2c.h> #include <linux/input-polldev.h> +#include <linux/of_device.h> #define MMA8450_DRV_NAME "mma8450" @@ -229,10 +230,17 @@ static const struct i2c_device_id mma8450_id[] = { }; MODULE_DEVICE_TABLE(i2c, mma8450_id); +static const struct of_device_id mma8450_dt_ids[] = { + { .compatible = "fsl,mma8450", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mma8450_dt_ids); + static struct i2c_driver mma8450_driver = { .driver = { .name = MMA8450_DRV_NAME, .owner = THIS_MODULE, + .of_match_table = mma8450_dt_ids, }, .probe = mma8450_probe, .remove = __devexit_p(mma8450_remove), diff --git a/drivers/input/mouse/hgpk.c b/drivers/input/mouse/hgpk.c index 95577c15ae56..4d17d9f3320b 100644 --- a/drivers/input/mouse/hgpk.c +++ b/drivers/input/mouse/hgpk.c @@ -32,6 +32,7 @@ #define DEBUG #include <linux/slab.h> #include <linux/input.h> +#include <linux/module.h> #include <linux/serio.h> #include <linux/libps2.h> #include <linux/delay.h> diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index bc3b5187f3a3..131f9d1c921b 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -249,12 +249,14 @@ static void __ad7879_enable(struct ad7879 *ts) static void __ad7879_disable(struct ad7879 *ts) { + u16 reg = (ts->cmd_crtl2 & ~AD7879_PM(-1)) | + AD7879_PM(AD7879_PM_SHUTDOWN); disable_irq(ts->irq); if (del_timer_sync(&ts->timer)) ad7879_ts_event_release(ts); - ad7879_write(ts, AD7879_REG_CTRL2, AD7879_PM(AD7879_PM_SHUTDOWN)); + ad7879_write(ts, AD7879_REG_CTRL2, reg); } diff --git a/drivers/of/base.c b/drivers/of/base.c index 3ff22e32b602..fb28b5af733b 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -17,14 +17,39 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version.
*/ +#include <linux/ctype.h> #include <linux/module.h> #include <linux/of.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/proc_fs.h> +/** + * struct alias_prop - Alias property in 'aliases' node + * @link: List node to link the structure in aliases_lookup list + * @alias: Alias property name + * @np: Pointer to device_node that the alias stands for + * @id: Index value from end of alias name + * @stem: Alias string without the index + * + * The structure represents one alias property of the 'aliases' node as + * an entry in the aliases_lookup list. + */ +struct alias_prop { + struct list_head link; + const char *alias; + struct device_node *np; + int id; + char stem[0]; +}; + +static LIST_HEAD(aliases_lookup); + struct device_node *allnodes; struct device_node *of_chosen; +struct device_node *of_aliases; + +static DEFINE_MUTEX(of_aliases_mutex); /* use when traversing tree through the allnext, child, sibling, * or parent members of struct device_node. @@ -988,3 +1013,108 @@ out_unlock: } #endif /* defined(CONFIG_OF_DYNAMIC) */ +static void of_alias_add(struct alias_prop *ap, struct device_node *np, + int id, const char *stem, int stem_len) +{ + ap->id = id; + ap->np = np; + strncpy(ap->stem, stem, stem_len); + ap->stem[stem_len] = 0; + list_add_tail(&ap->link, &aliases_lookup); + pr_debug("adding DT alias:%s: stem=%s id=%i node=%s\n", + ap->alias, ap->stem, ap->id, np ? np->full_name : NULL); +} + +/** + * of_alias_scan() - Scan all properties of 'aliases' node + * + * The function scans all the properties of the 'aliases' node and + * populates the global lookup table with them. Properties that cannot + * be resolved to a device node, or for which no memory can be + * allocated, are silently skipped; the function itself returns nothing. + */ +__init void of_alias_scan(void) +{ + struct property *pp; + + if (!of_aliases) + return; + + for_each_property(pp, of_aliases->properties) { + const char *start = pp->name; + const char *end = start + strlen(start); + struct device_node *np; + struct alias_prop *ap; + int id, len; + + /* Skip properties we do not want to process */ + if (!strcmp(pp->name, "name") || + !strcmp(pp->name, "phandle") || + !strcmp(pp->name, "linux,phandle")) + continue; + + np = of_find_node_by_path(pp->value); + if (!np) + continue; + + /* walk alias backwards to extract the id and 'stem' string */ + while (isdigit(*(end-1)) && end > start) + end--; + len = end - start; + id = strlen(end) ? simple_strtoul(end, NULL, 10) : -1; + + /* Allocate an alias_prop with enough space for the stem */ + ap = early_init_dt_alloc_memory_arch(sizeof(*ap) + len + 1, 4); + if (!ap) + continue; + ap->alias = start; + of_alias_add(ap, np, id, start, len); + } +} + +/** + * of_alias_get_id() - Get alias id for the given device_node + * @np: Pointer to the given device_node + * @stem: Alias stem of the given device_node + * + * The function traverses the lookup table to find the alias id for the + * given device_node and alias stem. It returns the alias id if one is + * found. If not, it dynamically creates an entry in the lookup table + * and returns the new id, or returns an error code if the entry cannot + * be created.
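+ * + * Illustrative example (node and label names are invented): given + * aliases { serial0 = &uarta; serial1 = &uartb; }; in the device tree, + * of_alias_get_id(np_of_uartb, "serial") returns 1.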
+ */ +int of_alias_get_id(struct device_node *np, const char *stem) +{ + struct alias_prop *app; + int id = 0; + bool found = false; + + mutex_lock(&of_aliases_mutex); + list_for_each_entry(app, &aliases_lookup, link) { + if (strcmp(app->stem, stem) != 0) + continue; + + if (np == app->np) { + found = true; + id = app->id; + break; + } + + if (id <= app->id) + id = app->id + 1; + } + + /* If an id is not found, then allocate a new one */ + if (!found) { + app = kzalloc(sizeof(*app) + strlen(stem) + 1, GFP_KERNEL); + if (!app) { + id = -ENODEV; + goto out; + } + of_alias_add(app, np, id, stem, strlen(stem)); + } + + out: + mutex_unlock(&of_aliases_mutex); + + return id; +} +EXPORT_SYMBOL_GPL(of_alias_get_id); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 65200af29c52..13d6d3a96b31 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -707,10 +707,12 @@ void __init unflatten_device_tree(void) __unflatten_device_tree(initial_boot_params, &allnodes, early_init_dt_alloc_memory_arch); - /* Get pointer to OF "/chosen" node for use everywhere */ + /* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */ of_chosen = of_find_node_by_path("/chosen"); if (of_chosen == NULL) of_chosen = of_find_node_by_path("/chosen@0"); + of_aliases = of_find_node_by_path("/aliases"); + of_alias_scan(); } #endif /* CONFIG_OF_EARLY_FLATTREE */ diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index a70fa89f76fd..220285760b68 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -110,7 +110,7 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val, } -static struct acpi_dock_ops acpiphp_dock_ops = { +static const struct acpi_dock_ops acpiphp_dock_ops = { .handler = handle_hotplug_event_func, }; diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c index bcae8dd41496..7789002bdd5c 100644 --- a/drivers/rtc/rtc-omap.c +++ b/drivers/rtc/rtc-omap.c @@ -368,7 +368,7 @@ static int __init omap_rtc_probe(struct platform_device *pdev) pr_info("%s: already running\n", pdev->name); /* force to 24 hour mode */ - new_ctrl = reg & ~(OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); + new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP); new_ctrl |= OMAP_RTC_CTRL_STOP; /* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE: diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig index 564ff4e0dbc4..8345fb457a40 100644 --- a/drivers/target/iscsi/Kconfig +++ b/drivers/target/iscsi/Kconfig @@ -1,5 +1,6 @@ config ISCSI_TARGET tristate "Linux-iSCSI.org iSCSI Target Mode Stack" + depends on NET select CRYPTO select CRYPTO_CRC32C select CRYPTO_CRC32C_INTEL if X86 diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 14c81c4265bd..c24fb10de60b 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -120,7 +120,7 @@ struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf) struct iscsi_tiqn *tiqn = NULL; int ret; - if (strlen(buf) > ISCSI_IQN_LEN) { + if (strlen(buf) >= ISCSI_IQN_LEN) { pr_err("Target IQN exceeds %d bytes\n", ISCSI_IQN_LEN); return ERR_PTR(-EINVAL); @@ -1857,7 +1857,7 @@ static int iscsit_handle_text_cmd( char *text_ptr, *text_in; int cmdsn_ret, niov = 0, rx_got, rx_size; u32 checksum = 0, data_crc = 0, payload_length; - u32 padding = 0, text_length = 0; + u32 padding = 0, pad_bytes = 0, text_length = 0; struct iscsi_cmd *cmd; struct kvec iov[3]; struct iscsi_text *hdr; @@ -1896,7 +1896,7 @@ static int iscsit_handle_text_cmd(
padding = ((-payload_length) & 3); if (padding != 0) { - iov[niov].iov_base = cmd->pad_bytes; + iov[niov].iov_base = &pad_bytes; iov[niov++].iov_len = padding; rx_size += padding; pr_debug("Receiving %u additional bytes" @@ -1917,7 +1917,7 @@ if (conn->conn_ops->DataDigest) { iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, text_in, text_length, - padding, cmd->pad_bytes, + padding, (u8 *)&pad_bytes, (u8 *)&data_crc); if (checksum != data_crc) { @@ -3468,7 +3468,12 @@ static inline void iscsit_thread_check_cpumask( } #else - +#define iscsit_thread_get_cpumask(X) ({}) + +void iscsit_thread_get_cpumask(struct iscsi_conn *conn) +{ + return; +} + #define iscsit_thread_check_cpumask(X, Y, Z) ({}) #endif /* CONFIG_SMP */ diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index 32bb92c44450..f095e65b1ccf 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -181,7 +181,7 @@ struct se_tpg_np *lio_target_call_addnptotpg( return ERR_PTR(-EOVERFLOW); } memset(buf, 0, MAX_PORTAL_LEN + 1); - snprintf(buf, MAX_PORTAL_LEN, "%s", name); + snprintf(buf, MAX_PORTAL_LEN + 1, "%s", name); memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage)); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 713a4d23557a..4d087ac11067 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -978,7 +978,7 @@ struct iscsi_login *iscsi_target_init_negotiation( pr_err("Unable to allocate memory for struct iscsi_login.\n") ; iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_NO_RESOURCES); - goto out; + return NULL; } login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index c75a01a1c475..89760329d5d0 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1747,6 +1747,8 @@ int transport_generic_handle_cdb( } EXPORT_SYMBOL(transport_generic_handle_cdb); +static void transport_generic_request_failure(struct se_cmd *, + struct se_device *, int, int); /* * Used by fabric module frontends to queue tasks directly. * May only be used from process context. */ @@ -1754,6 +1756,8 @@ EXPORT_SYMBOL(transport_generic_handle_cdb); int transport_handle_cdb_direct( struct se_cmd *cmd) { + int ret; + if (!cmd->se_lun) { dump_stack(); pr_err("cmd->se_lun is NULL\n"); @@ -1765,8 +1769,31 @@ int transport_handle_cdb_direct( " from interrupt context\n"); return -EINVAL; } - - return transport_generic_new_cmd(cmd); + /* + * Set TRANSPORT_NEW_CMD state and cmd->t_transport_active=1 following + * transport_generic_handle_cdb*() -> transport_add_cmd_to_queue() + * in existing usage to ensure that outstanding descriptors are handled + * correctly during shutdown via transport_generic_wait_for_tasks(). + * + * Also, we don't take cmd->t_state_lock here as we only expect + * this to be called for initial descriptor submission. + */ + cmd->t_state = TRANSPORT_NEW_CMD; + atomic_set(&cmd->t_transport_active, 1); + /* + * transport_generic_new_cmd() is already handling QUEUE_FULL, + * so follow TRANSPORT_NEW_CMD processing thread context usage + * and call transport_generic_request_failure() if necessary.
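+ * (A -EAGAIN return from transport_generic_new_cmd() is the QUEUE_FULL + * case, which has already been queued internally for retry, so it is + * reported as success to the fabric caller.)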
+ */ + ret = transport_generic_new_cmd(cmd); + if (ret == -EAGAIN) + return 0; + else if (ret < 0) { + cmd->transport_error_status = ret; + transport_generic_request_failure(cmd, NULL, 0, + (cmd->data_direction != DMA_TO_DEVICE)); + } + return 0; } EXPORT_SYMBOL(transport_handle_cdb_direct); @@ -3324,7 +3351,7 @@ static int transport_generic_cmd_sequencer( goto out_invalid_cdb_field; } - cmd->t_task_lba = get_unaligned_be16(&cdb[2]); + cmd->t_task_lba = get_unaligned_be64(&cdb[2]); passthrough = (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV); /* diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h index f7fff7ed63c3..bd4fe21a23b8 100644 --- a/drivers/target/tcm_fc/tcm_fc.h +++ b/drivers/target/tcm_fc/tcm_fc.h @@ -187,4 +187,9 @@ void ft_dump_cmd(struct ft_cmd *, const char *caller); ssize_t ft_format_wwn(char *, size_t, u64); +/* + * Underlying HW specific helper function + */ +void ft_invl_hw_context(struct ft_cmd *); + #endif /* __TCM_FC_H__ */ diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 09df38b4610c..5654dc22f7ae 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -320,6 +320,7 @@ static void ft_recv_seq(struct fc_seq *sp, struct fc_frame *fp, void *arg) default: pr_debug("%s: unhandled frame r_ctl %x\n", __func__, fh->fh_r_ctl); + ft_invl_hw_context(cmd); fc_frame_free(fp); transport_generic_free_cmd(&cmd->se_cmd, 0, 0); break; diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index 8e2a46ddcccb..c37f4cd96452 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -213,62 +213,49 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp) if (!(ntoh24(fh->fh_f_ctl) & FC_FC_REL_OFF)) goto drop; + f_ctl = ntoh24(fh->fh_f_ctl); + ep = fc_seq_exch(seq); + lport = ep->lp; + if (cmd->was_ddp_setup) { + BUG_ON(!ep); + BUG_ON(!lport); + } + /* - * Doesn't expect even single byte of payload. Payload + * Doesn't expect payload if DDP is set up. Payload * is expected to be copied directly to user buffers - * due to DDP (Large Rx offload) feature, hence - * BUG_ON if BUF is non-NULL + * due to DDP (Large Rx offload). */ buf = fc_frame_payload_get(fp, 1); - if (cmd->was_ddp_setup && buf) { - pr_debug("%s: When DDP was setup, not expected to" - "receive frame with payload, Payload shall be" - "copied directly to buffer instead of coming " - "via. legacy receive queues\n", __func__); - BUG_ON(buf); - } + if (buf) + pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, " + "cmd->sg_cnt 0x%x. DDP was setup" + " hence not expected to receive frame with " + "payload, Frame will be dropped if " + "'Sequence Initiative' bit in f_ctl is " + "not set\n", __func__, ep->xid, f_ctl, + cmd->sg, cmd->sg_cnt); + /* + * Invalidate the HW DDP context if it was set up for the + * respective command. Invalidation of the HW DDP context is + * required in both situations (success and error). + */ + ft_invl_hw_context(cmd); /* - * If ft_cmd indicated 'ddp_setup', in that case only the last frame - * should come with 'TSI bit being set'. If 'TSI bit is not set and if - * data frame appears here, means error condition. In both the cases - * release the DDP context (ddp_put) and in error case, as well - * initiate error recovery mechanism. + * If the "Sequence Initiative (TSI)" bit is set in f_ctl, it + * means the last write data frame was received successfully, + * where the payload is posted directly to the user buffer and + * only the last frame's header is posted in the receive queue.
+ * + * If the "Sequence Initiative (TSI)" bit is not set, it means an + * error condition w.r.t. DDP, so drop the packet and let explicit + * ABORTS from the other end of the exchange timer trigger the recovery. */ - ep = fc_seq_exch(seq); - if (cmd->was_ddp_setup) { - BUG_ON(!ep); - lport = ep->lp; - BUG_ON(!lport); - } - if (cmd->was_ddp_setup && ep->xid != FC_XID_UNKNOWN) { - f_ctl = ntoh24(fh->fh_f_ctl); - /* - * If TSI bit set in f_ctl, means last write data frame is - * received successfully where payload is posted directly - * to user buffer and only the last frame's header is posted - * in legacy receive queue - */ - if (f_ctl & FC_FC_SEQ_INIT) { /* TSI bit set in FC frame */ - cmd->write_data_len = lport->tt.ddp_done(lport, - ep->xid); - goto last_frame; - } else { - /* - * Updating the write_data_len may be meaningless at - * this point, but just in case if required in future - * for debugging or any other purpose - */ - pr_err("%s: Received frame with TSI bit not" - " being SET, dropping the frame, " - "cmd->sg <%p>, cmd->sg_cnt <0x%x>\n", - __func__, cmd->sg, cmd->sg_cnt); - cmd->write_data_len = lport->tt.ddp_done(lport, - ep->xid); - lport->tt.seq_exch_abort(cmd->seq, 0); - goto drop; - } - } + if (f_ctl & FC_FC_SEQ_INIT) + goto last_frame; + else + goto drop; rel_off = ntohl(fh->fh_parm_offset); frame_len = fr_len(fp); @@ -331,3 +318,39 @@ last_frame: drop: fc_frame_free(fp); } + +/* + * Handle and clean up any HW-specific resources on received + * ABORTS, errors, or timeouts. + */ +void ft_invl_hw_context(struct ft_cmd *cmd) +{ + struct fc_seq *seq = cmd->seq; + struct fc_exch *ep = NULL; + struct fc_lport *lport = NULL; + + BUG_ON(!cmd); + + /* Clean up the DDP context in HW if DDP was set up */ + if (cmd->was_ddp_setup && seq) { + ep = fc_seq_exch(seq); + if (ep) { + lport = ep->lp; + if (lport && (ep->xid <= lport->lro_xid)) + /* + * "ddp_done" triggers invalidation of the + * HW-specific DDP context + */ + cmd->write_data_len = lport->tt.ddp_done(lport, + ep->xid); + + /* + * Reset the same variable to indicate that the + * HW DDP context has been invalidated, to avoid + * re-invalidation of the same context (the + * context is identified using ep->xid) + */ + cmd->was_ddp_setup = 0; + } + } +} diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index bf7c687519ef..f7f71b2d3101 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -14,11 +14,7 @@ menuconfig THERMAL If you want this support, you should say Y or M here. config THERMAL_HWMON - bool "Hardware monitoring support" + bool depends on THERMAL depends on HWMON=y || HWMON=THERMAL - help - The generic thermal sysfs driver's hardware monitoring support - requires a 2.10.7/3.0.2 or later lm-sensors userspace. - - Say Y if your user-space is new enough.
+ default y diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 0b1c82ad6805..708f8e92771a 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -420,6 +420,29 @@ thermal_cooling_device_trip_point_show(struct device *dev, /* hwmon sys I/F */ #include <linux/hwmon.h> + +/* thermal zone devices with the same type share one hwmon device */ +struct thermal_hwmon_device { + char type[THERMAL_NAME_LENGTH]; + struct device *device; + int count; + struct list_head tz_list; + struct list_head node; +}; + +struct thermal_hwmon_attr { + struct device_attribute attr; + char name[16]; +}; + +/* one temperature input for each thermal zone */ +struct thermal_hwmon_temp { + struct list_head hwmon_node; + struct thermal_zone_device *tz; + struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ + struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ +}; + static LIST_HEAD(thermal_hwmon_list); static ssize_t @@ -437,9 +460,10 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) int ret; struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_zone_device *tz - = container_of(hwmon_attr, struct thermal_zone_device, + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_input); + struct thermal_zone_device *tz = temp->tz; ret = tz->ops->get_temp(tz, &temperature); @@ -455,9 +479,10 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, { struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); - struct thermal_zone_device *tz - = container_of(hwmon_attr, struct thermal_zone_device, + struct thermal_hwmon_temp *temp + = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_crit); + struct thermal_zone_device *tz = temp->tz; long temperature; int ret; @@ -469,22 +494,54 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, } -static int -thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +static struct thermal_hwmon_device * +thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; - int new_hwmon_device = 1; - int result; mutex_lock(&thermal_list_lock); list_for_each_entry(hwmon, &thermal_hwmon_list, node) if (!strcmp(hwmon->type, tz->type)) { - new_hwmon_device = 0; mutex_unlock(&thermal_list_lock); - goto register_sys_interface; + return hwmon; + } + mutex_unlock(&thermal_list_lock); + + return NULL; +} + +/* Find the temperature input matching a given thermal zone */ +static struct thermal_hwmon_temp * +thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, + const struct thermal_zone_device *tz) +{ + struct thermal_hwmon_temp *temp; + + mutex_lock(&thermal_list_lock); + list_for_each_entry(temp, &hwmon->tz_list, hwmon_node) + if (temp->tz == tz) { + mutex_unlock(&thermal_list_lock); + return temp; } mutex_unlock(&thermal_list_lock); + return NULL; +} + +static int +thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) +{ + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + int new_hwmon_device = 1; + int result; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (hwmon) { + new_hwmon_device = 0; + goto register_sys_interface; + } + hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL); if (!hwmon) return -ENOMEM; @@ -502,30 +559,36 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) goto free_mem; register_sys_interface: - tz->hwmon = hwmon; + temp = 
kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL); + if (!temp) { + result = -ENOMEM; + goto unregister_name; + } + + temp->tz = tz; hwmon->count++; - snprintf(tz->temp_input.name, THERMAL_NAME_LENGTH, + snprintf(temp->temp_input.name, THERMAL_NAME_LENGTH, "temp%d_input", hwmon->count); - tz->temp_input.attr.attr.name = tz->temp_input.name; - tz->temp_input.attr.attr.mode = 0444; - tz->temp_input.attr.show = temp_input_show; - sysfs_attr_init(&tz->temp_input.attr.attr); - result = device_create_file(hwmon->device, &tz->temp_input.attr); + temp->temp_input.attr.attr.name = temp->temp_input.name; + temp->temp_input.attr.attr.mode = 0444; + temp->temp_input.attr.show = temp_input_show; + sysfs_attr_init(&temp->temp_input.attr.attr); + result = device_create_file(hwmon->device, &temp->temp_input.attr); if (result) - goto unregister_name; + goto free_temp_mem; if (tz->ops->get_crit_temp) { unsigned long temperature; if (!tz->ops->get_crit_temp(tz, &temperature)) { - snprintf(tz->temp_crit.name, THERMAL_NAME_LENGTH, + snprintf(temp->temp_crit.name, THERMAL_NAME_LENGTH, "temp%d_crit", hwmon->count); - tz->temp_crit.attr.attr.name = tz->temp_crit.name; - tz->temp_crit.attr.attr.mode = 0444; - tz->temp_crit.attr.show = temp_crit_show; - sysfs_attr_init(&tz->temp_crit.attr.attr); + temp->temp_crit.attr.attr.name = temp->temp_crit.name; + temp->temp_crit.attr.attr.mode = 0444; + temp->temp_crit.attr.show = temp_crit_show; + sysfs_attr_init(&temp->temp_crit.attr.attr); result = device_create_file(hwmon->device, - &tz->temp_crit.attr); + &temp->temp_crit.attr); if (result) goto unregister_input; } @@ -534,13 +597,15 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) mutex_lock(&thermal_list_lock); if (new_hwmon_device) list_add_tail(&hwmon->node, &thermal_hwmon_list); - list_add_tail(&tz->hwmon_node, &hwmon->tz_list); + list_add_tail(&temp->hwmon_node, &hwmon->tz_list); mutex_unlock(&thermal_list_lock); return 0; unregister_input: - device_remove_file(hwmon->device, &tz->temp_input.attr); + device_remove_file(hwmon->device, &temp->temp_input.attr); + free_temp_mem: + kfree(temp); unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); @@ -556,15 +621,30 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) static void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) { - struct thermal_hwmon_device *hwmon = tz->hwmon; + struct thermal_hwmon_device *hwmon; + struct thermal_hwmon_temp *temp; + + hwmon = thermal_hwmon_lookup_by_type(tz); + if (unlikely(!hwmon)) { + /* Should never happen... */ + dev_dbg(&tz->device, "hwmon device lookup failed!\n"); + return; + } + + temp = thermal_hwmon_lookup_temp(hwmon, tz); + if (unlikely(!temp)) { + /* Should never happen... 
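thermal_add_hwmon_sysfs() links a temperature input into hwmon->tz_list for every zone it registers, so a miss here would indicate an unbalanced unregister.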
*/ + dev_dbg(&tz->device, "temperature input lookup failed!\n"); + return; + } - tz->hwmon = NULL; - device_remove_file(hwmon->device, &tz->temp_input.attr); + device_remove_file(hwmon->device, &temp->temp_input.attr); if (tz->ops->get_crit_temp) - device_remove_file(hwmon->device, &tz->temp_crit.attr); + device_remove_file(hwmon->device, &temp->temp_crit.attr); mutex_lock(&thermal_list_lock); - list_del(&tz->hwmon_node); + list_del(&temp->hwmon_node); + kfree(temp); if (!list_empty(&hwmon->tz_list)) { mutex_unlock(&thermal_list_lock); return; diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig index 69407e72aac1..278aeaa92505 100644 --- a/drivers/video/backlight/Kconfig +++ b/drivers/video/backlight/Kconfig @@ -336,7 +336,7 @@ config BACKLIGHT_PCF50633 enable its driver. config BACKLIGHT_AAT2870 - bool "AnalogicTech AAT2870 Backlight" + tristate "AnalogicTech AAT2870 Backlight" depends on BACKLIGHT_CLASS_DEVICE && MFD_AAT2870_CORE help If you have a AnalogicTech AAT2870 say Y to enable the diff --git a/drivers/video/backlight/aat2870_bl.c b/drivers/video/backlight/aat2870_bl.c index 4952a617563d..331f1ef1dad5 100644 --- a/drivers/video/backlight/aat2870_bl.c +++ b/drivers/video/backlight/aat2870_bl.c @@ -44,7 +44,7 @@ static inline int aat2870_brightness(struct aat2870_bl_driver_data *aat2870_bl, struct backlight_device *bd = aat2870_bl->bd; int val; - val = brightness * aat2870_bl->max_current; + val = brightness * (aat2870_bl->max_current - 1); val /= bd->props.max_brightness; return val; @@ -158,10 +158,10 @@ static int aat2870_bl_probe(struct platform_device *pdev) props.type = BACKLIGHT_RAW; bd = backlight_device_register("aat2870-backlight", &pdev->dev, aat2870_bl, &aat2870_bl_ops, &props); - if (!bd) { + if (IS_ERR(bd)) { dev_err(&pdev->dev, "Failed allocate memory for backlight device\n"); - ret = -ENOMEM; + ret = PTR_ERR(bd); goto out_kfree; } @@ -175,7 +175,7 @@ static int aat2870_bl_probe(struct platform_device *pdev) else aat2870_bl->channels = AAT2870_BL_CH_ALL; - if (pdata->max_brightness > 0) + if (pdata->max_current > 0) aat2870_bl->max_current = pdata->max_current; else aat2870_bl->max_current = AAT2870_CURRENT_27_9; diff --git a/fs/Kconfig b/fs/Kconfig index 19891aab9c6e..9fe0b349f4cd 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL select TMPFS_XATTR select GENERIC_ACL help - POSIX Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. + POSIX Access Control Lists (ACLs) support additional access rights + for users and groups beyond the standard owner/group/world scheme, + and this option selects support for ACLs specifically for tmpfs + filesystems. + + If you've selected TMPFS, it's possible that you'll also need + this option as there are a number of Linux distros that require + POSIX ACL support under /dev for certain features to work properly. + For example, some distros need this feature for ALSA-related /dev + files for sound to work properly. In short, if you're not sure, + say Y. To learn more about Access Control Lists, visit the POSIX ACLs for Linux website <http://acl.bestbits.at/>. - If you don't know what Access Control Lists are, say N. 
- config TMPFS_XATTR bool "Tmpfs extended attributes" depends on TMPFS diff --git a/fs/dcache.c b/fs/dcache.c index 2347cdb15abb..c83cae19161e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -795,6 +795,7 @@ relock: /** * prune_dcache_sb - shrink the dcache + * @sb: superblock * @nr_to_scan: number of entries to try to free * * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e2d88baf91d3..4687fea0c00f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -124,7 +124,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags) { void *ret; - ret = kmalloc(size, flags); + ret = kzalloc(size, flags); if (!ret) ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); return ret; diff --git a/fs/stack.c b/fs/stack.c index 4a6f7f440658..b4f2ab48a61f 100644 --- a/fs/stack.c +++ b/fs/stack.c @@ -29,10 +29,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src) * * We don't actually know what locking is used at the lower level; * but if it's a filesystem that supports quotas, it will be using - * i_lock as in inode_add_bytes(). tmpfs uses other locking, and - * its 32-bit is (just) able to exceed 2TB i_size with the aid of - * holes; but its i_blocks cannot carry into the upper long without - * almost 2TB swap - let's ignore that case. + * i_lock as in inode_add_bytes(). */ if (sizeof(i_blocks) > sizeof(long)) spin_lock(&src->i_lock); diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 3090471b2a5e..e49c36d38d7e 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -128,7 +128,7 @@ extern int is_dock_device(acpi_handle handle); extern int register_dock_notifier(struct notifier_block *nb); extern void unregister_dock_notifier(struct notifier_block *nb); extern int register_hotplug_dock_device(acpi_handle handle, - struct acpi_dock_ops *ops, + const struct acpi_dock_ops *ops, void *context); extern void unregister_hotplug_dock_device(acpi_handle handle); #else diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 2ed0a8486c19..f554a9313b43 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110413 +#define ACPI_CA_VERSION 0x20110623 #include "actypes.h" #include "actbl.h" @@ -69,6 +69,7 @@ extern u32 acpi_gbl_trace_flags; extern u32 acpi_gbl_enable_aml_debug_object; extern u8 acpi_gbl_copy_dsdt_locally; extern u8 acpi_gbl_truncate_io_addresses; +extern u8 acpi_gbl_disable_auto_repair; extern u32 acpi_current_gpe_count; extern struct acpi_table_fadt acpi_gbl_FADT; diff --git a/include/acpi/apei.h b/include/acpi/apei.h index e67b523a50e1..51a527d24a8a 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -18,6 +18,11 @@ extern int hest_disable; extern int erst_disable; +#ifdef CONFIG_ACPI_APEI_GHES +extern int ghes_disable; +#else +#define ghes_disable 1 +#endif #ifdef CONFIG_ACPI_APEI void __init acpi_hest_init(void); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index ba4928cae473..67055f180330 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -337,7 +337,7 @@ extern struct cpuidle_driver acpi_idle_driver; /* in processor_thermal.c */ int acpi_processor_get_limit_info(struct acpi_processor *pr); -extern struct thermal_cooling_device_ops processor_cooling_ops; +extern const struct thermal_cooling_device_ops processor_cooling_ops; #ifdef CONFIG_CPU_FREQ void acpi_thermal_cpufreq_init(void); void 
acpi_thermal_cpufreq_exit(void); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1deb2a73c2da..6001b4da39dd 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -238,7 +238,6 @@ extern int acpi_paddr_to_node(u64 start_addr, u64 size); extern int pnpacpi_disabled; #define PXM_INVAL (-1) -#define NID_INVAL (-1) int acpi_check_resource_conflict(const struct resource *res); @@ -280,6 +279,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_CPUHP_OST_SUPPORT 8 #define OSC_SB_APEI_SUPPORT 16 +extern bool osc_sb_apei_support_acked; + /* PCI defined _OSC bits */ /* _OSC DW1 Definition (OS Support Fields) */ #define OSC_EXT_PCI_CONFIG_SUPPORT 1 diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3bac44cce142..7ad634501e48 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -146,6 +146,7 @@ extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); extern int bitmap_ord_to_pos(const unsigned long *bitmap, int n, int bits); +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ ((nbits) % BITS_PER_LONG) ? \ diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 36719ead50e8..b51629e15cfc 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -122,6 +122,8 @@ struct cpuidle_driver { }; #ifdef CONFIG_CPU_IDLE +extern void disable_cpuidle(void); +extern int cpuidle_idle_call(void); extern int cpuidle_register_driver(struct cpuidle_driver *drv); struct cpuidle_driver *cpuidle_get_driver(void); @@ -135,6 +137,8 @@ extern int cpuidle_enable_device(struct cpuidle_device *dev); extern void cpuidle_disable_device(struct cpuidle_device *dev); #else +static inline void disable_cpuidle(void) { } +static inline int cpuidle_idle_call(void) { return -ENODEV; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 3ff060ac7810..c6f996f2abb6 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -25,10 +25,6 @@ struct fault_attr { unsigned long reject_end; unsigned long count; - -#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS - struct dentry *dir; -#endif }; #define FAULT_ATTR_INITIALIZER { \ @@ -45,19 +41,15 @@ bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS -int init_fault_attr_dentries(struct fault_attr *attr, const char *name); -void cleanup_fault_attr_dentries(struct fault_attr *attr); +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr); #else /* CONFIG_FAULT_INJECTION_DEBUG_FS */ -static inline int init_fault_attr_dentries(struct fault_attr *attr, - const char *name) -{ - return -ENODEV; -} - -static inline void cleanup_fault_attr_dentries(struct fault_attr *attr) +static inline struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { + return ERR_PTR(-ENODEV); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 5bbebda78b02..5e98eeb2af3b 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -1,8 +1,26 @@ /* - * Basic general purpose allocator for managing special purpose memory - * not managed by the regular kmalloc/kfree interface. 
- * Uses for this includes on-device special memory, uncached memory - * etc. + * Basic general purpose allocator for managing special purpose + * memory, for example, memory that is not managed by the regular + * kmalloc/kfree interface. Uses for this include on-device special + * memory, uncached memory etc. + * + * It is safe to use the allocator in NMI handlers and other special + * unblockable contexts that could otherwise deadlock on locks. This + * is implemented by using atomic operations and retries on any + * conflicts. The disadvantage is that there may be livelocks in + * extreme cases. For better scalability, one allocator can be used + * for each CPU. + * + * The lockless operation only works if there is enough memory + * available. If new memory is added to the pool, a lock still has + * to be taken. So any user relying on locklessness has to ensure + * that sufficient memory is preallocated. + * + * The basic atomic operation of this allocator is cmpxchg on long. + * On architectures that don't have an NMI-safe cmpxchg implementation, + * the allocator can NOT be used in an NMI handler. So code that uses + * the allocator in an NMI handler should depend on + * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. @@ -15,7 +33,7 @@ * General purpose special memory pool descriptor. */ struct gen_pool { - rwlock_t lock; + spinlock_t lock; struct list_head chunks; /* list of chunks in this pool */ int min_alloc_order; /* minimum allocation order */ }; @@ -24,8 +42,8 @@ struct gen_pool { * General purpose special memory pool chunk descriptor. */ struct gen_pool_chunk { - spinlock_t lock; struct list_head next_chunk; /* next chunk in pool */ + atomic_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* starting address of memory chunk */ unsigned long end_addr; /* ending address of memory chunk */ @@ -56,4 +74,8 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr, extern void gen_pool_destroy(struct gen_pool *); extern unsigned long gen_pool_alloc(struct gen_pool *, size_t); extern void gen_pool_free(struct gen_pool *, unsigned long, size_t); +extern void gen_pool_for_each_chunk(struct gen_pool *, + void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *); +extern size_t gen_pool_avail(struct gen_pool *); +extern size_t gen_pool_size(struct gen_pool *); #endif /* __GENALLOC_H__ */ diff --git a/include/linux/gfp.h index cb4089254f01..3a76faf6a3ee 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -92,7 +92,7 @@ struct vm_area_struct; */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 23 /* Room for 23 __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 24 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ diff --git a/include/linux/idr.h index 13a801f3d028..255491cf522e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -146,6 +146,10 @@ void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask); +void ida_simple_remove(struct ida *ida, unsigned int id); + void __init idr_init_cache(void); #endif /* __IDR_H__ */ diff --git a/include/linux/llist.h
b/include/linux/llist.h new file mode 100644 index 000000000000..aa0c8b5b3cd0 --- /dev/null +++ b/include/linux/llist.h @@ -0,0 +1,126 @@ +#ifndef LLIST_H +#define LLIST_H +/* + * Lock-less NULL terminated single linked list + * + * If there are multiple producers and multiple consumers, llist_add + * can be used in producers and llist_del_all can be used in + * consumers. They can work simultaneously without lock. But + * llist_del_first can not be used here, because llist_del_first + * depends on list->first->next not changing if list->first is not + * changed during its operation, and an llist_del_first, llist_add, + * llist_add (or llist_del_all, llist_add, llist_add) sequence in + * another consumer may violate that. + * + * If there are multiple producers and one consumer, llist_add can be + * used in producers and llist_del_all or llist_del_first can be used + * in the consumer. + * + * This can be summarized as follows: + * + * | add | del_first | del_all + * add | - | - | - + * del_first | | L | L + * del_all | | | - + * + * Where "-" means no lock is needed, while "L" means a lock is + * needed. + * + * The list entries deleted via llist_del_all can be traversed with + * traversal functions such as llist_for_each. But the list + * entries can not be traversed safely before being deleted from the + * list. + * The order of deleted entries is from the newest to the oldest added + * one. If you want to traverse from the oldest to the newest, you + * must reverse the order by yourself before traversing. + * + * The basic atomic operation of this list is cmpxchg on long. On + * architectures that don't have an NMI-safe cmpxchg implementation, the + * list can NOT be used in an NMI handler. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + */ + +struct llist_head { + struct llist_node *first; +}; + +struct llist_node { + struct llist_node *next; +}; + +#define LLIST_HEAD_INIT(name) { NULL } +#define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name) + +/** + * init_llist_head - initialize lock-less list head + * @list: the head for your lock-less list + */ +static inline void init_llist_head(struct llist_head *list) +{ + list->first = NULL; +} + +/** + * llist_entry - get the struct of this entry + * @ptr: the &struct llist_node pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the llist_node within the struct. + */ +#define llist_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * llist_for_each - iterate over some deleted entries of a lock-less list + * @pos: the &struct llist_node to use as a loop cursor + * @node: the first entry of deleted list entries + * + * In general, some entries of the lock-less list can be traversed + * safely only after being deleted from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each(pos, node) \ + for ((pos) = (node); pos; (pos) = (pos)->next) + +/** + * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type + * @pos: the type * to use as a loop cursor. + * @node: the first entry of deleted list entries. + * @member: the name of the llist_node within the struct. 
+ * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_entry(pos, node, member) \ + for ((pos) = llist_entry((node), typeof(*(pos)), member); \ + &(pos)->member != NULL; \ + (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) + +/** + * llist_empty - tests whether a lock-less list is empty + * @head: the list to test + * + * Not guaranteed to be accurate or up to date. Just a quick way to + * test whether the list is empty without deleting something from the + * list. + */ +static inline int llist_empty(const struct llist_head *head) +{ + return ACCESS_ONCE(head->first) == NULL; +} + +void llist_add(struct llist_node *new, struct llist_head *head); +void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, + struct llist_head *head); +struct llist_node *llist_del_first(struct llist_head *head); +struct llist_node *llist_del_all(struct llist_head *head); +#endif /* LLIST_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b96600786913..3b535db00a94 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -86,8 +86,6 @@ extern void mem_cgroup_uncharge_end(void); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); @@ -225,12 +223,6 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) { } -static inline int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, gfp_t gfp_mask) -{ - return 0; -} - static inline void mem_cgroup_add_lru_list(struct page *page, int lru) { } diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h index 89212df05622..f7316c29bdec 100644 --- a/include/linux/mfd/aat2870.h +++ b/include/linux/mfd/aat2870.h @@ -89,7 +89,7 @@ enum aat2870_id { /* Backlight current magnitude (mA) */ enum aat2870_current { - AAT2870_CURRENT_0_45, + AAT2870_CURRENT_0_45 = 1, AAT2870_CURRENT_0_90, AAT2870_CURRENT_1_80, AAT2870_CURRENT_2_70, diff --git a/include/linux/mm.h b/include/linux/mm.h index 3172a1c0f08e..f2690cf49827 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1600,6 +1600,7 @@ enum mf_flags { }; extern void memory_failure(unsigned long pfn, int trapno); extern int __memory_failure(unsigned long pfn, int trapno, int flags); +extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; diff --git a/include/linux/of.h b/include/linux/of.h index 0085bb01c041..bc3dc6399547 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -68,6 +68,7 @@ struct device_node { /* Pointer for first entry in chain of all nodes. 
*/ extern struct device_node *allnodes; extern struct device_node *of_chosen; +extern struct device_node *of_aliases; extern rwlock_t devtree_lock; static inline bool of_have_populated_dt(void) @@ -209,6 +210,9 @@ extern int of_device_is_available(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, const char *name, int *lenp); +#define for_each_property(pp, properties) \ + for (pp = properties; pp != NULL; pp = pp->next) + extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( @@ -221,6 +225,10 @@ extern int of_parse_phandles_with_args(struct device_node *np, const char *list_name, const char *cells_name, int index, struct device_node **out_node, const void **out_args); +extern void *early_init_dt_alloc_memory_arch(u64 size, u64 align); +extern void of_alias_scan(void); +extern int of_alias_get_id(struct device_node *np, const char *stem); + extern int of_machine_is_compatible(const char *compat); extern int prom_add_property(struct device_node* np, struct property* prop); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index c84d900fbbb3..b74b74ffe0e7 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -97,7 +97,6 @@ extern void early_init_dt_check_for_initrd(unsigned long node); extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); extern void early_init_dt_add_memory_arch(u64 base, u64 size); -extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align); extern u64 dt_mem_next_cell(int s, __be32 **cellp); /* diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 23241c2fecce..9d4539c52e53 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -39,7 +39,15 @@ * when it is shrunk, before we rcu free the node. See shrink code for * details. */ -#define RADIX_TREE_INDIRECT_PTR 1 +#define RADIX_TREE_INDIRECT_PTR 1 +/* + * A common use of the radix tree is to store pointers to struct pages; + * but shmem/tmpfs needs also to store swap entries in the same tree: + * those are marked as exceptional entries to distinguish them. + * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it. + */ +#define RADIX_TREE_EXCEPTIONAL_ENTRY 2 +#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 #define radix_tree_indirect_to_ptr(ptr) \ radix_tree_indirect_to_ptr((void __force *)(ptr)) @@ -174,6 +182,28 @@ static inline int radix_tree_deref_retry(void *arg) } /** + * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry? + * @arg: value returned by radix_tree_deref_slot + * Returns: 0 if well-aligned pointer, non-0 if exceptional entry. + */ +static inline int radix_tree_exceptional_entry(void *arg) +{ + /* Not unlikely because radix_tree_exception often tested first */ + return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY; +} + +/** + * radix_tree_exception - radix_tree_deref_slot returned either exception? + * @arg: value returned by radix_tree_deref_slot + * Returns: 0 if well-aligned pointer, non-0 if either kind of exception. + */ +static inline int radix_tree_exception(void *arg) +{ + return unlikely((unsigned long)arg & + (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY)); +} + +/** * radix_tree_replace_slot - replace item in a slot * @pslot: pointer to slot, returned by radix_tree_lookup_slot * @item: new item to store in the slot. 
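The discipline these helpers imply is: dereference the slot, test radix_tree_exception() first, retry on a transient deref failure, and only then try to take a page reference. A minimal caller sketch, modelled on the mm/filemap.c changes later in this patch (illustrative, not itself a hunk of this diff): page = radix_tree_deref_slot(pagep); if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) goto repeat; /* transient: entry at root moved */ /* otherwise an exceptional entry, e.g. a shmem swap entry: handle it without raising the page count */ } else if (!page_cache_get_speculative(page)) goto repeat;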
@@ -194,8 +224,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); -unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); @@ -222,6 +252,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int fromtag, unsigned int totag); int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); static inline void radix_tree_preload_end(void) { diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index aa08fa8fd79b..9291ac3cc627 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -8,22 +8,15 @@ /* inode in-kernel data */ -#define SHMEM_NR_DIRECT 16 - -#define SHMEM_SYMLINK_INLINE_LEN (SHMEM_NR_DIRECT * sizeof(swp_entry_t)) - struct shmem_inode_info { spinlock_t lock; unsigned long flags; unsigned long alloced; /* data pages alloced to file */ - unsigned long swapped; /* subtotal assigned to swap */ - unsigned long next_index; /* highest alloced index + 1 */ - struct shared_policy policy; /* NUMA memory alloc policy */ - struct page *i_indirect; /* top indirect blocks page */ union { - swp_entry_t i_direct[SHMEM_NR_DIRECT]; /* first blocks */ - char inline_symlink[SHMEM_SYMLINK_INLINE_LEN]; + unsigned long swapped; /* subtotal assigned to swap */ + char *symlink; /* unswappable short symlink */ }; + struct shared_policy policy; /* NUMA memory alloc policy */ struct list_head swaplist; /* chain of maybes on swap */ struct list_head xattr_list; /* list of shmem_xattr */ struct inode vfs_inode; @@ -49,7 +42,7 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) /* * Functions in mm/shmem.c called directly from elsewhere: */ -extern int init_tmpfs(void); +extern int shmem_init(void); extern int shmem_fill_super(struct super_block *sb, void *data, int silent); extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); @@ -59,8 +52,6 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); extern int shmem_unuse(swp_entry_t entry, struct page *page); -extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, - struct page **pagep, swp_entry_t *ent); static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index cd42e30b7c6e..2189d3ffc85d 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -1,3 +1,8 @@ +#ifndef _LINUX_SWAPOPS_H +#define _LINUX_SWAPOPS_H + +#include <linux/radix-tree.h> + /* * swapcache pages are stored in the swapper_space radix tree. 
We want to * get good packing density in that tree, so the index should be dense in @@ -76,6 +81,22 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry) return __swp_entry_to_pte(arch_entry); } +static inline swp_entry_t radix_to_swp_entry(void *arg) +{ + swp_entry_t entry; + + entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT; + return entry; +} + +static inline void *swp_to_radix_entry(swp_entry_t entry) +{ + unsigned long value; + + value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT; + return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); +} + #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { @@ -169,3 +190,5 @@ static inline int non_swap_entry(swp_entry_t entry) return 0; } #endif + +#endif /* _LINUX_SWAPOPS_H */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index d3ec89fb4122..47b4a27e6e97 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -85,22 +85,6 @@ struct thermal_cooling_device { ((long)t-2732+5)/10 : ((long)t-2732-5)/10) #define CELSIUS_TO_KELVIN(t) ((t)*10+2732) -#if defined(CONFIG_THERMAL_HWMON) -/* thermal zone devices with the same type share one hwmon device */ -struct thermal_hwmon_device { - char type[THERMAL_NAME_LENGTH]; - struct device *device; - int count; - struct list_head tz_list; - struct list_head node; -}; - -struct thermal_hwmon_attr { - struct device_attribute attr; - char name[16]; -}; -#endif - struct thermal_zone_device { int id; char type[THERMAL_NAME_LENGTH]; @@ -120,12 +104,6 @@ struct thermal_zone_device { struct mutex lock; /* protect cooling devices list */ struct list_head node; struct delayed_work poll_queue; -#if defined(CONFIG_THERMAL_HWMON) - struct list_head hwmon_node; - struct thermal_hwmon_device *hwmon; - struct thermal_hwmon_attr temp_input; /* hwmon sys attr */ - struct thermal_hwmon_attr temp_crit; /* hwmon sys attr */ -#endif }; /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" diff --git a/init/main.c b/init/main.c index d7211faed2ad..9c51ee7adf3d 100644 --- a/init/main.c +++ b/init/main.c @@ -369,9 +369,12 @@ static noinline void __init_refok rest_init(void) init_idle_bootup_task(current); preempt_enable_no_resched(); schedule(); - preempt_disable(); + + /* At this point, we can enable user mode helper functionality */ + usermodehelper_enable(); /* Call into cpu_idle with preempt disabled */ + preempt_disable(); cpu_idle(); } @@ -715,7 +718,7 @@ static void __init do_basic_setup(void) { cpuset_init_smp(); usermodehelper_init(); - init_tmpfs(); + shmem_init(); driver_init(); init_irq_proc(); do_ctors(); diff --git a/ipc/shm.c b/ipc/shm.c index 9fb044f3b345..b5bae9d945b6 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -294,7 +294,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) void shm_destroy_orphaned(struct ipc_namespace *ns) { down_write(&shm_ids(ns).rw_mutex); - if (&shm_ids(ns).in_use) + if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); up_write(&shm_ids(ns).rw_mutex); } @@ -304,9 +304,12 @@ void exit_shm(struct task_struct *task) { struct ipc_namespace *ns = task->nsproxy->ipc_ns; + if (shm_ids(ns).in_use == 0) + return; + /* Destroy all already created segments, but not mapped yet */ down_write(&shm_ids(ns).rw_mutex); - if (&shm_ids(ns).in_use) + if (shm_ids(ns).in_use) idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); up_write(&shm_ids(ns).rw_mutex); } diff --git a/kernel/kmod.c b/kernel/kmod.c index 
47613dfb7b28..ddc7644c1305 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -274,7 +274,7 @@ static void __call_usermodehelper(struct work_struct *work) * (used for preventing user land processes from being created after the user * land has been frozen during a system-wide hibernation or suspend operation). */ -static int usermodehelper_disabled; +static int usermodehelper_disabled = 1; /* Number of helpers running */ static atomic_t running_helpers = ATOMIC_INIT(0); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d1db2880d1cf..e19ce1454ee1 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -291,30 +291,28 @@ static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; - s = NULL; if (isadd == REGISTER) { for_each_cpu(cpu, mask) { - if (!s) - s = kmalloc_node(sizeof(struct listener), - GFP_KERNEL, cpu_to_node(cpu)); + s = kmalloc_node(sizeof(struct listener), + GFP_KERNEL, cpu_to_node(cpu)); if (!s) goto cleanup; + s->pid = pid; - INIT_LIST_HEAD(&s->list); s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); - list_for_each_entry_safe(s2, tmp, &listeners->list, list) { - if (s2->pid == pid) - goto next_cpu; + list_for_each_entry(s2, &listeners->list, list) { + if (s2->pid == pid && s2->valid) + goto exists; } list_add(&s->list, &listeners->list); s = NULL; -next_cpu: +exists: up_write(&listeners->sem); + kfree(s); /* nop if NULL */ } - kfree(s); return 0; } diff --git a/lib/Kconfig b/lib/Kconfig index 32f3e5ae2be5..6c695ff9caba 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -276,4 +276,7 @@ config CORDIC so its calculations are in fixed point. Modules can select this when they require this function. Module will be called cordic. 
+config LLIST + bool + endmenu diff --git a/lib/Makefile index 892f4e282ea1..6457af4a7caf 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o obj-$(CONFIG_CORDIC) += cordic.o +obj-$(CONFIG_LLIST) += llist.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/bitmap.c index 37ef4b048795..2f4412e4d071 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) } EXPORT_SYMBOL(__bitmap_weight); -#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) - void bitmap_set(unsigned long *map, int start, int nr) { unsigned long *p = map + BIT_WORD(start); diff --git a/lib/fault-inject.c index 2577b121c7c1..f193b7796449 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -197,21 +197,15 @@ static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); } -void cleanup_fault_attr_dentries(struct fault_attr *attr) -{ - debugfs_remove_recursive(attr->dir); -} - -int init_fault_attr_dentries(struct fault_attr *attr, const char *name) +struct dentry *fault_create_debugfs_attr(const char *name, + struct dentry *parent, struct fault_attr *attr) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - dir = debugfs_create_dir(name, NULL); + dir = debugfs_create_dir(name, parent); if (!dir) - return -ENOMEM; - - attr->dir = dir; + return ERR_PTR(-ENOMEM); if (!debugfs_create_ul("probability", mode, dir, &attr->probability)) goto fail; @@ -243,11 +237,11 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name) #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ - return 0; + return dir; fail: - debugfs_remove_recursive(attr->dir); + debugfs_remove_recursive(dir); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/lib/genalloc.c index 577ddf805975..f352cc42f4f8 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -1,8 +1,26 @@ /* - * Basic general purpose allocator for managing special purpose memory - * not managed by the regular kmalloc/kfree interface. - * Uses for this includes on-device special memory, uncached memory - * etc. + * Basic general purpose allocator for managing special purpose + * memory, for example, memory that is not managed by the regular + * kmalloc/kfree interface. Uses for this include on-device special + * memory, uncached memory etc. + * + * It is safe to use the allocator in NMI handlers and other special + * unblockable contexts that could otherwise deadlock on locks. This + * is implemented by using atomic operations and retries on any + * conflicts. The disadvantage is that there may be livelocks in + * extreme cases. For better scalability, one allocator can be used + * for each CPU. + * + * The lockless operation only works if there is enough memory + * available. If new memory is added to the pool, a lock still has + * to be taken. So any user relying on locklessness has to ensure + * that sufficient memory is preallocated. + * + * The basic atomic operation of this allocator is cmpxchg on long. + * On architectures that don't have an NMI-safe cmpxchg implementation, + * the allocator can NOT be used in an NMI handler. So code that uses + * the allocator in an NMI handler should depend on + * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. 
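+ * + * A minimal usage sketch (illustrative only; buf and the sizes are + * made-up names, not part of this patch): + * + * pool = gen_pool_create(5, -1); [32-byte minimum granules] + * gen_pool_add(pool, (unsigned long)buf, 65536, -1); [takes the lock] + * addr = gen_pool_alloc(pool, 256); [lockless fast path] + * if (addr) + * gen_pool_free(pool, addr, 256);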
* * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> * * @@ -13,8 +31,109 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/bitmap.h> +#include <linux/rculist.h> +#include <linux/interrupt.h> #include <linux/genalloc.h> +static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) +{ + unsigned long val, nval; + + nval = *addr; + do { + val = nval; + if (val & mask_to_set) + return -EBUSY; + cpu_relax(); + } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val); + + return 0; +} + +static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) +{ + unsigned long val, nval; + + nval = *addr; + do { + val = nval; + if ((val & mask_to_clear) != mask_to_clear) + return -EBUSY; + cpu_relax(); + } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val); + + return 0; +} + +/* + * bitmap_set_ll - set the specified number of bits at the specified position + * @map: pointer to a bitmap + * @start: a bit position in @map + * @nr: number of bits to set + * + * Set @nr bits starting from @start in @map lock-lessly. Several users + * can set/clear the same bitmap simultaneously without a lock. If two + * users set the same bit, one user will return the number of remaining + * bits, otherwise 0 is returned. + */ +static int bitmap_set_ll(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_set >= 0) { + if (set_bits_ll(p, mask_to_set)) + return nr; + nr -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (nr) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + if (set_bits_ll(p, mask_to_set)) + return nr; + } + + return 0; +} + +/* + * bitmap_clear_ll - clear the specified number of bits at the specified position + * @map: pointer to a bitmap + * @start: a bit position in @map + * @nr: number of bits to clear + * + * Clear @nr bits starting from @start in @map lock-lessly. Several users + * can set/clear the same bitmap simultaneously without a lock. If two + * users clear the same bit, one user will return the number of + * remaining bits, otherwise 0 is returned. 
+ */ +static int bitmap_clear_ll(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_clear >= 0) { + if (clear_bits_ll(p, mask_to_clear)) + return nr; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + if (clear_bits_ll(p, mask_to_clear)) + return nr; + } + + return 0; +} /** * gen_pool_create - create a new special memory pool @@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid) pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); if (pool != NULL) { - rwlock_init(&pool->lock); + spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->chunks); pool->min_alloc_order = min_alloc_order; } @@ -63,14 +182,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy if (unlikely(chunk == NULL)) return -ENOMEM; - spin_lock_init(&chunk->lock); chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size; + atomic_set(&chunk->avail, size); - write_lock(&pool->lock); - list_add(&chunk->next_chunk, &pool->chunks); - write_unlock(&pool->lock); + spin_lock(&pool->lock); + list_add_rcu(&chunk->next_chunk, &pool->chunks); + spin_unlock(&pool->lock); return 0; } @@ -85,19 +204,19 @@ EXPORT_SYMBOL(gen_pool_add_virt); */ phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; + phys_addr_t paddr = -1; - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); - - if (addr >= chunk->start_addr && addr < chunk->end_addr) - return chunk->phys_addr + addr - chunk->start_addr; + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { + if (addr >= chunk->start_addr && addr < chunk->end_addr) { + paddr = chunk->phys_addr + (addr - chunk->start_addr); + break; + } } - read_unlock(&pool->lock); + rcu_read_unlock(); - return -1; + return paddr; } EXPORT_SYMBOL(gen_pool_virt_to_phys); @@ -115,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool) int order = pool->min_alloc_order; int bit, end_bit; - list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); list_del(&chunk->next_chunk); @@ -137,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy); * @size: number of bytes to allocate from the pool * * Allocate the requested number of bytes from the specified pool. - * Uses a first-fit algorithm. + * Uses a first-fit algorithm. Can not be used in NMI handler on + * architectures without NMI-safe cmpxchg implementation. 
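+ * + * A caller sketch (illustrative, not part of this patch); note that a + * return value of 0 means allocation failure, not address zero: + * + * addr = gen_pool_alloc(pool, size); + * if (!addr) + * return -ENOMEM;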
*/ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; - unsigned long addr, flags; + unsigned long addr = 0; int order = pool->min_alloc_order; - int nbits, start_bit, end_bit; + int nbits, start_bit = 0, end_bit, remain; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif if (size == 0) return 0; nbits = (size + (1UL << order) - 1) >> order; - - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { + if (size > atomic_read(&chunk->avail)) + continue; end_bit = (chunk->end_addr - chunk->start_addr) >> order; - - spin_lock_irqsave(&chunk->lock, flags); - start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, - nbits, 0); - if (start_bit >= end_bit) { - spin_unlock_irqrestore(&chunk->lock, flags); +retry: + start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, + start_bit, nbits, 0); + if (start_bit >= end_bit) continue; + remain = bitmap_set_ll(chunk->bits, start_bit, nbits); + if (remain) { + remain = bitmap_clear_ll(chunk->bits, start_bit, + nbits - remain); + BUG_ON(remain); + goto retry; } addr = chunk->start_addr + ((unsigned long)start_bit << order); - - bitmap_set(chunk->bits, start_bit, nbits); - spin_unlock_irqrestore(&chunk->lock, flags); - read_unlock(&pool->lock); - return addr; + size = nbits << order; + atomic_sub(size, &chunk->avail); + break; } - read_unlock(&pool->lock); - return 0; + rcu_read_unlock(); + return addr; } EXPORT_SYMBOL(gen_pool_alloc); @@ -184,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc); * @addr: starting address of memory to free back to pool * @size: size in bytes of memory to free * - * Free previously allocated special memory back to the specified pool. + * Free previously allocated special memory back to the specified + * pool. Can not be used in NMI handler on architectures without + * NMI-safe cmpxchg implementation. */ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) { - struct list_head *_chunk; struct gen_pool_chunk *chunk; - unsigned long flags; int order = pool->min_alloc_order; - int bit, nbits; + int start_bit, nbits, remain; - nbits = (size + (1UL << order) - 1) >> order; - - read_lock(&pool->lock); - list_for_each(_chunk, &pool->chunks) { - chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + nbits = (size + (1UL << order) - 1) >> order; + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { if (addr >= chunk->start_addr && addr < chunk->end_addr) { BUG_ON(addr + size > chunk->end_addr); - spin_lock_irqsave(&chunk->lock, flags); - bit = (addr - chunk->start_addr) >> order; - while (nbits--) - __clear_bit(bit++, chunk->bits); - spin_unlock_irqrestore(&chunk->lock, flags); - break; + start_bit = (addr - chunk->start_addr) >> order; + remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); + BUG_ON(remain); + size = nbits << order; + atomic_add(size, &chunk->avail); + rcu_read_unlock(); + return; } } - BUG_ON(nbits > 0); - read_unlock(&pool->lock); + rcu_read_unlock(); + BUG(); } EXPORT_SYMBOL(gen_pool_free); + +/** + * gen_pool_for_each_chunk - call func for every chunk of generic memory pool + * @pool: the generic memory pool + * @func: func to call + * @data: additional data used by @func + * + * Call @func for every chunk of generic memory pool. 
The @func is + * called with rcu_read_lock held. + */ +void gen_pool_for_each_chunk(struct gen_pool *pool, + void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data), + void *data) +{ + struct gen_pool_chunk *chunk; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) + func(pool, chunk, data); + rcu_read_unlock(); +} +EXPORT_SYMBOL(gen_pool_for_each_chunk); + +/** + * gen_pool_avail - get available free space of the pool + * @pool: pool to get available free space + * + * Return available free space of the specified pool. + */ +size_t gen_pool_avail(struct gen_pool *pool) +{ + struct gen_pool_chunk *chunk; + size_t avail = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) + avail += atomic_read(&chunk->avail); + rcu_read_unlock(); + return avail; +} +EXPORT_SYMBOL_GPL(gen_pool_avail); + +/** + * gen_pool_size - get size in bytes of memory managed by the pool + * @pool: pool to get size + * + * Return size in bytes of memory managed by the pool. + */ +size_t gen_pool_size(struct gen_pool *pool) +{ + struct gen_pool_chunk *chunk; + size_t size = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) + size += chunk->end_addr - chunk->start_addr; + rcu_read_unlock(); + return size; +} +EXPORT_SYMBOL_GPL(gen_pool_size); diff --git a/lib/idr.c index e15502e8b21e..db040ce3fa73 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -34,8 +34,10 @@ #include <linux/err.h> #include <linux/string.h> #include <linux/idr.h> +#include <linux/spinlock.h> static struct kmem_cache *idr_layer_cache; +static DEFINE_SPINLOCK(simple_ida_lock); static struct idr_layer *get_from_free_list(struct idr *idp) { @@ -926,6 +928,71 @@ void ida_destroy(struct ida *ida) EXPORT_SYMBOL(ida_destroy); /** + * ida_simple_get - get a new id. + * @ida: the (initialized) ida. + * @start: the minimum id (inclusive, < 0x80000000) + * @end: the maximum id (exclusive, < 0x80000000 or 0) + * @gfp_mask: memory allocation flags + * + * Allocates an id in the range start <= id < end, or returns -ENOSPC. + * On memory allocation failure, returns -ENOMEM. + * + * Use ida_simple_remove() to get rid of an id. + */ +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask) +{ + int ret, id; + unsigned int max; + + BUG_ON((int)start < 0); + BUG_ON((int)end < 0); + + if (end == 0) + max = 0x80000000; + else { + BUG_ON(end < start); + max = end - 1; + } + +again: + if (!ida_pre_get(ida, gfp_mask)) + return -ENOMEM; + + spin_lock(&simple_ida_lock); + ret = ida_get_new_above(ida, start, &id); + if (!ret) { + if (id > max) { + ida_remove(ida, id); + ret = -ENOSPC; + } else { + ret = id; + } + } + spin_unlock(&simple_ida_lock); + + if (unlikely(ret == -EAGAIN)) + goto again; + + return ret; +} +EXPORT_SYMBOL(ida_simple_get); + +/** + * ida_simple_remove - remove an allocated id. + * @ida: the (initialized) ida. + * @id: the id returned by ida_simple_get. + */ +void ida_simple_remove(struct ida *ida, unsigned int id) +{ + BUG_ON((int)id < 0); + spin_lock(&simple_ida_lock); + ida_remove(ida, id); + spin_unlock(&simple_ida_lock); +} +EXPORT_SYMBOL(ida_simple_remove); + +/** + * ida_init - initialize ida handle + * @ida: ida handle + * diff --git a/lib/llist.c new file mode 100644 index 000000000000..da445724fa1f --- /dev/null +++ b/lib/llist.c @@ -0,0 +1,129 @@ +/* + * Lock-less NULL terminated single linked list + * + * The basic atomic operation of this list is cmpxchg on long. 
On + * architectures that don't have an NMI-safe cmpxchg implementation, the + * list can NOT be used in an NMI handler. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/llist.h> + +#include <asm/system.h> + +/** + * llist_add - add a new entry + * @new: new entry to be added + * @head: the head for your lock-less list + */ +void llist_add(struct llist_node *new, struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); +} +EXPORT_SYMBOL_GPL(llist_add); + +/** + * llist_add_batch - add several linked entries in batch + * @new_first: first entry in batch to be added + * @new_last: last entry in batch to be added + * @head: the head for your lock-less list + */ +void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, + struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new_last->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); +} +EXPORT_SYMBOL_GPL(llist_add_batch); + +/** + * llist_del_first - delete the first entry of lock-less list + * @head: the head for your lock-less list + * + * If the list is empty, return NULL; otherwise, return the first entry + * deleted, which is the newest added one. + * + * Only one llist_del_first user may run simultaneously with + * multiple llist_add users without a lock. Otherwise, an + * llist_del_first, llist_add, llist_add (or llist_del_all, llist_add, + * llist_add) sequence in another user may change @head->first->next + * while keeping @head->first. If multiple consumers are needed, please + * use llist_del_all or use a lock between consumers. 
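+ * + * A single-consumer pop sketch (illustrative; struct my_item, llnode + * and process() are made-up names): + * + * while ((node = llist_del_first(&head)) != NULL) { + * item = llist_entry(node, struct my_item, llnode); + * process(item); + * }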
+ */ +struct llist_node *llist_del_first(struct llist_head *head) +{ + struct llist_node *entry, *old_entry, *next; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + if (entry == NULL) + return NULL; + old_entry = entry; + next = entry->next; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); + + return entry; +} +EXPORT_SYMBOL_GPL(llist_del_first); + +/** + * llist_del_all - delete all entries from lock-less list + * @head: the head of lock-less list to delete all entries + * + * If list is empty, return NULL, otherwise, delete all entries and + * return the pointer to the first entry. The order of entries + * deleted is from the newest to the oldest added one. + */ +struct llist_node *llist_del_all(struct llist_head *head) +{ +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + return xchg(&head->first, NULL); +} +EXPORT_SYMBOL_GPL(llist_del_all); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7ea2e033d715..a2f9da59c197 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root, EXPORT_SYMBOL(radix_tree_prev_hole); static unsigned int -__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, - unsigned int max_items, unsigned long *next_index) +__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices, + unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; @@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index, /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { - index++; if (slot->slots[i]) { - results[nr_found++] = &(slot->slots[i]); - if (nr_found == max_items) + results[nr_found] = &(slot->slots[i]); + if (indices) + indices[nr_found] = index; + if (++nr_found == max_items) { + index++; goto out; + } } + index++; } out: *next_index = index; @@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, if (cur_index > max_index) break; - slots_found = __lookup(node, (void ***)results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, (void ***)results + ret, NULL, + cur_index, max_items - ret, &next_index); nr_found = 0; for (i = 0; i < slots_found; i++) { struct radix_tree_node *slot; @@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree * @root: radix tree root * @results: where the results of the lookup are placed + * @indices: where their indices should be placed (but usually NULL) * @first_index: start the lookup from this key * @max_items: place up to this many items at *results * @@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * protection, radix_tree_deref_slot may fail requiring a retry. 
*/ unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, +radix_tree_gang_lookup_slot(struct radix_tree_root *root, + void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items) { unsigned long max_index; @@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (first_index > 0) return 0; results[0] = (void **)&root->rnode; + if (indices) + indices[0] = 0; return 1; } node = indirect_to_ptr(node); @@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, if (cur_index > max_index) break; - slots_found = __lookup(node, results + ret, cur_index, - max_items - ret, &next_index); + slots_found = __lookup(node, results + ret, + indices ? indices + ret : NULL, + cur_index, max_items - ret, &next_index); ret += slots_found; if (next_index == 0) break; @@ -1194,6 +1203,98 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); +#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) +#include <linux/sched.h> /* for cond_resched() */ + +/* + * This linear search is at present only useful to shmem_unuse_inode(). + */ +static unsigned long __locate(struct radix_tree_node *slot, void *item, + unsigned long index, unsigned long *found_index) +{ + unsigned int shift, height; + unsigned long i; + + height = slot->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + for ( ; height > 1; height--) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } + + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference_raw(slot->slots[i]); + if (slot == NULL) + goto out; + } + + /* Bottom level: check items */ + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + if (slot->slots[i] == item) { + *found_index = index + i; + index = 0; + goto out; + } + } + index += RADIX_TREE_MAP_SIZE; +out: + return index; +} + +/** + * radix_tree_locate_item - search through radix tree for item + * @root: radix tree root + * @item: item to be found + * + * Returns index where item was found, or -1 if not found. + * Caller must hold no lock (since this time-consuming function needs + * to be preemptible), and must check afterwards if item is still there. 
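+ * + * A caller sketch (illustrative; mapping and item are made-up names): + * + * index = radix_tree_locate_item(&mapping->page_tree, item); + * + * and then recheck, e.g. that radix_tree_lookup(&mapping->page_tree, + * index) still returns @item, under the appropriate lock before + * acting on the index.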
+ */ +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + struct radix_tree_node *node; + unsigned long max_index; + unsigned long cur_index = 0; + unsigned long found_index = -1; + + do { + rcu_read_lock(); + node = rcu_dereference_raw(root->rnode); + if (!radix_tree_is_indirect_ptr(node)) { + rcu_read_unlock(); + if (node == item) + found_index = 0; + break; + } + + node = indirect_to_ptr(node); + max_index = radix_tree_maxindex(node->height); + if (cur_index > max_index) + break; + + cur_index = __locate(node, item, cur_index, &found_index); + rcu_read_unlock(); + cond_resched(); + } while (cur_index != 0 && cur_index <= max_index); + + return found_index; +} +#else +unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) +{ + return -1; +} +#endif /* CONFIG_SHMEM && CONFIG_SWAP */ /** * radix_tree_shrink - shrink height of a radix tree to minimal diff --git a/mm/failslab.c b/mm/failslab.c index 1ce58c201dca..0dd7b8fec71c 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -34,23 +34,23 @@ __setup("failslab=", setup_failslab); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS static int __init failslab_debugfs_init(void) { + struct dentry *dir; mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; - int err; - err = init_fault_attr_dentries(&failslab.attr, "failslab"); - if (err) - return err; + dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); - if (!debugfs_create_bool("ignore-gfp-wait", mode, failslab.attr.dir, + if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, &failslab.ignore_gfp_wait)) goto fail; - if (!debugfs_create_bool("cache-filter", mode, failslab.attr.dir, + if (!debugfs_create_bool("cache-filter", mode, dir, &failslab.cache_filter)) goto fail; return 0; fail: - cleanup_fault_attr_dentries(&failslab.attr); + debugfs_remove_recursive(dir); return -ENOMEM; } diff --git a/mm/filemap.c b/mm/filemap.c index 867d40222ec7..645a080ba4df 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -33,7 +33,6 @@ #include <linux/cpuset.h> #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ #include <linux/memcontrol.h> -#include <linux/mm_inline.h> /* for page_is_file_cache() */ #include <linux/cleancache.h> #include "internal.h" @@ -462,6 +461,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, int error; VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageSwapBacked(page)); error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); @@ -479,8 +479,6 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); - if (PageSwapBacked(page)) - __inc_zone_page_state(page, NR_SHMEM); spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; @@ -502,22 +500,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, { int ret; - /* - * Splice_read and readahead add shmem/tmpfs pages into the page cache - * before shmem_readpage has a chance to mark them as SwapBacked: they - * need to go on the anon lru below, and mem_cgroup_cache_charge - * (called in add_to_page_cache) needs to know where they're going too. 
- */ - if (mapping_cap_swap_backed(mapping)) - SetPageSwapBacked(page); - ret = add_to_page_cache(page, mapping, offset, gfp_mask); - if (ret == 0) { - if (page_is_file_cache(page)) - lru_cache_add_file(page); - else - lru_cache_add_anon(page); - } + if (ret == 0) + lru_cache_add_file(page); return ret; } EXPORT_SYMBOL_GPL(add_to_page_cache_lru); @@ -714,9 +699,16 @@ repeat: page = radix_tree_deref_slot(pagep); if (unlikely(!page)) goto out; - if (radix_tree_deref_retry(page)) - goto repeat; - + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) + goto repeat; + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so return it without + * attempting to raise page count. + */ + goto out; + } if (!page_cache_get_speculative(page)) goto repeat; @@ -753,7 +745,7 @@ struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) repeat: page = find_get_page(mapping, offset); - if (page) { + if (page && !radix_tree_exception(page)) { lock_page(page); /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { @@ -840,7 +832,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, start, nr_pages); + (void ***)pages, NULL, start, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; @@ -849,13 +841,22 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) { - WARN_ON(start | i); - goto restart; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + WARN_ON(start | i); + goto restart; + } + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so skip over it - + * we only reach this from invalidate_mapping_pages(). + */ + continue; } if (!page_cache_get_speculative(page)) @@ -903,7 +904,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, rcu_read_lock(); restart: nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, index, nr_pages); + (void ***)pages, NULL, index, nr_pages); ret = 0; for (i = 0; i < nr_found; i++) { struct page *page; @@ -912,12 +913,22 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. - */ - if (radix_tree_deref_retry(page)) - goto restart; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + goto restart; + } + /* + * Otherwise, shmem/tmpfs must be storing a swap entry + * here as an exceptional entry: so stop looking for + * contiguous pages. + */ + break; + } if (!page_cache_get_speculative(page)) goto repeat; @@ -977,12 +988,21 @@ repeat: if (unlikely(!page)) continue; - /* - * This can only trigger when the entry at index 0 moves out - * of or back to the root: none yet gotten, safe to restart. 
- */ - if (radix_tree_deref_retry(page)) - goto restart; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + goto restart; + } + /* + * This function is never used on a shmem/tmpfs + * mapping, so a swap entry won't be found here. + */ + BUG(); + } if (!page_cache_get_speculative(page)) goto repeat; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5f84d2351ddb..f4ec4e7ca4cd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -35,7 +35,6 @@ #include <linux/limits.h> #include <linux/mutex.h> #include <linux/rbtree.h> -#include <linux/shmem_fs.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/swapops.h> @@ -2873,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, return 0; if (PageCompound(page)) return 0; - /* - * Corner case handling. This is called from add_to_page_cache() - * in usual. But some FS (shmem) precharges this page before calling it - * and call add_to_page_cache() with GFP_NOWAIT. - * - * For GFP_NOWAIT case, the page may be pre-charged before calling - * add_to_page_cache(). (See shmem.c) check it here and avoid to call - * charge twice. (It works but has to pay a bit larger cost.) - * And when the page is SwapCache, it should take swap information - * into account. This is under lock_page() now. - */ - if (!(gfp_mask & __GFP_WAIT)) { - struct page_cgroup *pc; - - pc = lookup_page_cgroup(page); - if (!pc) - return 0; - lock_page_cgroup(pc); - if (PageCgroupUsed(pc)) { - unlock_page_cgroup(pc); - return 0; - } - unlock_page_cgroup(pc); - } if (unlikely(!mm)) mm = &init_mm; @@ -3486,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, cgroup_release_and_wakeup_rmdir(&mem->css); } -/* - * A call to try to shrink memory usage on charge failure at shmem's swapin. - * Calling hierarchical_reclaim is not enough because we should update - * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM. - * Moreover considering hierarchy, we should reclaim from the mem_over_limit, - * not from the memcg which this page would be charged to. - * try_charge_swapin does all of these works properly. - */ -int mem_cgroup_shmem_charge_fallback(struct page *page, - struct mm_struct *mm, - gfp_t gfp_mask) -{ - struct mem_cgroup *mem; - int ret; - - if (mem_cgroup_disabled()) - return 0; - - ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); - if (!ret) - mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */ - - return ret; -} - #ifdef CONFIG_DEBUG_VM static struct page_cgroup *lookup_page_cgroup_used(struct page *page) { @@ -5330,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, pgoff = pte_to_pgoff(ptent); /* page is moved even if it's not RSS of this task(page-faulted). */ - if (!mapping_cap_swap_backed(mapping)) { /* normal file */ - page = find_get_page(mapping, pgoff); - } else { /* shmem/tmpfs file. we should take account of swap too. */ - swp_entry_t ent; - mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent); + page = find_get_page(mapping, pgoff); + +#ifdef CONFIG_SWAP + /* shmem/tmpfs may report page out on swap: account for that too. 
*/ + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swap = radix_to_swp_entry(page); if (do_swap_account) - entry->val = ent.val; + *entry = swap; + page = find_get_page(&swapper_space, swap.val); } - +#endif return page; } diff --git a/mm/memory-failure.c index 740c4f52059c..2b43ba051ac9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -53,6 +53,7 @@ #include <linux/hugetlb.h> #include <linux/memory_hotplug.h> #include <linux/mm_inline.h> +#include <linux/kfifo.h> #include "internal.h" int sysctl_memory_failure_early_kill __read_mostly = 0; @@ -1178,6 +1179,97 @@ void memory_failure(unsigned long pfn, int trapno) { __memory_failure(pfn, trapno, 0); } +#define MEMORY_FAILURE_FIFO_ORDER 4 +#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) + +struct memory_failure_entry { + unsigned long pfn; + int trapno; + int flags; +}; + +struct memory_failure_cpu { + DECLARE_KFIFO(fifo, struct memory_failure_entry, + MEMORY_FAILURE_FIFO_SIZE); + spinlock_t lock; + struct work_struct work; +}; + +static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu); + +/** + * memory_failure_queue - Schedule handling memory failure of a page. + * @pfn: Page Number of the corrupted page + * @trapno: Trap number reported in the signal to user space. + * @flags: Flags for memory failure handling + * + * This function is called by the low level hardware error handler + * when it detects hardware memory corruption of a page. It schedules + * recovery of the error page, including dropping pages, killing + * processes, etc. + * + * The function is primarily of use for corruptions that + * happen outside the current execution context (e.g. when + * detected by a background scrubber). + * + * Can run in IRQ context. 
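+ * + * A caller sketch (illustrative; a machine check or scrubber path that + * cannot sleep queues the page and returns): + * + * memory_failure_queue(pfn, trapno, 0);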
+ */ +void memory_failure_queue(unsigned long pfn, int trapno, int flags) +{ + struct memory_failure_cpu *mf_cpu; + unsigned long proc_flags; + struct memory_failure_entry entry = { + .pfn = pfn, + .trapno = trapno, + .flags = flags, + }; + + mf_cpu = &get_cpu_var(memory_failure_cpu); + spin_lock_irqsave(&mf_cpu->lock, proc_flags); + if (kfifo_put(&mf_cpu->fifo, &entry)) + schedule_work_on(smp_processor_id(), &mf_cpu->work); + else + pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n", + pfn); + spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + put_cpu_var(memory_failure_cpu); +} +EXPORT_SYMBOL_GPL(memory_failure_queue); + +static void memory_failure_work_func(struct work_struct *work) +{ + struct memory_failure_cpu *mf_cpu; + struct memory_failure_entry entry = { 0, }; + unsigned long proc_flags; + int gotten; + + mf_cpu = &__get_cpu_var(memory_failure_cpu); + for (;;) { + spin_lock_irqsave(&mf_cpu->lock, proc_flags); + gotten = kfifo_get(&mf_cpu->fifo, &entry); + spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + if (!gotten) + break; + __memory_failure(entry.pfn, entry.trapno, entry.flags); + } +} + +static int __init memory_failure_init(void) +{ + struct memory_failure_cpu *mf_cpu; + int cpu; + + for_each_possible_cpu(cpu) { + mf_cpu = &per_cpu(memory_failure_cpu, cpu); + spin_lock_init(&mf_cpu->lock); + INIT_KFIFO(mf_cpu->fifo); + INIT_WORK(&mf_cpu->work, memory_failure_work_func); + } + + return 0; +} +core_initcall(memory_failure_init); + /** * unpoison_memory - Unpoison a previously poisoned page * @pfn: Page number of the to be unpoisoned page diff --git a/mm/mincore.c b/mm/mincore.c index a4e6b9d75c76..636a86876ff2 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -69,12 +69,15 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) * file will not get a swp_entry_t in its pte, but rather it is like * any other file mapping (ie. marked !present and faulted in with * tmpfs's .fault). So swapped out tmpfs mappings are tested here. - * - * However when tmpfs moves the page from pagecache and into swapcache, - * it is still in core, but the find_get_page below won't find it. - * No big deal, but make a note of it. */ page = find_get_page(mapping, pgoff); +#ifdef CONFIG_SWAP + /* shmem/tmpfs may return swap: account for swapcache page too. 
*/ + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swap = radix_to_swp_entry(page); + page = find_get_page(&swapper_space, swap.val); + } +#endif if (page) { present = PageUptodate(page); page_cache_release(page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1dbcf8888f14..6e8ecb6e021c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1409,14 +1409,11 @@ static int __init fail_page_alloc_debugfs(void) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; struct dentry *dir; - int err; - err = init_fault_attr_dentries(&fail_page_alloc.attr, - "fail_page_alloc"); - if (err) - return err; - - dir = fail_page_alloc.attr.dir; + dir = fault_create_debugfs_attr("fail_page_alloc", NULL, + &fail_page_alloc.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); if (!debugfs_create_bool("ignore-gfp-wait", mode, dir, &fail_page_alloc.ignore_gfp_wait)) @@ -1430,7 +1427,7 @@ static int __init fail_page_alloc_debugfs(void) return 0; fail: - cleanup_fault_attr_dentries(&fail_page_alloc.attr); + debugfs_remove_recursive(dir); return -ENOMEM; } diff --git a/mm/shmem.c b/mm/shmem.c index 5cc21f8b4cd3..32f6763f16fb 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -6,7 +6,8 @@ * 2000-2001 Christoph Rohland * 2000-2001 SAP AG * 2002 Red Hat Inc. - * Copyright (C) 2002-2005 Hugh Dickins. + * Copyright (C) 2002-2011 Hugh Dickins. + * Copyright (C) 2011 Google Inc. * Copyright (C) 2002-2005 VERITAS Software Corporation. * Copyright (C) 2004 Andi Kleen, SuSE Labs * @@ -28,7 +29,6 @@ #include <linux/file.h> #include <linux/mm.h> #include <linux/module.h> -#include <linux/percpu_counter.h> #include <linux/swap.h> static struct vfsmount *shm_mnt; @@ -51,6 +51,8 @@ static struct vfsmount *shm_mnt; #include <linux/shmem_fs.h> #include <linux/writeback.h> #include <linux/blkdev.h> +#include <linux/pagevec.h> +#include <linux/percpu_counter.h> #include <linux/splice.h> #include <linux/security.h> #include <linux/swapops.h> @@ -63,43 +65,17 @@ static struct vfsmount *shm_mnt; #include <linux/magic.h> #include <asm/uaccess.h> -#include <asm/div64.h> #include <asm/pgtable.h> -/* - * The maximum size of a shmem/tmpfs file is limited by the maximum size of - * its triple-indirect swap vector - see illustration at shmem_swp_entry(). - * - * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel, - * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum - * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, - * MAX_LFS_FILESIZE being then more restrictive than swap vector layout. - * - * We use / and * instead of shifts in the definitions below, so that the swap - * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE. 
- */ -#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) - -#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) -#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT) - -#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE) -#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT)) - #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) -/* info->flags needs VM_flags to handle pagein/truncate races efficiently */ -#define SHMEM_PAGEIN VM_READ -#define SHMEM_TRUNCATE VM_WRITE - -/* Definition to limit shmem_truncate's steps between cond_rescheds */ -#define LATENCY_LIMIT 64 - /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 +/* Symlink up to this size is kmalloc'ed instead of using a swappable page */ +#define SHORT_SYMLINK_LEN 128 + struct shmem_xattr { struct list_head list; /* anchored by shmem_inode_info->xattr_list */ char *name; /* xattr name */ @@ -107,7 +83,7 @@ struct shmem_xattr { char value[0]; }; -/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ +/* Flag allocation requirements to shmem_getpage */ enum sgp_type { SGP_READ, /* don't exceed i_size, don't allocate page */ SGP_CACHE, /* don't exceed i_size, may allocate page */ @@ -137,56 +113,6 @@ static inline int shmem_getpage(struct inode *inode, pgoff_t index, mapping_gfp_mask(inode->i_mapping), fault_type); } -static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) -{ - /* - * The above definition of ENTRIES_PER_PAGE, and the use of - * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: - * might be reconsidered if it ever diverges from PAGE_SIZE. - * - * Mobility flags are masked out as swap vectors cannot move - */ - return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO, - PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static inline void shmem_dir_free(struct page *page) -{ - __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); -} - -static struct page **shmem_dir_map(struct page *page) -{ - return (struct page **)kmap_atomic(page, KM_USER0); -} - -static inline void shmem_dir_unmap(struct page **dir) -{ - kunmap_atomic(dir, KM_USER0); -} - -static swp_entry_t *shmem_swp_map(struct page *page) -{ - return (swp_entry_t *)kmap_atomic(page, KM_USER1); -} - -static inline void shmem_swp_balance_unmap(void) -{ - /* - * When passing a pointer to an i_direct entry, to code which - * also handles indirect entries and so will shmem_swp_unmap, - * we must arrange for the preempt count to remain in balance. - * What kmap_atomic of a lowmem page does depends on config - * and architecture, so pretend to kmap_atomic some lowmem page. 
- */ - (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); -} - -static inline void shmem_swp_unmap(swp_entry_t *entry) -{ - kunmap_atomic(entry, KM_USER1); -} - static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) { return sb->s_fs_info; @@ -244,15 +170,6 @@ static struct backing_dev_info shmem_backing_dev_info __read_mostly = { static LIST_HEAD(shmem_swaplist); static DEFINE_MUTEX(shmem_swaplist_mutex); -static void shmem_free_blocks(struct inode *inode, long pages) -{ - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - if (sbinfo->max_blocks) { - percpu_counter_add(&sbinfo->used_blocks, -pages); - inode->i_blocks -= pages*BLOCKS_PER_PAGE; - } -} - static int shmem_reserve_inode(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); @@ -279,7 +196,7 @@ static void shmem_free_inode(struct super_block *sb) } /** - * shmem_recalc_inode - recalculate the size of an inode + * shmem_recalc_inode - recalculate the block usage of an inode * @inode: inode to recalc * * We have to calculate the free blocks since the mm can drop @@ -297,474 +214,297 @@ static void shmem_recalc_inode(struct inode *inode) freed = info->alloced - info->swapped - inode->i_mapping->nrpages; if (freed > 0) { + struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -freed); info->alloced -= freed; + inode->i_blocks -= freed * BLOCKS_PER_PAGE; shmem_unacct_blocks(info->flags, freed); - shmem_free_blocks(inode, freed); } } -/** - * shmem_swp_entry - find the swap vector position in the info structure - * @info: info structure for the inode - * @index: index of the page to find - * @page: optional page to add to the structure. Has to be preset to - * all zeros - * - * If there is no space allocated yet it will return NULL when - * page is NULL, else it will use the page for the needed block, - * setting it to NULL on return to indicate that it has been used. - * - * The swap vector is organized the following way: - * - * There are SHMEM_NR_DIRECT entries directly stored in the - * shmem_inode_info structure. So small files do not need an addional - * allocation. - * - * For pages with index > SHMEM_NR_DIRECT there is the pointer - * i_indirect which points to a page which holds in the first half - * doubly indirect blocks, in the second half triple indirect blocks: - * - * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the - * following layout (for SHMEM_NR_DIRECT == 16): - * - * i_indirect -> dir --> 16-19 - * | +-> 20-23 - * | - * +-->dir2 --> 24-27 - * | +-> 28-31 - * | +-> 32-35 - * | +-> 36-39 - * | - * +-->dir3 --> 40-43 - * +-> 44-47 - * +-> 48-51 - * +-> 52-55 +/* + * Replace item expected in radix tree by a new item, while holding tree lock. 
*/ -static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) -{ - unsigned long offset; - struct page **dir; - struct page *subdir; - - if (index < SHMEM_NR_DIRECT) { - shmem_swp_balance_unmap(); - return info->i_direct+index; - } - if (!info->i_indirect) { - if (page) { - info->i_indirect = *page; - *page = NULL; - } - return NULL; /* need another page */ - } - - index -= SHMEM_NR_DIRECT; - offset = index % ENTRIES_PER_PAGE; - index /= ENTRIES_PER_PAGE; - dir = shmem_dir_map(info->i_indirect); - - if (index >= ENTRIES_PER_PAGE/2) { - index -= ENTRIES_PER_PAGE/2; - dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; - index %= ENTRIES_PER_PAGE; - subdir = *dir; - if (!subdir) { - if (page) { - *dir = *page; - *page = NULL; - } - shmem_dir_unmap(dir); - return NULL; /* need another page */ - } - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } +static int shmem_radix_tree_replace(struct address_space *mapping, + pgoff_t index, void *expected, void *replacement) +{ + void **pslot; + void *item = NULL; + + VM_BUG_ON(!expected); + pslot = radix_tree_lookup_slot(&mapping->page_tree, index); + if (pslot) + item = radix_tree_deref_slot_protected(pslot, + &mapping->tree_lock); + if (item != expected) + return -ENOENT; + if (replacement) + radix_tree_replace_slot(pslot, replacement); + else + radix_tree_delete(&mapping->page_tree, index); + return 0; +} - dir += index; - subdir = *dir; - if (!subdir) { - if (!page || !(subdir = *page)) { - shmem_dir_unmap(dir); - return NULL; /* need a page */ +/* + * Like add_to_page_cache_locked, but error if expected item has gone. + */ +static int shmem_add_to_page_cache(struct page *page, + struct address_space *mapping, + pgoff_t index, gfp_t gfp, void *expected) +{ + int error = 0; + + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(!PageSwapBacked(page)); + + if (!expected) + error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); + if (!error) { + page_cache_get(page); + page->mapping = mapping; + page->index = index; + + spin_lock_irq(&mapping->tree_lock); + if (!expected) + error = radix_tree_insert(&mapping->page_tree, + index, page); + else + error = shmem_radix_tree_replace(mapping, index, + expected, page); + if (!error) { + mapping->nrpages++; + __inc_zone_page_state(page, NR_FILE_PAGES); + __inc_zone_page_state(page, NR_SHMEM); + spin_unlock_irq(&mapping->tree_lock); + } else { + page->mapping = NULL; + spin_unlock_irq(&mapping->tree_lock); + page_cache_release(page); } - *dir = subdir; - *page = NULL; + if (!expected) + radix_tree_preload_end(); } - shmem_dir_unmap(dir); - return shmem_swp_map(subdir) + offset; + if (error) + mem_cgroup_uncharge_cache_page(page); + return error; } -static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) +/* + * Like delete_from_page_cache, but substitutes swap for page. + */ +static void shmem_delete_from_page_cache(struct page *page, void *radswap) { - long incdec = value? 
1: -1; + struct address_space *mapping = page->mapping; + int error; - entry->val = value; - info->swapped += incdec; - if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { - struct page *page = kmap_atomic_to_page(entry); - set_page_private(page, page_private(page) + incdec); - } + spin_lock_irq(&mapping->tree_lock); + error = shmem_radix_tree_replace(mapping, page->index, page, radswap); + page->mapping = NULL; + mapping->nrpages--; + __dec_zone_page_state(page, NR_FILE_PAGES); + __dec_zone_page_state(page, NR_SHMEM); + spin_unlock_irq(&mapping->tree_lock); + page_cache_release(page); + BUG_ON(error); } -/** - * shmem_swp_alloc - get the position of the swap entry for the page. - * @info: info structure for the inode - * @index: index of the page to find - * @sgp: check and recheck i_size? skip allocation? - * @gfp: gfp mask to use for any page allocation - * - * If the entry does not exist, allocate it. +/* + * Like find_get_pages, but collecting swap entries as well as pages. */ -static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, - unsigned long index, enum sgp_type sgp, gfp_t gfp) -{ - struct inode *inode = &info->vfs_inode; - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - struct page *page = NULL; - swp_entry_t *entry; - - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return ERR_PTR(-EINVAL); - - while (!(entry = shmem_swp_entry(info, index, &page))) { - if (sgp == SGP_READ) - return shmem_swp_map(ZERO_PAGE(0)); - /* - * Test used_blocks against 1 less max_blocks, since we have 1 data - * page (and perhaps indirect index pages) yet to allocate: - * a waste to allocate index if we cannot allocate data. - */ - if (sbinfo->max_blocks) { - if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks - 1) >= 0) - return ERR_PTR(-ENOSPC); - percpu_counter_inc(&sbinfo->used_blocks); - inode->i_blocks += BLOCKS_PER_PAGE; +static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, + pgoff_t start, unsigned int nr_pages, + struct page **pages, pgoff_t *indices) +{ + unsigned int i; + unsigned int ret; + unsigned int nr_found; + + rcu_read_lock(); +restart: + nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, + (void ***)pages, indices, start, nr_pages); + ret = 0; + for (i = 0; i < nr_found; i++) { + struct page *page; +repeat: + page = radix_tree_deref_slot((void **)pages[i]); + if (unlikely(!page)) + continue; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) + goto restart; + /* + * Otherwise, we must be storing a swap entry + * here as an exceptional entry: so return it + * without attempting to raise page count. + */ + goto export; } + if (!page_cache_get_speculative(page)) + goto repeat; - spin_unlock(&info->lock); - page = shmem_dir_alloc(gfp); - spin_lock(&info->lock); - - if (!page) { - shmem_free_blocks(inode, 1); - return ERR_PTR(-ENOMEM); - } - if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { - entry = ERR_PTR(-EINVAL); - break; + /* Has the page moved? 
*/ + if (unlikely(page != *((void **)pages[i]))) { + page_cache_release(page); + goto repeat; } - if (info->next_index <= index) - info->next_index = index + 1; - } - if (page) { - /* another task gave its page, or truncated the file */ - shmem_free_blocks(inode, 1); - shmem_dir_free(page); - } - if (info->next_index <= index && !IS_ERR(entry)) - info->next_index = index + 1; - return entry; +export: + indices[ret] = indices[i]; + pages[ret] = page; + ret++; + } + if (unlikely(!ret && nr_found)) + goto restart; + rcu_read_unlock(); + return ret; } -/** - * shmem_free_swp - free some swap entries in a directory - * @dir: pointer to the directory - * @edir: pointer after last entry of the directory - * @punch_lock: pointer to spinlock when needed for the holepunch case +/* + * Remove swap entry from radix tree, free the swap and its page cache. */ -static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, - spinlock_t *punch_lock) -{ - spinlock_t *punch_unlock = NULL; - swp_entry_t *ptr; - int freed = 0; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val) { - if (unlikely(punch_lock)) { - punch_unlock = punch_lock; - punch_lock = NULL; - spin_lock(punch_unlock); - if (!ptr->val) - continue; - } - free_swap_and_cache(*ptr); - *ptr = (swp_entry_t){0}; - freed++; - } - } - if (punch_unlock) - spin_unlock(punch_unlock); - return freed; -} - -static int shmem_map_and_free_swp(struct page *subdir, int offset, - int limit, struct page ***dir, spinlock_t *punch_lock) -{ - swp_entry_t *ptr; - int freed = 0; - - ptr = shmem_swp_map(subdir); - for (; offset < limit; offset += LATENCY_LIMIT) { - int size = limit - offset; - if (size > LATENCY_LIMIT) - size = LATENCY_LIMIT; - freed += shmem_free_swp(ptr+offset, ptr+offset+size, - punch_lock); - if (need_resched()) { - shmem_swp_unmap(ptr); - if (*dir) { - shmem_dir_unmap(*dir); - *dir = NULL; - } - cond_resched(); - ptr = shmem_swp_map(subdir); - } - } - shmem_swp_unmap(ptr); - return freed; +static int shmem_free_swap(struct address_space *mapping, + pgoff_t index, void *radswap) +{ + int error; + + spin_lock_irq(&mapping->tree_lock); + error = shmem_radix_tree_replace(mapping, index, radswap, NULL); + spin_unlock_irq(&mapping->tree_lock); + if (!error) + free_swap_and_cache(radix_to_swp_entry(radswap)); + return error; } -static void shmem_free_pages(struct list_head *next) +/* + * Pagevec may contain swap entries, so shuffle up pages before releasing. + */ +static void shmem_pagevec_release(struct pagevec *pvec) { - struct page *page; - int freed = 0; - - do { - page = container_of(next, struct page, lru); - next = next->next; - shmem_dir_free(page); - freed++; - if (freed >= LATENCY_LIMIT) { - cond_resched(); - freed = 0; - } - } while (next); + int i, j; + + for (i = 0, j = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + if (!radix_tree_exceptional_entry(page)) + pvec->pages[j++] = page; + } + pvec->nr = j; + pagevec_release(pvec); } -void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +/* + * Remove range of pages and swap entries from radix tree, and free them. 
+ */ +void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { + struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); - unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - unsigned long diroff; - struct page **dir; - struct page *topdir; - struct page *middir; - struct page *subdir; - swp_entry_t *ptr; - LIST_HEAD(pages_to_free); - long nr_pages_to_free = 0; + pgoff_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); + pgoff_t end = (lend >> PAGE_CACHE_SHIFT); + struct pagevec pvec; + pgoff_t indices[PAGEVEC_SIZE]; long nr_swaps_freed = 0; - int offset; - int freed; - int punch_hole; - spinlock_t *needs_lock; - spinlock_t *punch_lock; - unsigned long upper_limit; + pgoff_t index; + int i; - truncate_inode_pages_range(inode->i_mapping, start, end); + BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (idx >= info->next_index) - return; + pagevec_init(&pvec, 0); + index = start; + while (index <= end) { + pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + pvec.pages, indices); + if (!pvec.nr) + break; + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; - spin_lock(&info->lock); - info->flags |= SHMEM_TRUNCATE; - if (likely(end == (loff_t) -1)) { - limit = info->next_index; - upper_limit = SHMEM_MAX_INDEX; - info->next_index = idx; - needs_lock = NULL; - punch_hole = 0; - } else { - if (end + 1 >= inode->i_size) { /* we may free a little more */ - limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - upper_limit = SHMEM_MAX_INDEX; - } else { - limit = (end + 1) >> PAGE_CACHE_SHIFT; - upper_limit = limit; - } - needs_lock = &info->lock; - punch_hole = 1; - } + index = indices[i]; + if (index > end) + break; + + if (radix_tree_exceptional_entry(page)) { + nr_swaps_freed += !shmem_free_swap(mapping, + index, page); + continue; + } - topdir = info->i_indirect; - if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { - info->i_indirect = NULL; - nr_pages_to_free++; - list_add(&topdir->lru, &pages_to_free); + if (!trylock_page(page)) + continue; + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); + } + unlock_page(page); + } + shmem_pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + cond_resched(); + index++; } - spin_unlock(&info->lock); - if (info->swapped && idx < SHMEM_NR_DIRECT) { - ptr = info->i_direct; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); + if (partial) { + struct page *page = NULL; + shmem_getpage(inode, start - 1, &page, SGP_READ, NULL); + if (page) { + zero_user_segment(page, partial, PAGE_CACHE_SIZE); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + } } - /* - * If there are no indirect blocks or we are punching a hole - * below indirect blocks, nothing to be done. 
- */ - if (!topdir || limit <= SHMEM_NR_DIRECT) - goto done2; + index = start; + for ( ; ; ) { + cond_resched(); + pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + pvec.pages, indices); + if (!pvec.nr) { + if (index == start) + break; + index = start; + continue; + } + if (index == start && indices[0] > end) { + shmem_pagevec_release(&pvec); + break; + } + mem_cgroup_uncharge_start(); + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; - /* - * The truncation case has already dropped info->lock, and we're safe - * because i_size and next_index have already been lowered, preventing - * access beyond. But in the punch_hole case, we still need to take - * the lock when updating the swap directory, because there might be - * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or - * shmem_writepage. However, whenever we find we can remove a whole - * directory page (not at the misaligned start or end of the range), - * we first NULLify its pointer in the level above, and then have no - * need to take the lock when updating its contents: needs_lock and - * punch_lock (either pointing to info->lock or NULL) manage this. - */ + index = indices[i]; + if (index > end) + break; - upper_limit -= SHMEM_NR_DIRECT; - limit -= SHMEM_NR_DIRECT; - idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0; - offset = idx % ENTRIES_PER_PAGE; - idx -= offset; - - dir = shmem_dir_map(topdir); - stage = ENTRIES_PER_PAGEPAGE/2; - if (idx < ENTRIES_PER_PAGEPAGE/2) { - middir = topdir; - diroff = idx/ENTRIES_PER_PAGE; - } else { - dir += ENTRIES_PER_PAGE/2; - dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; - while (stage <= idx) - stage += ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (*dir) { - diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % - ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; - if (!diroff && !offset && upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); + if (radix_tree_exceptional_entry(page)) { + nr_swaps_freed += !shmem_free_swap(mapping, + index, page); + continue; } - shmem_dir_unmap(dir); - dir = shmem_dir_map(middir); - } else { - diroff = 0; - offset = 0; - idx = stage; - } - } - for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir); - dir = shmem_dir_map(topdir) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto done1; - } - stage = idx + ENTRIES_PER_PAGEPAGE; - middir = *dir; - if (punch_hole) - needs_lock = &info->lock; - if (upper_limit >= stage) { - if (needs_lock) { - spin_lock(needs_lock); - *dir = NULL; - spin_unlock(needs_lock); - needs_lock = NULL; - } else - *dir = NULL; - nr_pages_to_free++; - list_add(&middir->lru, &pages_to_free); + lock_page(page); + if (page->mapping == mapping) { + VM_BUG_ON(PageWriteback(page)); + truncate_inode_page(mapping, page); } - shmem_dir_unmap(dir); - cond_resched(); - dir = shmem_dir_map(middir); - diroff = 0; - } - punch_lock = needs_lock; - subdir = dir[diroff]; - if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { - if (needs_lock) { - spin_lock(needs_lock); - dir[diroff] = NULL; - spin_unlock(needs_lock); - punch_lock = NULL; - } else - dir[diroff] = NULL; - nr_pages_to_free++; - list_add(&subdir->lru, &pages_to_free); - } - 
if (subdir && page_private(subdir) /* has swap entries */) { - size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - freed = shmem_map_and_free_swp(subdir, - offset, size, &dir, punch_lock); - if (!dir) - dir = shmem_dir_map(middir); - nr_swaps_freed += freed; - if (offset || punch_lock) { - spin_lock(&info->lock); - set_page_private(subdir, - page_private(subdir) - freed); - spin_unlock(&info->lock); - } else - BUG_ON(page_private(subdir) != freed); + unlock_page(page); } - offset = 0; - } -done1: - shmem_dir_unmap(dir); -done2: - if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { - /* - * Call truncate_inode_pages again: racing shmem_unuse_inode - * may have swizzled a page in from swap since - * truncate_pagecache or generic_delete_inode did it, before we - * lowered next_index. Also, though shmem_getpage checks - * i_size before adding to cache, no recheck after: so fix the - * narrow window there too. - */ - truncate_inode_pages_range(inode->i_mapping, start, end); + shmem_pagevec_release(&pvec); + mem_cgroup_uncharge_end(); + index++; } spin_lock(&info->lock); - info->flags &= ~SHMEM_TRUNCATE; info->swapped -= nr_swaps_freed; - if (nr_pages_to_free) - shmem_free_blocks(inode, nr_pages_to_free); shmem_recalc_inode(inode); spin_unlock(&info->lock); - /* - * Empty swap vector directory pages to be freed? - */ - if (!list_empty(&pages_to_free)) { - pages_to_free.prev->next = NULL; - shmem_free_pages(pages_to_free.next); - } + inode->i_ctime = inode->i_mtime = CURRENT_TIME; } EXPORT_SYMBOL_GPL(shmem_truncate_range); @@ -780,37 +520,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { loff_t oldsize = inode->i_size; loff_t newsize = attr->ia_size; - struct page *page = NULL; - if (newsize < oldsize) { - /* - * If truncating down to a partial page, then - * if that page is already allocated, hold it - * in memory until the truncation is over, so - * truncate_partial_page cannot miss it were - * it assigned to swap. - */ - if (newsize & (PAGE_CACHE_SIZE-1)) { - (void) shmem_getpage(inode, - newsize >> PAGE_CACHE_SHIFT, - &page, SGP_READ, NULL); - if (page) - unlock_page(page); - } - /* - * Reset SHMEM_PAGEIN flag so that shmem_truncate can - * detect if any pages might have been added to cache - * after truncate_inode_pages. But we needn't bother - * if it's being fully truncated to zero-length: the - * nrpages check is efficient enough in that case. 
- */ - if (newsize) { - struct shmem_inode_info *info = SHMEM_I(inode); - spin_lock(&info->lock); - info->flags &= ~SHMEM_PAGEIN; - spin_unlock(&info->lock); - } - } if (newsize != oldsize) { i_size_write(inode, newsize); inode->i_ctime = inode->i_mtime = CURRENT_TIME; @@ -822,8 +532,6 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) /* unmap again to remove racily COWed private pages */ unmap_mapping_range(inode->i_mapping, holebegin, 0, 1); } - if (page) - page_cache_release(page); } setattr_copy(inode, attr); @@ -848,7 +556,8 @@ static void shmem_evict_inode(struct inode *inode) list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } - } + } else + kfree(info->symlink); list_for_each_entry_safe(xattr, nxattr, &info->xattr_list, list) { kfree(xattr->name); @@ -859,106 +568,27 @@ static void shmem_evict_inode(struct inode *inode) end_writeback(inode); } -static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) -{ - swp_entry_t *ptr; - - for (ptr = dir; ptr < edir; ptr++) { - if (ptr->val == entry.val) - return ptr - dir; - } - return -1; -} - -static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) +/* + * If swap found in inode, free it and move page from swapcache to filecache. + */ +static int shmem_unuse_inode(struct shmem_inode_info *info, + swp_entry_t swap, struct page *page) { - struct address_space *mapping; - unsigned long idx; - unsigned long size; - unsigned long limit; - unsigned long stage; - struct page **dir; - struct page *subdir; - swp_entry_t *ptr; - int offset; + struct address_space *mapping = info->vfs_inode.i_mapping; + void *radswap; + pgoff_t index; int error; - idx = 0; - ptr = info->i_direct; - spin_lock(&info->lock); - if (!info->swapped) { - list_del_init(&info->swaplist); - goto lost2; - } - limit = info->next_index; - size = limit; - if (size > SHMEM_NR_DIRECT) - size = SHMEM_NR_DIRECT; - offset = shmem_find_swp(entry, ptr, ptr+size); - if (offset >= 0) { - shmem_swp_balance_unmap(); - goto found; - } - if (!info->i_indirect) - goto lost2; - - dir = shmem_dir_map(info->i_indirect); - stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; - - for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { - if (unlikely(idx == stage)) { - shmem_dir_unmap(dir-1); - if (cond_resched_lock(&info->lock)) { - /* check it has not been truncated */ - if (limit > info->next_index) { - limit = info->next_index; - if (idx >= limit) - goto lost2; - } - } - dir = shmem_dir_map(info->i_indirect) + - ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; - while (!*dir) { - dir++; - idx += ENTRIES_PER_PAGEPAGE; - if (idx >= limit) - goto lost1; - } - stage = idx + ENTRIES_PER_PAGEPAGE; - subdir = *dir; - shmem_dir_unmap(dir); - dir = shmem_dir_map(subdir); - } - subdir = *dir; - if (subdir && page_private(subdir)) { - ptr = shmem_swp_map(subdir); - size = limit - idx; - if (size > ENTRIES_PER_PAGE) - size = ENTRIES_PER_PAGE; - offset = shmem_find_swp(entry, ptr, ptr+size); - shmem_swp_unmap(ptr); - if (offset >= 0) { - shmem_dir_unmap(dir); - ptr = shmem_swp_map(subdir); - goto found; - } - } - } -lost1: - shmem_dir_unmap(dir-1); -lost2: - spin_unlock(&info->lock); - return 0; -found: - idx += offset; - ptr += offset; + radswap = swp_to_radix_entry(swap); + index = radix_tree_locate_item(&mapping->page_tree, radswap); + if (index == -1) + return 0; /* * Move _head_ to start search for next from here. 
* But be careful: shmem_evict_inode checks list_empty without taking * mutex, and there's an instant in list_move_tail when info->swaplist - * would appear empty, if it were the only one on shmem_swaplist. We - * could avoid doing it if inode NULL; or use this minor optimization. + * would appear empty, if it were the only one on shmem_swaplist. */ if (shmem_swaplist.next != &info->swaplist) list_move_tail(&shmem_swaplist, &info->swaplist); @@ -968,29 +598,34 @@ found: * but also to hold up shmem_evict_inode(): so inode cannot be freed * beneath us (pagelock doesn't help until the page is in pagecache). */ - mapping = info->vfs_inode.i_mapping; - error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT); + error = shmem_add_to_page_cache(page, mapping, index, + GFP_NOWAIT, radswap); /* which does mem_cgroup_uncharge_cache_page on error */ if (error != -ENOMEM) { + /* + * Truncation and eviction use free_swap_and_cache(), which + * only does trylock page: if we raced, best clean up here. + */ delete_from_swap_cache(page); set_page_dirty(page); - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, ptr, 0); - swap_free(entry); + if (!error) { + spin_lock(&info->lock); + info->swapped--; + spin_unlock(&info->lock); + swap_free(swap); + } error = 1; /* not an error, but entry was found */ } - shmem_swp_unmap(ptr); - spin_unlock(&info->lock); return error; } /* - * shmem_unuse() search for an eventually swapped out shmem page. + * Search through swapped inodes to find and replace swap by page. */ -int shmem_unuse(swp_entry_t entry, struct page *page) +int shmem_unuse(swp_entry_t swap, struct page *page) { - struct list_head *p, *next; + struct list_head *this, *next; struct shmem_inode_info *info; int found = 0; int error; @@ -999,32 +634,25 @@ int shmem_unuse(swp_entry_t entry, struct page *page) * Charge page using GFP_KERNEL while we can wait, before taking * the shmem_swaplist_mutex which might hold up shmem_writepage(). * Charged back to the user (not to caller) when swap account is used. - * add_to_page_cache() will be called with GFP_NOWAIT. */ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) goto out; - /* - * Try to preload while we can wait, to not make a habit of - * draining atomic reserves; but don't latch on to this cpu, - * it's okay if sometimes we get rescheduled after this. 
- */ - error = radix_tree_preload(GFP_KERNEL); - if (error) - goto uncharge; - radix_tree_preload_end(); + /* No radix_tree_preload: swap entry keeps a place for page in tree */ mutex_lock(&shmem_swaplist_mutex); - list_for_each_safe(p, next, &shmem_swaplist) { - info = list_entry(p, struct shmem_inode_info, swaplist); - found = shmem_unuse_inode(info, entry, page); + list_for_each_safe(this, next, &shmem_swaplist) { + info = list_entry(this, struct shmem_inode_info, swaplist); + if (info->swapped) + found = shmem_unuse_inode(info, swap, page); + else + list_del_init(&info->swaplist); cond_resched(); if (found) break; } mutex_unlock(&shmem_swaplist_mutex); -uncharge: if (!found) mem_cgroup_uncharge_cache_page(page); if (found < 0) @@ -1041,10 +669,10 @@ out: static int shmem_writepage(struct page *page, struct writeback_control *wbc) { struct shmem_inode_info *info; - swp_entry_t *entry, swap; struct address_space *mapping; - unsigned long index; struct inode *inode; + swp_entry_t swap; + pgoff_t index; BUG_ON(!PageLocked(page)); mapping = page->mapping; @@ -1073,50 +701,32 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) /* * Add inode to shmem_unuse()'s list of swapped-out inodes, - * if it's not already there. Do it now because we cannot take - * mutex while holding spinlock, and must do so before the page - * is moved to swap cache, when its pagelock no longer protects + * if it's not already there. Do it now before the page is + * moved to swap cache, when its pagelock no longer protects * the inode from eviction. But don't unlock the mutex until - * we've taken the spinlock, because shmem_unuse_inode() will - * prune a !swapped inode from the swaplist under both locks. + * we've incremented swapped, because shmem_unuse_inode() will + * prune a !swapped inode from the swaplist under this mutex. */ mutex_lock(&shmem_swaplist_mutex); if (list_empty(&info->swaplist)) list_add_tail(&info->swaplist, &shmem_swaplist); - spin_lock(&info->lock); - mutex_unlock(&shmem_swaplist_mutex); - - if (index >= info->next_index) { - BUG_ON(!(info->flags & SHMEM_TRUNCATE)); - goto unlock; - } - entry = shmem_swp_entry(info, index, NULL); - if (entry->val) { - WARN_ON_ONCE(1); /* Still happens? Tell us about it! */ - free_swap_and_cache(*entry); - shmem_swp_set(info, entry, 0); - } - shmem_recalc_inode(inode); - if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { - delete_from_page_cache(page); - shmem_swp_set(info, entry, swap.val); - shmem_swp_unmap(entry); swap_shmem_alloc(swap); + shmem_delete_from_page_cache(page, swp_to_radix_entry(swap)); + + spin_lock(&info->lock); + info->swapped++; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + + mutex_unlock(&shmem_swaplist_mutex); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); return 0; } - shmem_swp_unmap(entry); -unlock: - spin_unlock(&info->lock); - /* - * add_to_swap_cache() doesn't return -EEXIST, so we can safely - * clear SWAP_HAS_CACHE flag. 
- */ + mutex_unlock(&shmem_swaplist_mutex); swapcache_free(swap, NULL); redirty: set_page_dirty(page); @@ -1153,35 +763,33 @@ static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) } #endif /* CONFIG_TMPFS */ -static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index) { struct mempolicy mpol, *spol; struct vm_area_struct pvma; - struct page *page; spol = mpol_cond_copy(&mpol, - mpol_shared_policy_lookup(&info->policy, idx)); + mpol_shared_policy_lookup(&info->policy, index)); /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = idx; + pvma.vm_pgoff = index; pvma.vm_ops = NULL; pvma.vm_policy = spol; - page = swapin_readahead(entry, gfp, &pvma, 0); - return page; + return swapin_readahead(swap, gfp, &pvma, 0); } static struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) + struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; /* Create a pseudo vma that just contains the policy */ pvma.vm_start = 0; - pvma.vm_pgoff = idx; + pvma.vm_pgoff = index; pvma.vm_ops = NULL; - pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, index); /* * alloc_page_vma() will drop the shared policy reference @@ -1190,19 +798,19 @@ static struct page *shmem_alloc_page(gfp_t gfp, } #else /* !CONFIG_NUMA */ #ifdef CONFIG_TMPFS -static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p) +static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) { } #endif /* CONFIG_TMPFS */ -static inline struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) +static inline struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, + struct shmem_inode_info *info, pgoff_t index) { - return swapin_readahead(entry, gfp, NULL, 0); + return swapin_readahead(swap, gfp, NULL, 0); } static inline struct page *shmem_alloc_page(gfp_t gfp, - struct shmem_inode_info *info, unsigned long idx) + struct shmem_inode_info *info, pgoff_t index) { return alloc_page(gfp); } @@ -1222,243 +830,190 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo) * vm. 
If we swap it in we mark it dirty since we also free the swap * entry since a page cannot live in both the swap and page cache */ -static int shmem_getpage_gfp(struct inode *inode, pgoff_t idx, +static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type) { struct address_space *mapping = inode->i_mapping; - struct shmem_inode_info *info = SHMEM_I(inode); + struct shmem_inode_info *info; struct shmem_sb_info *sbinfo; struct page *page; - struct page *prealloc_page = NULL; - swp_entry_t *entry; swp_entry_t swap; int error; - int ret; + int once = 0; - if (idx >= SHMEM_MAX_INDEX) + if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT)) return -EFBIG; repeat: - page = find_lock_page(mapping, idx); - if (page) { + swap.val = 0; + page = find_lock_page(mapping, index); + if (radix_tree_exceptional_entry(page)) { + swap = radix_to_swp_entry(page); + page = NULL; + } + + if (sgp != SGP_WRITE && + ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + error = -EINVAL; + goto failed; + } + + if (page || (sgp == SGP_READ && !swap.val)) { /* * Once we can get the page lock, it must be uptodate: * if there were an error in reading back from swap, * the page would not be inserted into the filecache. */ - BUG_ON(!PageUptodate(page)); - goto done; + BUG_ON(page && !PageUptodate(page)); + *pagep = page; + return 0; } /* - * Try to preload while we can wait, to not make a habit of - * draining atomic reserves; but don't latch on to this cpu. + * Fast cache lookup did not find it: + * bring it back from swap or allocate. */ - error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); - if (error) - goto out; - radix_tree_preload_end(); - - if (sgp != SGP_READ && !prealloc_page) { - prealloc_page = shmem_alloc_page(gfp, info, idx); - if (prealloc_page) { - SetPageSwapBacked(prealloc_page); - if (mem_cgroup_cache_charge(prealloc_page, - current->mm, GFP_KERNEL)) { - page_cache_release(prealloc_page); - prealloc_page = NULL; - } - } - } - - spin_lock(&info->lock); - shmem_recalc_inode(inode); - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) { - spin_unlock(&info->lock); - error = PTR_ERR(entry); - goto out; - } - swap = *entry; + info = SHMEM_I(inode); + sbinfo = SHMEM_SB(inode->i_sb); if (swap.val) { /* Look it up and read it in.. 
*/ page = lookup_swap_cache(swap); if (!page) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); /* here we actually do the io */ if (fault_type) *fault_type |= VM_FAULT_MAJOR; - page = shmem_swapin(swap, gfp, info, idx); + page = shmem_swapin(swap, gfp, info, index); if (!page) { - spin_lock(&info->lock); - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - if (entry->val == swap.val) - error = -ENOMEM; - shmem_swp_unmap(entry); - } - spin_unlock(&info->lock); - if (error) - goto out; - goto repeat; + error = -ENOMEM; + goto failed; } - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; } /* We have to do this with page locked to prevent races */ - if (!trylock_page(page)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; - } - if (PageWriteback(page)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - wait_on_page_writeback(page); - unlock_page(page); - page_cache_release(page); - goto repeat; - } + lock_page(page); if (!PageUptodate(page)) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - unlock_page(page); - page_cache_release(page); error = -EIO; - goto out; + goto failed; } - - error = add_to_page_cache_locked(page, mapping, - idx, GFP_NOWAIT); - if (error) { - shmem_swp_unmap(entry); - spin_unlock(&info->lock); - if (error == -ENOMEM) { - /* - * reclaim from proper memory cgroup and - * call memcg's OOM if needed. - */ - error = mem_cgroup_shmem_charge_fallback( - page, current->mm, gfp); - if (error) { - unlock_page(page); - page_cache_release(page); - goto out; - } - } - unlock_page(page); - page_cache_release(page); - goto repeat; + wait_on_page_writeback(page); + + /* Someone may have already done it for us */ + if (page->mapping) { + if (page->mapping == mapping && + page->index == index) + goto done; + error = -EEXIST; + goto failed; } - info->flags |= SHMEM_PAGEIN; - shmem_swp_set(info, entry, 0); - shmem_swp_unmap(entry); - delete_from_swap_cache(page); + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (!error) + error = shmem_add_to_page_cache(page, mapping, index, + gfp, swp_to_radix_entry(swap)); + if (error) + goto failed; + + spin_lock(&info->lock); + info->swapped--; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + + delete_from_swap_cache(page); set_page_dirty(page); swap_free(swap); - } else if (sgp == SGP_READ) { - shmem_swp_unmap(entry); - page = find_get_page(mapping, idx); - if (page && !trylock_page(page)) { - spin_unlock(&info->lock); - wait_on_page_locked(page); - page_cache_release(page); - goto repeat; + } else { + if (shmem_acct_block(info->flags)) { + error = -ENOSPC; + goto failed; } - spin_unlock(&info->lock); - - } else if (prealloc_page) { - shmem_swp_unmap(entry); - sbinfo = SHMEM_SB(inode->i_sb); if (sbinfo->max_blocks) { if (percpu_counter_compare(&sbinfo->used_blocks, - sbinfo->max_blocks) >= 0 || - shmem_acct_block(info->flags)) - goto nospace; + sbinfo->max_blocks) >= 0) { + error = -ENOSPC; + goto unacct; + } percpu_counter_inc(&sbinfo->used_blocks); - inode->i_blocks += BLOCKS_PER_PAGE; - } else if (shmem_acct_block(info->flags)) - goto nospace; - - page = prealloc_page; - prealloc_page = NULL; - - entry = shmem_swp_alloc(info, idx, sgp, gfp); - if (IS_ERR(entry)) - error = PTR_ERR(entry); - else { - swap = *entry; - shmem_swp_unmap(entry); } - ret = error || swap.val; - if (ret) - mem_cgroup_uncharge_cache_page(page); - else - ret = 
add_to_page_cache_lru(page, mapping, - idx, GFP_NOWAIT); - /* - * At add_to_page_cache_lru() failure, - * uncharge will be done automatically. - */ - if (ret) { - shmem_unacct_blocks(info->flags, 1); - shmem_free_blocks(inode, 1); - spin_unlock(&info->lock); - page_cache_release(page); - if (error) - goto out; - goto repeat; + + page = shmem_alloc_page(gfp, info, index); + if (!page) { + error = -ENOMEM; + goto decused; } - info->flags |= SHMEM_PAGEIN; + SetPageSwapBacked(page); + __set_page_locked(page); + error = mem_cgroup_cache_charge(page, current->mm, + gfp & GFP_RECLAIM_MASK); + if (!error) + error = shmem_add_to_page_cache(page, mapping, index, + gfp, NULL); + if (error) + goto decused; + lru_cache_add_anon(page); + + spin_lock(&info->lock); info->alloced++; + inode->i_blocks += BLOCKS_PER_PAGE; + shmem_recalc_inode(inode); spin_unlock(&info->lock); + clear_highpage(page); flush_dcache_page(page); SetPageUptodate(page); if (sgp == SGP_DIRTY) set_page_dirty(page); - - } else { - spin_unlock(&info->lock); - error = -ENOMEM; - goto out; } done: - *pagep = page; - error = 0; -out: - if (prealloc_page) { - mem_cgroup_uncharge_cache_page(prealloc_page); - page_cache_release(prealloc_page); + /* Perhaps the file has been truncated since we checked */ + if (sgp != SGP_WRITE && + ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { + error = -EINVAL; + goto trunc; } - return error; + *pagep = page; + return 0; -nospace: /* - * Perhaps the page was brought in from swap between find_lock_page - * and taking info->lock? We allow for that at add_to_page_cache_lru, - * but must also avoid reporting a spurious ENOSPC while working on a - * full tmpfs. + * Error recovery. */ - page = find_get_page(mapping, idx); +trunc: + ClearPageDirty(page); + delete_from_page_cache(page); + spin_lock(&info->lock); + info->alloced--; + inode->i_blocks -= BLOCKS_PER_PAGE; spin_unlock(&info->lock); +decused: + if (sbinfo->max_blocks) + percpu_counter_add(&sbinfo->used_blocks, -1); +unacct: + shmem_unacct_blocks(info->flags, 1); +failed: + if (swap.val && error != -EINVAL) { + struct page *test = find_get_page(mapping, index); + if (test && !radix_tree_exceptional_entry(test)) + page_cache_release(test); + /* Have another try if the entry has changed */ + if (test != swp_to_radix_entry(swap)) + error = -EEXIST; + } if (page) { + unlock_page(page); page_cache_release(page); + } + if (error == -ENOSPC && !once++) { + info = SHMEM_I(inode); + spin_lock(&info->lock); + shmem_recalc_inode(inode); + spin_unlock(&info->lock); goto repeat; } - error = -ENOSPC; - goto out; + if (error == -EEXIST) + goto repeat; + return error; } static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -1467,9 +1022,6 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int error; int ret = VM_FAULT_LOCKED; - if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) - return VM_FAULT_SIGBUS; - error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); if (error) return ((error == -ENOMEM) ? 
VM_FAULT_OOM : VM_FAULT_SIGBUS); @@ -1482,20 +1034,20 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } #ifdef CONFIG_NUMA -static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol) { - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol); } static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) { - struct inode *i = vma->vm_file->f_path.dentry->d_inode; - unsigned long idx; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + pgoff_t index; - idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); + index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index); } #endif @@ -1593,7 +1145,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; -static const struct inode_operations shmem_symlink_inline_operations; +static const struct inode_operations shmem_short_symlink_operations; static int shmem_write_begin(struct file *file, struct address_space *mapping, @@ -1626,7 +1178,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ { struct inode *inode = filp->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; - unsigned long index, offset; + pgoff_t index; + unsigned long offset; enum sgp_type sgp = SGP_READ; /* @@ -1642,7 +1195,8 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_ for (;;) { struct page *page = NULL; - unsigned long end_index, nr, ret; + pgoff_t end_index; + unsigned long nr, ret; loff_t i_size = i_size_read(inode); end_index = i_size >> PAGE_CACHE_SHIFT; @@ -1880,8 +1434,9 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = NAME_MAX; if (sbinfo->max_blocks) { buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = buf->f_bfree = - sbinfo->max_blocks - percpu_counter_sum(&sbinfo->used_blocks); + buf->f_bavail = + buf->f_bfree = sbinfo->max_blocks - + percpu_counter_sum(&sbinfo->used_blocks); } if (sbinfo->max_inodes) { buf->f_files = sbinfo->max_inodes; @@ -2055,10 +1610,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s info = SHMEM_I(inode); inode->i_size = len-1; - if (len <= SHMEM_SYMLINK_INLINE_LEN) { - /* do it inline */ - memcpy(info->inline_symlink, symname, len); - inode->i_op = &shmem_symlink_inline_operations; + if (len <= SHORT_SYMLINK_LEN) { + info->symlink = kmemdup(symname, len, GFP_KERNEL); + if (!info->symlink) { + iput(inode); + return -ENOMEM; + } + inode->i_op = &shmem_short_symlink_operations; } else { error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); if (error) { @@ -2081,17 +1639,17 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s return 0; } -static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) +static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd) { - nd_set_link(nd, SHMEM_I(dentry->d_inode)->inline_symlink); + nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink); return NULL; } 
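
The short-symlink change above replaces the old inline-in-inode scheme with a kmemdup'ed buffer. Below is a minimal standalone sketch in userspace C, with invented toy_* names (only SHORT_SYMLINK_LEN and the copy-at-create idea come from the patch): targets within the threshold are duplicated into a small heap buffer when the link is created, so following the link is a plain pointer read, with no page to allocate, fault in, or lock; longer targets still go through a swappable page via shmem_getpage().

/*
 * Standalone userspace sketch (not kernel code) of the short-symlink
 * scheme. The toy_* names are invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SHORT_SYMLINK_LEN 128

struct toy_inode {
	char *symlink;	/* short target, kept in a small heap buffer */
	size_t i_size;
};

static char *toy_kmemdup(const char *src, size_t len)
{
	char *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}

/* Mirrors the short branch of shmem_symlink(): copy the target inline. */
static int toy_symlink(struct toy_inode *inode, const char *symname)
{
	size_t len = strlen(symname) + 1;

	inode->i_size = len - 1;
	if (len > SHORT_SYMLINK_LEN)
		return -1;	/* the real code falls back to a swappable page */
	inode->symlink = toy_kmemdup(symname, len);
	return inode->symlink ? 0 : -1;
}

/* Mirrors shmem_follow_short_symlink(): a pointer read, no paging. */
static const char *toy_follow(const struct toy_inode *inode)
{
	return inode->symlink;
}

int main(void)
{
	struct toy_inode inode = { 0 };

	if (toy_symlink(&inode, "/tmp/target") == 0)
		printf("follow -> %s\n", toy_follow(&inode));
	free(inode.symlink);
	return 0;
}
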
static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) { struct page *page = NULL; - int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); - nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); + int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); + nd_set_link(nd, error ? ERR_PTR(error) : kmap(page)); if (page) unlock_page(page); return page; @@ -2202,7 +1760,6 @@ out: return err; } - static const struct xattr_handler *shmem_xattr_handlers[] = { #ifdef CONFIG_TMPFS_POSIX_ACL &generic_acl_access_handler, @@ -2332,9 +1889,9 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) } #endif /* CONFIG_TMPFS_XATTR */ -static const struct inode_operations shmem_symlink_inline_operations = { +static const struct inode_operations shmem_short_symlink_operations = { .readlink = generic_readlink, - .follow_link = shmem_follow_link_inline, + .follow_link = shmem_follow_short_symlink, #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, .getxattr = shmem_getxattr, @@ -2534,8 +2091,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) if (config.max_inodes < inodes) goto out; /* - * Those tests also disallow limited->unlimited while any are in - * use, so i_blocks will always be zero when max_blocks is zero; + * Those tests disallow limited->unlimited while any are in use; * but we must separately disallow unlimited->limited, because * in that case we have no record of how much is already in use. */ @@ -2627,7 +2183,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; - sb->s_maxbytes = SHMEM_MAX_BYTES; + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = TMPFS_MAGIC; @@ -2662,14 +2218,14 @@ static struct kmem_cache *shmem_inode_cachep; static struct inode *shmem_alloc_inode(struct super_block *sb) { - struct shmem_inode_info *p; - p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); - if (!p) + struct shmem_inode_info *info; + info = kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); + if (!info) return NULL; - return &p->vfs_inode; + return &info->vfs_inode; } -static void shmem_i_callback(struct rcu_head *head) +static void shmem_destroy_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); INIT_LIST_HEAD(&inode->i_dentry); @@ -2678,29 +2234,26 @@ static void shmem_i_callback(struct rcu_head *head) static void shmem_destroy_inode(struct inode *inode) { - if ((inode->i_mode & S_IFMT) == S_IFREG) { - /* only struct inode is valid if it's an inline symlink */ + if ((inode->i_mode & S_IFMT) == S_IFREG) mpol_free_shared_policy(&SHMEM_I(inode)->policy); - } - call_rcu(&inode->i_rcu, shmem_i_callback); + call_rcu(&inode->i_rcu, shmem_destroy_callback); } -static void init_once(void *foo) +static void shmem_init_inode(void *foo) { - struct shmem_inode_info *p = (struct shmem_inode_info *) foo; - - inode_init_once(&p->vfs_inode); + struct shmem_inode_info *info = foo; + inode_init_once(&info->vfs_inode); } -static int init_inodecache(void) +static int shmem_init_inodecache(void) { shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", sizeof(struct shmem_inode_info), - 0, SLAB_PANIC, init_once); + 0, SLAB_PANIC, shmem_init_inode); return 0; } -static void destroy_inodecache(void) +static void shmem_destroy_inodecache(void) { kmem_cache_destroy(shmem_inode_cachep); } @@ -2797,21 
+2350,20 @@ static const struct vm_operations_struct shmem_vm_ops = { #endif }; - static struct dentry *shmem_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_nodev(fs_type, flags, data, shmem_fill_super); } -static struct file_system_type tmpfs_fs_type = { +static struct file_system_type shmem_fs_type = { .owner = THIS_MODULE, .name = "tmpfs", .mount = shmem_mount, .kill_sb = kill_litter_super, }; -int __init init_tmpfs(void) +int __init shmem_init(void) { int error; @@ -2819,18 +2371,18 @@ int __init init_tmpfs(void) if (error) goto out4; - error = init_inodecache(); + error = shmem_init_inodecache(); if (error) goto out3; - error = register_filesystem(&tmpfs_fs_type); + error = register_filesystem(&shmem_fs_type); if (error) { printk(KERN_ERR "Could not register tmpfs\n"); goto out2; } - shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, - tmpfs_fs_type.name, NULL); + shm_mnt = vfs_kern_mount(&shmem_fs_type, MS_NOUSER, + shmem_fs_type.name, NULL); if (IS_ERR(shm_mnt)) { error = PTR_ERR(shm_mnt); printk(KERN_ERR "Could not kern_mount tmpfs\n"); @@ -2839,9 +2391,9 @@ int __init init_tmpfs(void) return 0; out1: - unregister_filesystem(&tmpfs_fs_type); + unregister_filesystem(&shmem_fs_type); out2: - destroy_inodecache(); + shmem_destroy_inodecache(); out3: bdi_destroy(&shmem_backing_dev_info); out4: @@ -2849,45 +2401,6 @@ out4: return error; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR -/** - * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file - * @inode: the inode to be searched - * @pgoff: the offset to be searched - * @pagep: the pointer for the found page to be stored - * @ent: the pointer for the found swap entry to be stored - * - * If a page is found, refcount of it is incremented. Callers should handle - * these refcount. 
@@ -2849,45 +2401,6 @@ out4:
         return error;
 }
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-/**
- * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
- * @inode: the inode to be searched
- * @pgoff: the offset to be searched
- * @pagep: the pointer for the found page to be stored
- * @ent: the pointer for the found swap entry to be stored
- *
- * If a page is found, refcount of it is incremented. Callers should handle
- * these refcount.
- */
-void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
-                                        struct page **pagep, swp_entry_t *ent)
-{
-        swp_entry_t entry = { .val = 0 }, *ptr;
-        struct page *page = NULL;
-        struct shmem_inode_info *info = SHMEM_I(inode);
-
-        if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-                goto out;
-
-        spin_lock(&info->lock);
-        ptr = shmem_swp_entry(info, pgoff, NULL);
-#ifdef CONFIG_SWAP
-        if (ptr && ptr->val) {
-                entry.val = ptr->val;
-                page = find_get_page(&swapper_space, entry.val);
-        } else
-#endif
-                page = find_get_page(inode->i_mapping, pgoff);
-        if (ptr)
-                shmem_swp_unmap(ptr);
-        spin_unlock(&info->lock);
-out:
-        *pagep = page;
-        *ent = entry;
-}
-#endif
-
 #else /* !CONFIG_SHMEM */
 
 /*
@@ -2901,23 +2414,23 @@ out:
 #include <linux/ramfs.h>
 
-static struct file_system_type tmpfs_fs_type = {
+static struct file_system_type shmem_fs_type = {
         .name           = "tmpfs",
         .mount          = ramfs_mount,
         .kill_sb        = kill_litter_super,
 };
 
-int __init init_tmpfs(void)
+int __init shmem_init(void)
 {
-        BUG_ON(register_filesystem(&tmpfs_fs_type) != 0);
+        BUG_ON(register_filesystem(&shmem_fs_type) != 0);
 
-        shm_mnt = kern_mount(&tmpfs_fs_type);
+        shm_mnt = kern_mount(&shmem_fs_type);
         BUG_ON(IS_ERR(shm_mnt));
 
         return 0;
 }
 
-int shmem_unuse(swp_entry_t entry, struct page *page)
+int shmem_unuse(swp_entry_t swap, struct page *page)
 {
         return 0;
 }
@@ -2927,43 +2440,17 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
         return 0;
 }
 
-void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
-        truncate_inode_pages_range(inode->i_mapping, start, end);
+        truncate_inode_pages_range(inode->i_mapping, lstart, lend);
 }
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-/**
- * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
- * @inode: the inode to be searched
- * @pgoff: the offset to be searched
- * @pagep: the pointer for the found page to be stored
- * @ent: the pointer for the found swap entry to be stored
- *
- * If a page is found, refcount of it is incremented. Callers should handle
- * these refcount.
- */
-void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
-                                        struct page **pagep, swp_entry_t *ent)
-{
-        struct page *page = NULL;
-
-        if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
-                goto out;
-        page = find_get_page(inode->i_mapping, pgoff);
-out:
-        *pagep = page;
-        *ent = (swp_entry_t){ .val = 0 };
-}
-#endif
-
 #define shmem_vm_ops                            generic_file_vm_ops
 #define shmem_file_operations                   ramfs_file_operations
 #define shmem_get_inode(sb, dir, mode, dev, flags)      ramfs_get_inode(sb, dir, mode, dev)
 #define shmem_acct_size(flags, size)            0
 #define shmem_unacct_size(flags, size)          do {} while (0)
-#define SHMEM_MAX_BYTES                         MAX_LFS_FILESIZE
 
 #endif /* CONFIG_SHMEM */
@@ -2987,7 +2474,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags)
         if (IS_ERR(shm_mnt))
                 return (void *)shm_mnt;
 
-        if (size < 0 || size > SHMEM_MAX_BYTES)
+        if (size < 0 || size > MAX_LFS_FILESIZE)
                 return ERR_PTR(-EINVAL);
 
         if (shmem_acct_size(flags, size))
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1b8c33907242..17bc224bce68 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1924,20 +1924,24 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
         /*
          * Find out how many pages are allowed for a single swap
-         * device. There are two limiting factors: 1) the number of
-         * bits for the swap offset in the swp_entry_t type and
-         * 2) the number of bits in the a swap pte as defined by
-         * the different architectures. In order to find the
-         * largest possible bit mask a swap entry with swap type 0
+         * device. There are three limiting factors: 1) the number
+         * of bits for the swap offset in the swp_entry_t type, and
+         * 2) the number of bits in the swap pte as defined by
+         * the different architectures, and 3) the number of free bits
+         * in an exceptional radix_tree entry. In order to find the
+         * largest possible bit mask, a swap entry with swap type 0
          * and swap offset ~0UL is created, encoded to a swap pte,
-         * decoded to a swp_entry_t again and finally the swap
+         * decoded to a swp_entry_t again, and finally the swap
          * offset is extracted. This will mask all the bits from
          * the initial ~0UL mask that can't be encoded in either
          * the swp_entry_t or the architecture definition of a
-         * swap pte.
+         * swap pte. Then the same is done for a radix_tree entry.
          */
         maxpages = swp_offset(pte_to_swp_entry(
-                        swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+                        swp_entry_to_pte(swp_entry(0, ~0UL))));
+        maxpages = swp_offset(radix_to_swp_entry(
+                        swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
+
         if (maxpages > swap_header->info.last_page) {
                 maxpages = swap_header->info.last_page + 1;
                 /* p->max is an unsigned int: don't overflow it */
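
The rewritten comment above describes a round-trip probe: push an all-ones offset through each lossy encode/decode pair, and whatever bits survive bound the largest offset every representation can hold. A toy userspace model of the same computation; the bit widths are arbitrary stand-ins, since the real limits come from the architecture's swap pte layout and the radix_tree exceptional-entry format:

    #include <stdio.h>

    #define FAKE_PTE_BITS   24      /* illustrative only */
    #define FAKE_RADIX_BITS 28      /* illustrative only */

    /* lossy encode + decode collapsed into one masking step */
    static unsigned long pte_roundtrip(unsigned long offset)
    {
            return offset & ((1UL << FAKE_PTE_BITS) - 1);
    }

    static unsigned long radix_roundtrip(unsigned long offset)
    {
            return offset & ((1UL << FAKE_RADIX_BITS) - 1);
    }

    int main(void)
    {
            unsigned long maxpages;

            /* mirrors the two chained statements in read_swap_header() above:
             * the tighter of the two limits automatically wins */
            maxpages = pte_roundtrip(~0UL);
            maxpages = radix_roundtrip(maxpages) + 1;

            printf("max swap pages = 0x%lx\n", maxpages);  /* 0x1000000 here */
            return 0;
    }
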
diff --git a/mm/truncate.c b/mm/truncate.c
index 232eb2736a79..b40ac6d4e86e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -336,6 +336,14 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
         unsigned long count = 0;
         int i;
 
+        /*
+         * Note: this function may get called on a shmem/tmpfs mapping:
+         * pagevec_lookup() might then return 0 prematurely (because it
+         * got a gangful of swap entries); but it's hardly worth worrying
+         * about - it can rarely have anything to free from such a mapping
+         * (most pages are dirty), and already skips over any difficulties.
+         */
+
         pagevec_init(&pvec, 0);
         while (index <= end && pagevec_lookup(&pvec, mapping, index,
                         min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
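
The new comment above is about loop shape as much as about shmem: the scan below it stops as soon as one batch comes back empty, so a lookup that returns 0 early (its batch having filled with swap entries rather than pages) simply ends the walk, which the comment deems acceptable. A toy model of that shape, with an array standing in for the mapping and invented helpers throughout:

    #include <stdio.h>

    #define BATCH   4
    #define END     16

    enum slot { HOLE, PAGE, SWAPENT };

    /* Stand-in for pagevec_lookup(): inspects at most BATCH slots from
     * 'from' and returns the indexes of pages found. A window holding
     * only swap entries therefore returns 0 even though pages remain
     * further on: the premature-0 case the comment tolerates. */
    static int lookup_batch(const enum slot *slots, int from, int *batch)
    {
            int n = 0;

            for (int i = from; i < END && i < from + BATCH; i++)
                    if (slots[i] == PAGE)
                            batch[n++] = i;
            return n;
    }

    int main(void)
    {
            enum slot slots[END] = {
                    [0] = PAGE, [1] = PAGE,
                    [4] = SWAPENT, [5] = SWAPENT, [6] = SWAPENT,
                    [9] = PAGE,     /* never reached: scan ends early */
            };
            int batch[BATCH], index = 0, n;

            while (index < END && (n = lookup_batch(slots, index, batch))) {
                    for (int i = 0; i < n; i++)
                            printf("invalidate page %d\n", batch[i]);
                    index = batch[n - 1] + 1;
            }
            return 0;
    }
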
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 6d8ef4a3a9b5..8b2d37b59c9e 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -128,34 +128,34 @@ unsigned long long get_msr(int cpu, off_t offset)
 void print_header(void)
 {
         if (show_pkg)
-                fprintf(stderr, "pkg ");
+                fprintf(stderr, "pk");
         if (show_core)
-                fprintf(stderr, "core");
+                fprintf(stderr, " cr");
         if (show_cpu)
                 fprintf(stderr, " CPU");
         if (do_nhm_cstates)
-                fprintf(stderr, " %%c0 ");
+                fprintf(stderr, " %%c0 ");
         if (has_aperf)
-                fprintf(stderr, " GHz");
+                fprintf(stderr, " GHz");
         fprintf(stderr, " TSC");
         if (do_nhm_cstates)
-                fprintf(stderr, " %%c1 ");
+                fprintf(stderr, " %%c1");
         if (do_nhm_cstates)
-                fprintf(stderr, " %%c3 ");
+                fprintf(stderr, " %%c3");
         if (do_nhm_cstates)
-                fprintf(stderr, " %%c6 ");
+                fprintf(stderr, " %%c6");
         if (do_snb_cstates)
-                fprintf(stderr, " %%c7 ");
+                fprintf(stderr, " %%c7");
         if (do_snb_cstates)
-                fprintf(stderr, " %%pc2 ");
+                fprintf(stderr, " %%pc2");
         if (do_nhm_cstates)
-                fprintf(stderr, " %%pc3 ");
+                fprintf(stderr, " %%pc3");
         if (do_nhm_cstates)
-                fprintf(stderr, " %%pc6 ");
+                fprintf(stderr, " %%pc6");
         if (do_snb_cstates)
-                fprintf(stderr, " %%pc7 ");
+                fprintf(stderr, " %%pc7");
         if (extra_msr_offset)
-                fprintf(stderr, " MSR 0x%x ", extra_msr_offset);
+                fprintf(stderr, " MSR 0x%x ", extra_msr_offset);
 
         putc('\n', stderr);
 }
@@ -194,14 +194,14 @@ void print_cnt(struct counters *p)
         /* topology columns, print blanks on 1st (average) line */
         if (p == cnt_average) {
                 if (show_pkg)
-                        fprintf(stderr, " ");
+                        fprintf(stderr, " ");
                 if (show_core)
                         fprintf(stderr, "    ");
                 if (show_cpu)
                         fprintf(stderr, "    ");
         } else {
                 if (show_pkg)
-                        fprintf(stderr, "%4d", p->pkg);
+                        fprintf(stderr, "%d", p->pkg);
                 if (show_core)
                         fprintf(stderr, "%4d", p->core);
                 if (show_cpu)
@@ -241,22 +241,22 @@ void print_cnt(struct counters *p)
                 if (!skip_c1)
                         fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc);
                 else
-                        fprintf(stderr, " ****");
+                        fprintf(stderr, " ****");
         }
         if (do_nhm_cstates)
-                fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc);
+                fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc);
         if (do_nhm_cstates)
-                fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc);
+                fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc);
         if (do_snb_cstates)
-                fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc);
+                fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc);
         if (do_snb_cstates)
-                fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc);
+                fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc);
         if (do_nhm_cstates)
-                fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc);
+                fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc);
         if (do_nhm_cstates)
-                fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc);
+                fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc);
         if (do_snb_cstates)
-                fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc);
+                fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc);
         if (extra_msr_offset)
                 fprintf(stderr, " 0x%016llx", p->extra_msr);
         putc('\n', stderr);
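
Almost every turbostat hunk above adjusts a printf field width: "%7.2f" right-aligns its value in seven columns, while a literal blank plus "%6.2f" fills the same seven columns but keeps at least one separating space even when the value overflows its field, which is one plausible motive for the change. A standalone illustration:

    #include <stdio.h>

    int main(void)
    {
            /* header and data line up because both use 7-column fields */
            fprintf(stderr, "%7s%7s%7s\n", "%c0", "%c3", "%c6");
            fprintf(stderr, "%7.2f%7.2f%7.2f\n", 3.14, 0.25, 96.61);

            /* at 1000.00 the bare %7.2f fields touch, while " %6.2f"
             * still guarantees a blank between columns */
            fprintf(stderr, "%7.2f%7.2f\n", 1000.0, 1000.0);   /* 1000.001000.00 */
            fprintf(stderr, " %6.2f %6.2f\n", 1000.0, 1000.0); /* 1000.00 1000.00 */
            return 0;
    }
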
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 2618ef2ba31f..33c5c7ee148f 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -137,7 +137,6 @@ void cmdline(int argc, char **argv)
 void validate_cpuid(void)
 {
         unsigned int eax, ebx, ecx, edx, max_level;
-        char brand[16];
         unsigned int fms, family, model, stepping;
 
         eax = ebx = ecx = edx = 0;
@@ -160,8 +159,8 @@ void validate_cpuid(void)
                 model += ((fms >> 16) & 0xf) << 4;
 
         if (verbose > 1)
-                printf("CPUID %s %d levels family:model:stepping "
-                        "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
+                printf("CPUID %d levels family:model:stepping "
+                        "0x%x:%x:%x (%d:%d:%d)\n", max_level,
                         family, model, stepping, family, model, stepping);
 
         if (!(edx & (1 << 5))) {
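
For reference, the family/model/stepping decode that validate_cpuid() performs above can be reproduced in a few lines with the compiler-provided <cpuid.h> helper (GCC/clang, x86 only); the extended-model adjustment matches the arithmetic visible in the hunk, and the rest is a sketch rather than the tool's exact flow:

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
            unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
            unsigned int fms, family, model, stepping;

            if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    return 1;       /* leaf 1 unsupported */

            fms = eax;
            family = (fms >> 8) & 0xf;
            model = (fms >> 4) & 0xf;
            stepping = fms & 0xf;
            if (family == 6 || family == 0xf)
                    model += ((fms >> 16) & 0xf) << 4;      /* extended model */

            printf("family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
                   family, model, stepping, family, model, stepping);
            return 0;
    }
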